Creating the Grid for the baseline Model

In [1]:
import pandas as pd
import numpy as np
import branca.colormap as cm  # Used for color gradient
import folium
import geopandas

Get Detectors

In [2]:
df_sensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\HSLU_DSPRO1_TrafficStatus\data\RawDataLondon\London_detectors.csv")
df_sensors.head()

Unnamed: 0,detid,length,pos,fclass,road,limit,citycode,lanes,linkid,long,lat
0,EAST_N04/161x1,0.303585,0.261157,secondary,Homerton Road,,london,1.0,5082.0,-0.021497,51.550929
1,EAST_N04/161y1,0.103679,0.063417,primary,Eastway,,london,1.0,5091.0,-0.020899,51.550704
2,EAST_N04/162a1,0.260623,0.117906,secondary,Homerton Road,,london,1.0,5083.0,-0.022649,51.550907
3,EAST_N04/162a2,0.216874,0.117942,secondary,Homerton Road,,london,1.0,5084.0,-0.022617,51.55088
4,EAST_N04/163f1,0.344754,0.329789,primary,Eastway,,london,1.0,5092.0,-0.019288,51.552281


Get Sensors from Models


In [3]:
import joblib
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV

In [8]:
def get_knn_prediction(models_path, weekday, interval_values=[
               0, 3600, 7200, 10800, 14400, 18000, 21600, 25200, 28800, 32400, 
               36000, 39600, 43200, 46800, 50400, 54000, 57600, 61200, 64800, 
               68400, 72000, 75600, 79200, 82800]):
    
    X_values = pd.DataFrame(interval_values, columns=['interval'])
    X_values['weekday'] = weekday
    
    predictions = []
    
    for model_filename in os.listdir(models_path):
        model_path = os.path.join(models_path, model_filename)
        if os.path.isfile(model_path):
            # Load the KNN model
            sensor_model = joblib.load(model_path)
            y_pred = sensor_model.predict(X_values)
            
            # Store predictions in DataFrame format
            predictions.append(pd.DataFrame({
                'traffic': y_pred,
                'detid': model_filename.replace('-', '/').replace('.pkl', ''),
                'interval': X_values['interval'],
            }))
        
    return pd.concat(predictions)


In [9]:
df_monday = get_knn_prediction(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\data\knn", 0)
df_monday.head()

Unnamed: 0,traffic,detid,interval
0,6.6,CNTR_N00/005g1,0
1,4.8,CNTR_N00/005g1,3600
2,2.2,CNTR_N00/005g1,7200
3,1.5,CNTR_N00/005g1,10800
4,1.2,CNTR_N00/005g1,14400


In [10]:
df_real = pd.merge(df_monday, df_sensors, on='detid', how='left')


df_real = df_real[df_real['interval'] == 0]
df_real.head()

Unnamed: 0,traffic,detid,interval,length,pos,fclass,road,limit,citycode,lanes,linkid,long,lat
0,6.6,CNTR_N00/005g1,0,0.237532,0.224619,tertiary,Temple Place,,london,1.0,463.0,-0.111488,51.511081
24,6.0,CNTR_N00/005g2,0,0.238617,0.224596,tertiary,Temple Place,,london,1.0,464.0,-0.111477,51.511047
48,1.666667,CNTR_N00/005x1,0,0.420449,0.335635,tertiary,Temple Avenue,,london,1.0,3765.0,-0.107033,51.511085
72,1.5,CNTR_N00/005x2,0,0.419348,0.335239,tertiary,Temple Avenue,,london,1.0,3766.0,-0.107035,51.511054
96,7.571429,CNTR_N01/001b1,0,0.04671,0.019461,tertiary,Dover Street,,london,1.0,5171.0,-0.140822,51.507659


In [11]:

def grid(df, sensorid_col, trafficIndex_col, shape=0.01):
    """
    Input:
    - df: DataFrame containing sensor data with longitude and latitude
    - sensorid_col: column name for sensor ids
    - trafficIndex_col: column name for traffic indices (e.g. length or traffic volume)
    - shape: the size of the grid (diameter of the cell)
    
    Output:
    - A DataFrame with the grid and the mean trafficIndex for each grid cell.
    """
    # 1. Round the coordinates to a precision based on 'shape' (grid diameter)
    df['long_rounded'] = (df['long'] // shape) * shape
    df['lat_rounded'] = (df['lat'] // shape) * shape
    
    # 2. Create a grid ID based on the rounded coordinates
    df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)
    
    # 3. Calculate the mean of the trafficIndex for each grid and count sensors
    grid = df.groupby('grid_id').agg(
        mean_trafficIndex=(trafficIndex_col, 'mean'),
        sensors_in_grid=(sensorid_col, 'count'),
        long_rounded=('long_rounded', 'first'),
        lat_rounded=('lat_rounded', 'first')
    ).reset_index()

    return grid


In [12]:
grid_data = grid(df_real, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)

# View the grid data
print(grid_data)

                      grid_id  mean_trafficIndex  sensors_in_grid  \
0                 -0.01_51.47           0.666667                6   
1    -0.01_51.480000000000004           0.371795               13   
2                  -0.01_51.5           0.343678               29   
3                 -0.01_51.51           0.824675               55   
4                 -0.01_51.53           1.661111               12   
..                        ...                ...              ...   
223                 0.0_51.51           0.264493               46   
224                 0.0_51.52           0.000000                3   
225                 0.0_51.53           0.103175               21   
226                 0.0_51.54           0.283333               16   
227    0.0_51.550000000000004           0.000000                7   

     long_rounded  lat_rounded  
0           -0.01        51.47  
1           -0.01        51.48  
2           -0.01        51.50  
3           -0.01        51.51  
4     

In [15]:
grid_data.head()


Unnamed: 0,grid_id,mean_trafficIndex,sensors_in_grid,long_rounded,lat_rounded
0,-0.01_51.47,0.666667,6,-0.01,51.47
1,-0.01_51.480000000000004,0.371795,13,-0.01,51.48
2,-0.01_51.5,0.343678,29,-0.01,51.5
3,-0.01_51.51,0.824675,55,-0.01,51.51
4,-0.01_51.53,1.661111,12,-0.01,51.53


In [13]:
def create_polygon(lat, long, shape='circle', size=0.005):
    """
    Create a polygon with different shapes (rectangle, octagon, triangle) around a central point.
    
    Args:
    - lat: Latitude of the center
    - long: Longitude of the center
    - shape: 'circle', 'rectangle', 'octagon', 'triangle'
    - size: the size of the shape (for polygons, it determines the distance of vertices from the center)
    
    Returns:
    - A list of [lat, long] tuples representing the vertices of the polygon.
    """
    if shape == 'rectangle':
        # Return a square (approximate rectangle) around the center
        return [
            [lat - size, long - size],  # bottom-left
            [lat - size, long + size],  # bottom-right
            [lat + size, long + size],  # top-right
            [lat + size, long - size]   # top-left
        ]
    
    elif shape == 'triangle':
        # Return an equilateral triangle (upward facing)
        return [
            [lat + size, long],              # top
            [lat - size / 2, long - size],   # bottom-left
            [lat - size / 2, long + size]    # bottom-right
        ]
    
    elif shape == 'octagon':
        # Create an approximate octagon (8-sided polygon) around the center
        angle_offset = np.pi / 4  # 45 degrees per side
        return [
            [lat + size * np.cos(i * angle_offset), long + size * np.sin(i * angle_offset)]
            for i in range(8)
        ]
    
    else:
        # Default to a circle (using folium.Circle)
        return None  # No polygon, as Circle will be used in the main function

def plot_grid_with_shapes(grid, shape='circle', city_center=(51.5074, -0.1278), zoom_start=12):
    """
    Plot the grid over a map of London with various shapes (circle, rectangle, octagon, triangle).
    - Red indicates higher mean traffic index.
    - Green indicates lower mean traffic index.
    
    Args:
    - grid: DataFrame containing grid information with mean traffic index, rounded lat/long, and grid_id.
    - shape: Shape to use for plotting ('circle', 'rectangle', 'octagon', 'triangle')
    - city_center: Tuple of (latitude, longitude) for the center of the map (default is central London).
    - zoom_start: Initial zoom level for the map (default is 12).
    
    Output:
    - Folium map with grid visualized.
    """
    # Create a Folium map centered around London
    m = folium.Map(location=city_center, zoom_start=zoom_start)

    # Create a color map that interpolates between green (low) and red (high)
    colormap = cm.LinearColormap(colors=['green', 'yellow', 'red'], 
                                 vmin=grid['mean_trafficIndex'].min(), 
                                 vmax=grid['mean_trafficIndex'].max())
    
    colormap.caption = 'Mean Traffic Index'
    m.add_child(colormap)  # Add the colormap to the map

    # Plot the grid cells on the map with the chosen shape
    for _, row in grid.iterrows():
        color = colormap(row['mean_trafficIndex'])
        
        # Determine the vertices for the given shape
        polygon = create_polygon(row['lat_rounded'], row['long_rounded'], shape=shape)
        
        
        if shape == 'circle':
            # If shape is 'circle', use folium.Circle
            folium.Circle(
                location=[row['lat_rounded'], row['long_rounded']],
                radius=500,  # 500 meters radius (adjustable)
                color=color,
                fill=True,
                fill_opacity=0.6,
                popup=f"Grid ID: {row['grid_id']}<br>Mean Traffic Index: {row['mean_trafficIndex']}<br>Sensors in Grid: {row['sensors_in_grid']}"
            ).add_to(m)
        
        elif polygon:
            # If the shape is a polygon (rectangle, triangle, octagon), use folium.Polygon
            folium.Polygon(
                locations=polygon,
                color=color,
                fill=True,
                fill_opacity=0.6,
                popup=f"Grid ID: {row['grid_id']}<br>Mean Traffic Index: {row['mean_trafficIndex']}<br>Sensors in Grid: {row['sensors_in_grid']}"
            ).add_to(m)

    return m


In [14]:

map_with_rectangles1 = plot_grid_with_shapes(grid_data, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)


map_with_rectangles1

Saving the Grids per Weekday for later testing

In [None]:
def saving_baseline_grids():
    """
    This function generates the baseline grids for all weekdays and intervals and saves them to CSV files.
    They than can be used for later tests.
    """
    
    
    df_sensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\HSLU_DSPRO1_TrafficStatus\data\RawDataLondon\London_detectors.csv")
    
    weekday_mapping = {
        'Monday': 0,
        'Tuesday': 1,
        'Wednesday': 2,
        'Thursday': 3,
        'Friday': 4,
        'Saturday': 5,
        'Sunday': 6
    }
    
    interval_values= [0, 3600, 7200,10800, 14400, 18000, 21600, 25200, 28800, 32400, 36000, 39600, 43200, 46800, 50400, 54000, 57600, 61200, 64800, 68400, 72000, 75600, 79200, 82800]
    
    
    
    for x in range(7):
        df_weekday = get_random_baseline_prediction(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\data\baseline", x)
        df_weekday_with_coords = pd.merge(df_weekday, df_sensors, on='detid', how='left')

        for y in interval_values:
            df_real = df_weekday_with_coords[df_weekday_with_coords['interval'] == y]
            grid_data = grid(df_real, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)
            grid_data.to_csv(f"baselinegrids/{x}_{y}.csv", index=False)


In [22]:
saving_baseline_grids()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['long_rounded'] = (df['long'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lat_rounded'] = (df['lat'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)
A value is trying to

Creating Function to for Dashboard to get grid for given Weekday and Hour

In [24]:
def get_weekday_prediction(weekday):
    """
    This function generates a prediction for a given weekday using the baseline models.
    It returns a DataFrame with the predicted traffic values for each sensor.
    Weekday mapping:
    - Monday: 0
    - Tuesday: 1
    - Wednesday: 2
    - Thursday: 3
    - Friday: 4
    - Saturday: 5
    - Sunday: 6
    """
    df_sensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\HSLU_DSPRO1_TrafficStatus\data\RawDataLondon\London_detectors.csv")
        
    df_weekday = get_random_baseline_prediction(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\data\baseline", weekday)
    df_weekday_with_coords = pd.merge(df_weekday, df_sensors, on='detid', how='left')
    
    return df_weekday_with_coords





In [25]:
def get_hour_prediction(df, interval_value):
    """
    This function generates the baseline grids for all weekdays and intervals.
    possible Intervalls between: [0, 3600, 7200,10800, 14400, 18000, 21600, 25200, 28800, 32400, 36000, 39600, 43200, 46800, 50400, 54000, 57600, 61200, 64800, 68400, 72000, 75600, 79200, 82800]
    
    
    """
    df_real = df[df['interval'] == interval_value]
    grid_data = grid(df_real, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)
    return grid_data

In [26]:
df_m = get_weekday_prediction(0)
df_m.head()

Unnamed: 0,traffic,detid,interval,length,pos,fclass,road,limit,citycode,lanes,linkid,long,lat
0,15.827968,CNTR_N00/005g1,0,0.237532,0.224619,tertiary,Temple Place,,london,1.0,463.0,-0.111488,51.511081
1,19.79319,CNTR_N00/005g1,3600,0.237532,0.224619,tertiary,Temple Place,,london,1.0,463.0,-0.111488,51.511081
2,19.489631,CNTR_N00/005g1,7200,0.237532,0.224619,tertiary,Temple Place,,london,1.0,463.0,-0.111488,51.511081
3,15.197384,CNTR_N00/005g1,10800,0.237532,0.224619,tertiary,Temple Place,,london,1.0,463.0,-0.111488,51.511081
4,14.349067,CNTR_N00/005g1,14400,0.237532,0.224619,tertiary,Temple Place,,london,1.0,463.0,-0.111488,51.511081


In [28]:
grid_m0 = get_hour_prediction(df_m, 0)
grid_m0.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['long_rounded'] = (df['long'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lat_rounded'] = (df['lat'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)


Unnamed: 0,grid_id,mean_trafficIndex,sensors_in_grid,long_rounded,lat_rounded
0,-0.01_51.47,14.300455,5,-0.01,51.47
1,-0.01_51.480000000000004,8.875371,14,-0.01,51.48
2,-0.01_51.5,4.926609,26,-0.01,51.5
3,-0.01_51.51,6.372463,37,-0.01,51.51
4,-0.01_51.53,6.221998,9,-0.01,51.53


In [29]:
map_with_rectangles1 = plot_grid_with_shapes(grid_m0, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)


map_with_rectangles1