# Hier wird die Grid Funktion erstellt.
Die Funktion sollte etwa wie folgt gehen:

Function grid

input(sensorid, trafficIndex, shape=Diameter)

Output(Grid)

In [30]:
!pip install branca



In [25]:
!pip install folium geopandas

Collecting folium
  Obtaining dependency information for folium from https://files.pythonhosted.org/packages/ae/6d/18a7546e1748ecdd6ed7cd00d3f183faf1df08bd4f5e5e0eb3e72458b862/folium-0.17.0-py2.py3-none-any.whl.metadata
  Downloading folium-0.17.0-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting geopandas
  Obtaining dependency information for geopandas from https://files.pythonhosted.org/packages/c4/64/7d344cfcef5efddf9cf32f59af7f855828e9d74b5f862eddf5bfd9f25323/geopandas-1.0.1-py3-none-any.whl.metadata
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting branca>=0.6.0 (from folium)
  Obtaining dependency information for branca>=0.6.0 from https://files.pythonhosted.org/packages/fc/be/720f85abacd654ec86f1431bc7c004eae74417bd9d0e7a2bc43601062627/branca-0.8.0-py3-none-any.whl.metadata
  Downloading branca-0.8.0-py3-none-any.whl.metadata (1.5 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Obtaining dependency information for pyogrio>=0.7.2 from https://files.pyt

In [2]:
import pandas as pd
import numpy as np
import branca.colormap as cm  # Used for color gradient
import folium
import geopandas

In [3]:
df_sensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\HSLU_DSPRO1_TrafficStatus\data\RawDataLondon\London_detectors.csv")
df_usedsensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\data\London_UTD19_Modified_23.10.2024.csv")

In [None]:
df_real = pd.merge(df_usedsensors, df_sensors, on='detid', how='left')



df_real.drop_duplicates(subset='detid', keep='first', inplace=True)
#df_real.head()
df_real

Unnamed: 0,day,interval,detid,weekday,traffic,length,pos,fclass,road,limit,citycode,lanes,linkid,long,lat
0,2015-05-16,0,CNTR_N01/001b1,Saturday,11.0,0.046710,0.019461,tertiary,Dover Street,,london,1.0,5171.0,-0.140822,51.507659
6071,2015-05-16,0,CNTR_N01/001d1,Saturday,24.0,0.063071,0.037390,tertiary,Stratton Street,,london,1.0,42.0,-0.141902,51.507265
12084,2015-05-16,0,CNTR_N01/001d2,Saturday,15.0,0.062435,0.037490,tertiary,Stratton Street,,london,1.0,43.0,-0.141871,51.507242
17991,2015-05-16,0,CNTR_N01/002h1,Saturday,8.0,0.065935,0.038988,other,Berkeley Street,,london,1.0,44.0,-0.141150,51.507612
23942,2015-05-16,0,CNTR_N01/002x1,Saturday,1.0,0.288005,0.271785,tertiary,Dover Street,,london,1.0,5176.0,-0.140862,51.507990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10966532,2015-05-16,300,SOUT_N10/234a1,Saturday,4.0,0.207137,0.179201,tertiary,Taybridge Road,,london,1.0,5220.0,-0.149506,51.461271
10971953,2015-05-16,0,SOUT_N10/275a1,Saturday,1.0,0.100059,0.069135,tertiary,Ashley Crescent,,london,1.0,2207.0,-0.150636,51.466664
10976646,2015-05-16,0,SOUT_N10/275b1,Saturday,2.0,0.537231,0.059944,tertiary,Dunston Road,,london,1.0,2208.0,-0.150441,51.467859
10981028,2015-05-16,0,SOUT_N10/313a1,Saturday,2.0,0.141588,0.098684,primary,Nine Elms Lane,,london,1.0,2133.0,-0.132595,51.483244


In [5]:

def grid(df, sensorid_col, trafficIndex_col, shape=0.01):
    """
    Input:
    - df: DataFrame containing sensor data with longitude and latitude
    - sensorid_col: column name for sensor ids
    - trafficIndex_col: column name for traffic indices (e.g. length or traffic volume)
    - shape: the size of the grid (diameter of the cell)
    
    Output:
    - A DataFrame with the grid and the mean trafficIndex for each grid cell.
    """
    # 1. Round the coordinates to a precision based on 'shape' (grid diameter)
    df['long_rounded'] = (df['long'] // shape) * shape
    df['lat_rounded'] = (df['lat'] // shape) * shape
    
    # 2. Create a grid ID based on the rounded coordinates
    df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)
    
    # 3. Calculate the mean of the trafficIndex for each grid and count sensors
    grid = df.groupby('grid_id').agg(
        mean_trafficIndex=(trafficIndex_col, 'mean'),
        sensors_in_grid=(sensorid_col, 'count'),
        long_rounded=('long_rounded', 'first'),
        lat_rounded=('lat_rounded', 'first')
    ).reset_index()

    return grid


In [6]:
grid_data = grid(df_real, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)

# View the grid data
print(grid_data)

                      grid_id  mean_trafficIndex  sensors_in_grid  \
0                 -0.01_51.47           6.800000                5   
1    -0.01_51.480000000000004           8.571429               14   
2                  -0.01_51.5           3.680000               25   
3                 -0.01_51.51           5.685714               35   
4                 -0.01_51.53           6.500000                8   
..                        ...                ...              ...   
193                 0.0_51.51           2.612903               31   
194                 0.0_51.52           0.500000                2   
195                 0.0_51.53           1.266667               15   
196                 0.0_51.54           2.000000                6   
197    0.0_51.550000000000004           0.666667                3   

     long_rounded  lat_rounded  
0           -0.01        51.47  
1           -0.01        51.48  
2           -0.01        51.50  
3           -0.01        51.51  
4     

In [7]:
def create_polygon(lat, long, shape='circle', size=0.005):
    """
    Create a polygon with different shapes (rectangle, octagon, triangle) around a central point.
    
    Args:
    - lat: Latitude of the center
    - long: Longitude of the center
    - shape: 'circle', 'rectangle', 'octagon', 'triangle'
    - size: the size of the shape (for polygons, it determines the distance of vertices from the center)
    
    Returns:
    - A list of [lat, long] tuples representing the vertices of the polygon.
    """
    if shape == 'rectangle':
        # Return a square (approximate rectangle) around the center
        return [
            [lat - size, long - size],  # bottom-left
            [lat - size, long + size],  # bottom-right
            [lat + size, long + size],  # top-right
            [lat + size, long - size]   # top-left
        ]
    
    elif shape == 'triangle':
        # Return an equilateral triangle (upward facing)
        return [
            [lat + size, long],              # top
            [lat - size / 2, long - size],   # bottom-left
            [lat - size / 2, long + size]    # bottom-right
        ]
    
    elif shape == 'octagon':
        # Create an approximate octagon (8-sided polygon) around the center
        angle_offset = np.pi / 4  # 45 degrees per side
        return [
            [lat + size * np.cos(i * angle_offset), long + size * np.sin(i * angle_offset)]
            for i in range(8)
        ]
    
    else:
        # Default to a circle (using folium.Circle)
        return None  # No polygon, as Circle will be used in the main function

def plot_grid_with_shapes(grid, shape='circle', city_center=(51.5074, -0.1278), zoom_start=12):
    """
    Plot the grid over a map of London with various shapes (circle, rectangle, octagon, triangle).
    - Red indicates higher mean traffic index.
    - Green indicates lower mean traffic index.
    
    Args:
    - grid: DataFrame containing grid information with mean traffic index, rounded lat/long, and grid_id.
    - shape: Shape to use for plotting ('circle', 'rectangle', 'octagon', 'triangle')
    - city_center: Tuple of (latitude, longitude) for the center of the map (default is central London).
    - zoom_start: Initial zoom level for the map (default is 12).
    
    Output:
    - Folium map with grid visualized.
    """
    # Create a Folium map centered around London
    m = folium.Map(location=city_center, zoom_start=zoom_start)

    # Create a color map that interpolates between green (low) and red (high)
    colormap = cm.LinearColormap(colors=['green', 'yellow', 'red'], 
                                 vmin=grid['mean_trafficIndex'].min(), 
                                 vmax=grid['mean_trafficIndex'].max())
    
    colormap.caption = 'Mean Traffic Index'
    m.add_child(colormap)  # Add the colormap to the map

    # Plot the grid cells on the map with the chosen shape
    for _, row in grid.iterrows():
        color = colormap(row['mean_trafficIndex'])
        
        # Determine the vertices for the given shape
        polygon = create_polygon(row['lat_rounded'], row['long_rounded'], shape=shape)
        
        
        if shape == 'circle':
            # If shape is 'circle', use folium.Circle
            folium.Circle(
                location=[row['lat_rounded'], row['long_rounded']],
                radius=500,  # 500 meters radius (adjustable)
                color=color,
                fill=True,
                fill_opacity=0.6,
                popup=f"Grid ID: {row['grid_id']}<br>Mean Traffic Index: {row['mean_trafficIndex']}<br>Sensors in Grid: {row['sensors_in_grid']}"
            ).add_to(m)
        
        elif polygon:
            # If the shape is a polygon (rectangle, triangle, octagon), use folium.Polygon
            folium.Polygon(
                locations=polygon,
                color=color,
                fill=True,
                fill_opacity=0.6,
                popup=f"Grid ID: {row['grid_id']}<br>Mean Traffic Index: {row['mean_trafficIndex']}<br>Sensors in Grid: {row['sensors_in_grid']}"
            ).add_to(m)

    return m


In [None]:
# Plotten mit Kreisen (standardmäßig)
#map_with_circles = plot_grid_with_shapes(grid_data, shape='circle', city_center=(51.550, -0.021), zoom_start=15)
#map_with_circles.save('london_grid_circles.html')

# Plotten mit Rechtecken
map_with_rectangles1 = plot_grid_with_shapes(grid_data, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)
#map_with_rectangles.save('london_grid_rectangles.html')

# Plotten mit Achtecken
#map_with_octagons = plot_grid_with_shapes(grid_data, shape='octagon', city_center=(51.550, -0.021), zoom_start=15)
#map_with_octagons.save('london_grid_octagons.html')

# Plotten mit Dreiecken
#map_with_triangles = plot_grid_with_shapes(grid_data, shape='triangle', city_center=(51.550, -0.021), zoom_start=15)
#map_with_triangles.save('london_grid_triangles.html')

map_with_rectangles1

In [9]:
#df_sensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\HSLU_DSPRO1_TrafficStatus\data\RawDataLondon\London_detectors.csv")
df_allLondonSensors = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\data\London_UTD19.csv")

Erste Eintrag eines von einem Sensor nehmen

In [10]:
df_allLondonSensors.drop_duplicates(subset='detid', keep='first', inplace=True)
df_real2 = pd.merge(df_allLondonSensors, df_sensors, on='detid', how='left')
df_real2.head()

Unnamed: 0,day,interval,detid,flow,occ,error,city,speed,length,pos,fclass,road,limit,citycode,lanes,linkid,long,lat
0,2015-05-16,0,EAST_N04/161x1,144.0,0.028333,0.0,london,,0.303585,0.261157,secondary,Homerton Road,,london,1.0,5082.0,-0.021497,51.550929
1,2015-05-16,0,EAST_N04/161y1,96.0,0.013333,0.0,london,,0.103679,0.063417,primary,Eastway,,london,1.0,5091.0,-0.020899,51.550704
2,2015-05-16,0,EAST_N04/162a1,348.0,0.071667,0.0,london,,0.260623,0.117906,secondary,Homerton Road,,london,1.0,5083.0,-0.022649,51.550907
3,2015-05-16,0,EAST_N04/162a2,300.0,0.046667,1.0,london,,0.216874,0.117942,secondary,Homerton Road,,london,1.0,5084.0,-0.022617,51.55088
4,2015-05-16,0,EAST_N04/163f1,240.0,0.046667,0.0,london,,0.344754,0.329789,primary,Eastway,,london,1.0,5092.0,-0.019288,51.552281


In [11]:
def calculate_traffic_speed(df, flow_column='flow', occ_column='occ', traffic_column='traffic'):
    """
    Calculates the traffic speed and adds it as a new column to the DataFrame.

    This function calculates the traffic speed using the formula speed = flow * occupancy
    and adds the result as a new column to the DataFrame.

    Parameters:
    df (pandas.DataFrame): The input DataFrame containing the data.
    flow_column (str): The name of the column representing the flow. Default is 'flow'.
    occ_column (str): The name of the column representing the occupancy. Default is 'occ'.
    traffic_column (str): The name of the new column to store the calculated traffic speed. Default is 'traffic'.

    Returns:
    pandas.DataFrame: The DataFrame with the new traffic speed column added.
    """
    df[traffic_column] = df[flow_column] * df[occ_column]
    return df

calculate_traffic_speed(df_real2)

Unnamed: 0,day,interval,detid,flow,occ,error,city,speed,length,pos,fclass,road,limit,citycode,lanes,linkid,long,lat,traffic
0,2015-05-16,0,EAST_N04/161x1,144.0,0.028333,0.0,london,,0.303585,0.261157,secondary,Homerton Road,,london,1.0,5082.0,-0.021497,51.550929,4.080000
1,2015-05-16,0,EAST_N04/161y1,96.0,0.013333,0.0,london,,0.103679,0.063417,primary,Eastway,,london,1.0,5091.0,-0.020899,51.550704,1.280000
2,2015-05-16,0,EAST_N04/162a1,348.0,0.071667,0.0,london,,0.260623,0.117906,secondary,Homerton Road,,london,1.0,5083.0,-0.022649,51.550907,24.939998
3,2015-05-16,0,EAST_N04/162a2,300.0,0.046667,1.0,london,,0.216874,0.117942,secondary,Homerton Road,,london,1.0,5084.0,-0.022617,51.550880,14.000000
4,2015-05-16,0,EAST_N04/163f1,240.0,0.046667,0.0,london,,0.344754,0.329789,primary,Eastway,,london,1.0,5092.0,-0.019288,51.552281,11.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5714,2015-05-16,0,CNTR_N12/064a1,144.0,0.033333,0.0,london,,0.199990,0.190530,tertiary,Courtfield Road,,london,1.0,973.0,-0.181609,51.493436,4.800000
5715,2015-05-16,0,CNTR_N10/174a2,492.0,0.084167,0.0,london,,0.095656,0.045995,primary,Bridgend Road,,london,1.0,871.0,-0.184713,51.462268,41.410000
5716,2015-05-16,0,CNTR_N10/288a1,12.0,0.006667,1.0,london,,0.098547,0.085619,primary,Bridgend Road,,london,1.0,866.0,-0.184533,51.463036,0.080000
5717,2015-05-16,0,CNTR_N01/095h1,0.0,0.000000,1.0,london,,0.212300,0.201511,primary,Victoria Embankment,,london,1.0,455.0,-0.115650,51.510483,0.000000


In [12]:
grid_data = grid(df_real2, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)

# View the grid data
print(grid_data)

                      grid_id  mean_trafficIndex  sensors_in_grid  \
0                 -0.01_51.47          57.880003                6   
1    -0.01_51.480000000000004          71.863125               16   
2                  -0.01_51.5          30.987180               39   
3                 -0.01_51.51          54.713069               88   
4                 -0.01_51.53          15.505319               47   
..                        ...                ...              ...   
227                 0.0_51.51          14.181250               72   
228                 0.0_51.52           6.208000                5   
229                 0.0_51.53          11.033333               30   
230                 0.0_51.54          10.682105               38   
231    0.0_51.550000000000004           8.835714                7   

     long_rounded  lat_rounded  
0           -0.01        51.47  
1           -0.01        51.48  
2           -0.01        51.50  
3           -0.01        51.51  
4     

In [None]:
# Plotten mit Rechtecken
map_with_rectangles2 = plot_grid_with_shapes(grid_data, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)

map_with_rectangles2

Erkenntnis alle geben ein falsches bild,
daher nach und nach verschiedene typen von anomalien entfernen

Anomalien entfernen:

In [14]:
df_anomalies = pd.read_csv(r"C:\Users\rueed\OneDrive\HSLU\3 Semester\DSPRO 1\data\Anomalies_31.10.2024.csv")

In [15]:
df_aBound = []
df_aSmall = []
df_aData = []

df_aBound = df_anomalies[df_anomalies['IQR_out_of_bound'] == True]
df_aSmall = df_anomalies[(df_anomalies['IQR_to_small'] == True) & (df_anomalies['IQR_out_of_bound'] == False)]
df_aData = df_anomalies[(df_anomalies['not_enough_data'] == True) & (df_anomalies['IQR_out_of_bound'] == False) & (df_anomalies['IQR_to_small'] == False)]

In [16]:
df_real2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5719 entries, 0 to 5718
Data columns (total 22 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   day           5719 non-null   object 
 1   interval      5719 non-null   int64  
 2   detid         5719 non-null   object 
 3   flow          5719 non-null   float64
 4   occ           5719 non-null   float64
 5   error         5719 non-null   float64
 6   city          5719 non-null   object 
 7   speed         0 non-null      float64
 8   length        5719 non-null   float64
 9   pos           5719 non-null   float64
 10  fclass        5719 non-null   object 
 11  road          5048 non-null   object 
 12  limit         0 non-null      float64
 13  citycode      5719 non-null   object 
 14  lanes         5719 non-null   float64
 15  linkid        5719 non-null   float64
 16  long          5719 non-null   float64
 17  lat           5719 non-null   float64
 18  traffic       5719 non-null 

In [17]:
df_aBound.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 11 to 3282
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   detid             96 non-null     object
 1   IQR_out_of_bound  96 non-null     bool  
 2   IQR_to_small      96 non-null     bool  
 3   not_enough_data   96 non-null     bool  
dtypes: bool(3), object(1)
memory usage: 1.8+ KB


In [19]:
df_real3 = df_real2[~df_real2['detid'].isin(df_aBound['detid'])]
df_real3.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5623 entries, 0 to 5718
Data columns (total 22 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   day           5623 non-null   object 
 1   interval      5623 non-null   int64  
 2   detid         5623 non-null   object 
 3   flow          5623 non-null   float64
 4   occ           5623 non-null   float64
 5   error         5623 non-null   float64
 6   city          5623 non-null   object 
 7   speed         0 non-null      float64
 8   length        5623 non-null   float64
 9   pos           5623 non-null   float64
 10  fclass        5623 non-null   object 
 11  road          4962 non-null   object 
 12  limit         0 non-null      float64
 13  citycode      5623 non-null   object 
 14  lanes         5623 non-null   float64
 15  linkid        5623 non-null   float64
 16  long          5623 non-null   float64
 17  lat           5623 non-null   float64
 18  traffic       5623 non-null   flo

In [None]:
grid_data = grid(df_real3, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)
map_with_rectangles3 = plot_grid_with_shapes(grid_data, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)

map_with_rectangles3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['long_rounded'] = (df['long'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lat_rounded'] = (df['lat'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)


In [22]:
df_aSmall.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1158 entries, 6 to 3539
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   detid             1158 non-null   object
 1   IQR_out_of_bound  1158 non-null   bool  
 2   IQR_to_small      1158 non-null   bool  
 3   not_enough_data   1158 non-null   bool  
dtypes: bool(3), object(1)
memory usage: 21.5+ KB


In [23]:
df_real4 = df_real3[~df_real3['detid'].isin(df_aSmall['detid'])]
df_real4.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4465 entries, 0 to 5718
Data columns (total 22 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   day           4465 non-null   object 
 1   interval      4465 non-null   int64  
 2   detid         4465 non-null   object 
 3   flow          4465 non-null   float64
 4   occ           4465 non-null   float64
 5   error         4465 non-null   float64
 6   city          4465 non-null   object 
 7   speed         0 non-null      float64
 8   length        4465 non-null   float64
 9   pos           4465 non-null   float64
 10  fclass        4465 non-null   object 
 11  road          3927 non-null   object 
 12  limit         0 non-null      float64
 13  citycode      4465 non-null   object 
 14  lanes         4465 non-null   float64
 15  linkid        4465 non-null   float64
 16  long          4465 non-null   float64
 17  lat           4465 non-null   float64
 18  traffic       4465 non-null   flo

In [24]:
grid_data = grid(df_real4, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)
map_with_rectangles4 = plot_grid_with_shapes(grid_data, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)

map_with_rectangles4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['long_rounded'] = (df['long'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lat_rounded'] = (df['lat'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)


In [25]:
df_aData.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2286 entries, 0 to 3532
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   detid             2286 non-null   object
 1   IQR_out_of_bound  2286 non-null   bool  
 2   IQR_to_small      2286 non-null   bool  
 3   not_enough_data   2286 non-null   bool  
dtypes: bool(3), object(1)
memory usage: 42.4+ KB


In [26]:
df_real5 = df_real4[~df_real4['detid'].isin(df_aData['detid'])]
df_real5.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2179 entries, 2 to 5718
Data columns (total 22 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   day           2179 non-null   object 
 1   interval      2179 non-null   int64  
 2   detid         2179 non-null   object 
 3   flow          2179 non-null   float64
 4   occ           2179 non-null   float64
 5   error         2179 non-null   float64
 6   city          2179 non-null   object 
 7   speed         0 non-null      float64
 8   length        2179 non-null   float64
 9   pos           2179 non-null   float64
 10  fclass        2179 non-null   object 
 11  road          1913 non-null   object 
 12  limit         0 non-null      float64
 13  citycode      2179 non-null   object 
 14  lanes         2179 non-null   float64
 15  linkid        2179 non-null   float64
 16  long          2179 non-null   float64
 17  lat           2179 non-null   float64
 18  traffic       2179 non-null   flo

In [27]:
grid_data = grid(df_real5, sensorid_col='detid', trafficIndex_col='traffic', shape=0.01)
map_with_rectangles5 = plot_grid_with_shapes(grid_data, shape='rectangle', city_center=(51.550, -0.021), zoom_start=15)

map_with_rectangles5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['long_rounded'] = (df['long'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lat_rounded'] = (df['lat'] // shape) * shape
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['grid_id'] = df['long_rounded'].astype(str) + "_" + df['lat_rounded'].astype(str)
