In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

#### Load datasets

In [2]:
# Read the three LOR reference layers (2021 edition) and the detailed street-segment
# network in one pass; the order of the paths matches the order of the target names.
PLR_gdf, BZR_gdf, PGR_gdf, STREETS_gdf = (
    gpd.read_file(geojson_path)
    for geojson_path in (
        '../data/input/berlin/lor_planungsraeume_2021.geojson',        # Planungsräume (PLR)
        '../data/input/berlin/lor_bezirksregionen_2021.geojson',       # Bezirksregionen (BZR)
        '../data/input/berlin/lor_prognoseraeume_2021.geojson',        # Prognoseräume (PGR)
        '../data/input/berlin/Detailnetz-Strassenabschnitte.geojson',  # street segments
    )
)

In [18]:
# Load the raw accident records (2018-2021) that every later cell enriches.
accident_df = pd.read_csv('../data/temp/raw_accident_dataset_2018-2021.csv')

  accident_df = pd.read_csv('../data/temp/raw_accident_dataset_2018-2021.csv')


In [None]:
# accident_df.head(5).T

In [19]:
# Remove the old LOR key and the linear-reference coordinates; none of them is
# referenced by the remaining visible cells.
# errors="ignore" keeps this cell re-runnable: without it a second execution raises
# KeyError because the columns are already gone.
accident_df = accident_df.drop(columns=["lor", "linrefx", "linrefy"], errors="ignore")

In [None]:
# Wrap the accident table in a GeoDataFrame with one point per record.
# The longitude/latitude columns hold decimal degrees (e.g. 13.47 / 52.51), so the
# points must be declared as WGS 84 (EPSG:4326). Labelling them as EPSG:32632 (UTM,
# metres) would make the later to_crs() calls move them far away from Berlin.
accident_gdf = gpd.GeoDataFrame(
    accident_df,
    geometry=gpd.points_from_xy(accident_df.longitude, accident_df.latitude),
    crs="EPSG:4326",
)

### Add PLR Area

In [None]:
# Bring the accident points into the CRS of the PLR layer before joining.
accident_gdf = accident_gdf.to_crs(PLR_gdf.crs)
# Left join: every accident is kept and receives the attributes of the PLR polygon it
# lies within. `predicate=` replaces the deprecated `op=` keyword and matches the
# BZR/PGR joins further down.
joined_gdf = gpd.sjoin(accident_gdf, PLR_gdf, predicate='within', how='left')

In [None]:
# Copy the PLR identifier and name from the join result onto the main table
# (assignment aligns on the row index).
for plr_column in ('PLR_ID', 'PLR_NAME'):
    accident_df[plr_column] = joined_gdf[plr_column]


### Add BZR area

In [None]:
# Align the accident points with the CRS of the BZR layer.
accident_gdf = accident_gdf.to_crs(crs=BZR_gdf.crs)
# Left join: keep every accident and attach the BZR polygon that contains it.
joined_gdf = gpd.sjoin(
    left_df=accident_gdf,
    right_df=BZR_gdf,
    how='left',
    predicate='within',
)

In [None]:
# Copy the BZR identifier and name from the join result onto the main table
# (assignment aligns on the row index).
for bzr_column in ('BZR_ID', 'BZR_NAME'):
    accident_df[bzr_column] = joined_gdf[bzr_column]


### Add PGR area

In [None]:
# Align the accident points with the CRS of the PGR layer.
accident_gdf = accident_gdf.to_crs(crs=PGR_gdf.crs)
# Left join: keep every accident and attach the PGR polygon that contains it.
joined_gdf = gpd.sjoin(
    left_df=accident_gdf,
    right_df=PGR_gdf,
    how='left',
    predicate='within',
)

In [None]:
# Copy the PGR identifier and name from the join result onto the main table
# (assignment aligns on the row index).
for pgr_column in ('PGR_ID', 'PGR_NAME'):
    accident_df[pgr_column] = joined_gdf[pgr_column]


In [None]:
# Inspect the most recent spatial-join result (at this point: accidents joined to PGR).
joined_gdf

In [None]:
# Inspect the accident table after the PLR/BZR/PGR columns were added.
accident_df

#### Get Street Names / using OSMNX library

In [None]:
# Transposed preview of the first five accidents (columns shown as rows).
accident_df.head(5).T

#### nearest distance

In [None]:
# Inspect the street-segment layer read from Detailnetz-Strassenabschnitte.geojson.
STREETS_gdf

In [None]:
# Build one point per accident from the longitude/latitude columns.
# points_from_xy creates all geometries in a single vectorised call instead of
# constructing a shapely Point row by row through DataFrame.apply.
accident_df['geometry'] = gpd.points_from_xy(accident_df['longitude'], accident_df['latitude'])

# Convert the DataFrame to a GeoDataFrame (no CRS is attached here; the following
# cell sets it explicitly).
accident_gdf = gpd.GeoDataFrame(accident_df, geometry='geometry')
accident_gdf

In [None]:
# The coordinates are WGS 84 longitude/latitude: declare that CRS first, then
# reproject the points into whatever CRS the street layer uses.
accident_gdf = (
    accident_gdf
    .set_crs('EPSG:4326')
    .to_crs(STREETS_gdf.crs)
)


#### Get Street Names / Using Street data from berlin website

In [None]:
# Work on the first 10 rows only (change the number to try a larger sample).
# .copy() detaches the subset from accident_gdf: the following cell adds columns to
# it, which on a plain slice triggers pandas' SettingWithCopyWarning.
accident_subset = accident_gdf.head(10).copy()

In [None]:
from shapely.ops import nearest_points
from tqdm import tqdm

# Initialize columns to store the nearest street information
accident_subset['nearest_street_idx'] = None
accident_subset['street_attribute'] = None

# Loop-invariant: the street geometries do not change between accidents.
street_geoms = STREETS_gdf.geometry

# For each accident point, find the nearest street.
# total= lets tqdm show a real progress bar (iterrows() has no length of its own).
for index, row in tqdm(accident_subset.iterrows(), total=len(accident_subset)):
    # One vectorised distance computation against all street segments replaces the
    # inner Python loop over STREETS_gdf.iterrows(). idxmin() yields the index label
    # of the first minimum, i.e. the same segment the strict `<` scan selected.
    # Distances are expressed in the units of the layers' CRS.
    nearest_idx = street_geoms.distance(row.geometry).idxmin()

    # Update the GeoDataFrame with the nearest street information
    accident_subset.at[index, 'nearest_street_idx'] = nearest_idx
    accident_subset.at[index, 'street_attribute'] = STREETS_gdf.at[nearest_idx, 'strassenna']


In [None]:
# Inspect the subset with the nearest_street_idx / street_attribute columns filled in.
accident_subset

In [None]:
# Keep a handle on the spatial index of the street layer.
# NOTE(review): `street_sindex` is not referenced by any other visible cell.
street_sindex = STREETS_gdf.sindex

#### First search - Getting street names / using OSMNX library

In [35]:
# Import necessary libraries
# NOTE(review): `sjoin` is imported here but not used in any visible cell
# (the joins above call `gpd.sjoin`).
from shapely.geometry import Point
from geopandas.tools import sjoin
import osmnx as ox
import geopandas as gpd

# Fetch the road network of Berlin (network_type='drive' restricts it to the drivable network).
# NOTE(review): presumably this downloads from OpenStreetMap and needs network access — confirm.
G = ox.graph_from_place('Berlin, Germany', network_type='drive')

# Split the road network into nodes and edges; only `edges` is used by later cells.
nodes, edges = ox.graph_to_gdfs(G)



In [51]:
# Show the OSM edge table returned by ox.graph_to_gdfs (indexed by u, v, key).
edges

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,lanes,name,highway,maxspeed,oneway,reversed,length,geometry,width,ref,bridge,tunnel,junction,access,area,service,est_width
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
172539,34694336,0,5117633,2,Gotthardstraße,secondary,50,False,True,687.484,"LINESTRING (13.33550 52.56521, 13.33565 52.565...",,,,,,,,,
172539,172562,0,"[33231778, 31740296, 4804202, 1119769231, 1119...","[3, 1, 2]",Gotthardstraße,secondary,50,False,False,436.961,"LINESTRING (13.33550 52.56521, 13.33533 52.565...",,,,,,,,,
172539,34694265,0,"[1188446056, 4610047, 1188446055]",2,Holländerstraße,tertiary,50,False,False,237.482,"LINESTRING (13.33550 52.56521, 13.33560 52.565...",,,,,,,,,
172545,271370539,0,24973218,,Teichstraße,tertiary,50,False,False,17.179,"LINESTRING (13.34661 52.56703, 13.34664 52.56688)",,,,,,,,,
172545,28345515,0,4546470,2,Teichstraße,tertiary,50,False,True,171.849,"LINESTRING (13.34661 52.56703, 13.34659 52.567...",,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11301166219,11301166219,0,27493498,,Am Rain,residential,30,False,False,42.005,"LINESTRING (13.13533 52.52719, 13.13533 52.527...",,,,,,,,,
11301166219,11301166219,1,27493498,,Am Rain,residential,30,False,True,42.005,"LINESTRING (13.13533 52.52719, 13.13535 52.527...",,,,,,,,,
11301166219,33236437,0,1219643377,,Am Rain,residential,30,False,True,137.480,"LINESTRING (13.13533 52.52719, 13.13496 52.527...",,,,,,,,,
11303769539,768474203,0,"[548510358, 4702902, 4067919]",3,Köpenicker Landstraße,primary,50,False,True,113.843,"LINESTRING (13.49481 52.46688, 13.49509 52.466...",,B 96a,yes,,,,,,


In [42]:
# First five accident records.
accident_df.head(5)

Unnamed: 0,key,objectid,bez,strasse,lor_ab_2021,ujahr,umonat,ustunde,uwochentag,ukategorie,...,ulichtverh,istrad,istpkw,istfuss,istkrad,istgkfz,istsonstige,ustrzustand,longitude,latitude
0,112695-2018,112695,2,Samariterviertel,2500729,2018,1,15,4,3,...,0,0,1,1,0,0,0,1,13.475018,52.513597
1,112705-2018,112705,12,Ziekowstraße/Freie Scholle,12500824,2018,1,11,2,3,...,0,0,1,0,0,0,0,0,13.291022,52.587259
2,112726-2018,112726,2,Barnimkiez,2400520,2018,1,9,3,3,...,0,0,1,1,0,0,0,0,13.420578,52.526019
3,112737-2018,112737,7,Volkspark (Rudolf-Wilde-Park),7200308,2018,1,17,2,3,...,2,0,1,1,0,0,0,0,13.348288,52.481844
4,112747-2018,112747,3,Niederschönhausen,3200206,2018,1,15,4,3,...,1,1,0,1,0,0,0,1,13.403228,52.583472


In [53]:
# Show the OSM edge table again (same content as the earlier display).
edges

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,lanes,name,highway,maxspeed,oneway,reversed,length,geometry,width,ref,bridge,tunnel,junction,access,area,service,est_width
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
172539,34694336,0,5117633,2,Gotthardstraße,secondary,50,False,True,687.484,"LINESTRING (13.33550 52.56521, 13.33565 52.565...",,,,,,,,,
172539,172562,0,"[33231778, 31740296, 4804202, 1119769231, 1119...","[3, 1, 2]",Gotthardstraße,secondary,50,False,False,436.961,"LINESTRING (13.33550 52.56521, 13.33533 52.565...",,,,,,,,,
172539,34694265,0,"[1188446056, 4610047, 1188446055]",2,Holländerstraße,tertiary,50,False,False,237.482,"LINESTRING (13.33550 52.56521, 13.33560 52.565...",,,,,,,,,
172545,271370539,0,24973218,,Teichstraße,tertiary,50,False,False,17.179,"LINESTRING (13.34661 52.56703, 13.34664 52.56688)",,,,,,,,,
172545,28345515,0,4546470,2,Teichstraße,tertiary,50,False,True,171.849,"LINESTRING (13.34661 52.56703, 13.34659 52.567...",,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11301166219,11301166219,0,27493498,,Am Rain,residential,30,False,False,42.005,"LINESTRING (13.13533 52.52719, 13.13533 52.527...",,,,,,,,,
11301166219,11301166219,1,27493498,,Am Rain,residential,30,False,True,42.005,"LINESTRING (13.13533 52.52719, 13.13535 52.527...",,,,,,,,,
11301166219,33236437,0,1219643377,,Am Rain,residential,30,False,True,137.480,"LINESTRING (13.13533 52.52719, 13.13496 52.527...",,,,,,,,,
11303769539,768474203,0,"[548510358, 4702902, 4067919]",3,Köpenicker Landstraße,primary,50,False,True,113.843,"LINESTRING (13.49481 52.46688, 13.49509 52.466...",,B 96a,yes,,,,,,


In [66]:
# Wrap the accidents as a GeoDataFrame of WGS 84 points.
# The CRS is written in canonical form ("EPSG:4326"); the previous "EPSG: 4326 "
# contained stray spaces and only worked as long as the parser tolerated them.
accident_gdf = gpd.GeoDataFrame(
    accident_df,
    geometry=gpd.points_from_xy(accident_df.longitude, accident_df.latitude),
    crs="EPSG:4326",
)

# Put the OSM edges into the same CRS as the accident points (EPSG code as an integer).
edges = edges.to_crs(epsg=4326)

In [40]:
# Show the OSM edge table once more.
# NOTE(review): this cell's execution count (40) is lower than that of the
# re-projection cell above (66), so the stored output predates that cell.
edges

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,lanes,name,highway,maxspeed,oneway,reversed,length,geometry,width,ref,bridge,tunnel,junction,access,area,service,est_width
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
172539,34694336,0,5117633,2,Gotthardstraße,secondary,50,False,True,687.484,"LINESTRING (13.33550 52.56521, 13.33565 52.565...",,,,,,,,,
172539,172562,0,"[33231778, 31740296, 4804202, 1119769231, 1119...","[3, 1, 2]",Gotthardstraße,secondary,50,False,False,436.961,"LINESTRING (13.33550 52.56521, 13.33533 52.565...",,,,,,,,,
172539,34694265,0,"[1188446056, 4610047, 1188446055]",2,Holländerstraße,tertiary,50,False,False,237.482,"LINESTRING (13.33550 52.56521, 13.33560 52.565...",,,,,,,,,
172545,271370539,0,24973218,,Teichstraße,tertiary,50,False,False,17.179,"LINESTRING (13.34661 52.56703, 13.34664 52.56688)",,,,,,,,,
172545,28345515,0,4546470,2,Teichstraße,tertiary,50,False,True,171.849,"LINESTRING (13.34661 52.56703, 13.34659 52.567...",,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11301166219,11301166219,0,27493498,,Am Rain,residential,30,False,False,42.005,"LINESTRING (13.13533 52.52719, 13.13533 52.527...",,,,,,,,,
11301166219,11301166219,1,27493498,,Am Rain,residential,30,False,True,42.005,"LINESTRING (13.13533 52.52719, 13.13535 52.527...",,,,,,,,,
11301166219,33236437,0,1219643377,,Am Rain,residential,30,False,True,137.480,"LINESTRING (13.13533 52.52719, 13.13496 52.527...",,,,,,,,,
11303769539,768474203,0,"[548510358, 4702902, 4067919]",3,Köpenicker Landstraße,primary,50,False,True,113.843,"LINESTRING (13.49481 52.46688, 13.49509 52.466...",,B 96a,yes,,,,,,


In [67]:
from tqdm import tqdm
import numpy as np
import pandas as pd
from shapely.geometry import Point
import warnings

# Enable tqdm's pandas integration; `progress_apply` is used on each chunk further down.
tqdm.pandas()

## works

def find_nearest_street(row):
    """Return the street name of the OSM edge closest to one accident.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide the keys ``'longitude'`` and ``'latitude'``.

    Returns
    -------
    The value of the ``name`` column of the nearest row in the module-level
    ``edges`` GeoDataFrame. As the stored outputs show, this can be a string,
    a list of names, or NaN when the edge has no name.
    """
    point = Point(row['longitude'], row['latitude'])

    # sindex.nearest returns a (2, n) array: row 0 holds the indices of the *input*
    # geometries (all 0 for a single point), row 1 the positions inside `edges`.
    # Only row 1 is a valid candidate list; flattening both rows would mix
    # position 0 of `edges` into the candidates.
    candidate_positions = np.asarray(edges.sindex.nearest(point, return_all=True))[1]

    # Positional lookup of the candidate edges (several when distances tie).
    candidates = edges.iloc[candidate_positions]

    # Index label of the candidate with the smallest distance to the point.
    nearest_edge = candidates.geometry.distance(point).idxmin()

    # Street name of that edge.
    return edges.loc[nearest_edge]['name']


# Function to split a dataframe into chunks
def split_dataframe(df, chunk_size):
    """Cut ``df`` into consecutive row blocks.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; it is only sliced, never modified.
    chunk_size : int
        Maximum number of rows per block.

    Returns
    -------
    list
        Slices of ``df`` in original row order; every block has ``chunk_size``
        rows except possibly the last one. An empty frame yields an empty list.
    """
    row_count = df.shape[0]
    pieces = []
    for start in range(0, row_count, chunk_size):
        stop = start + chunk_size
        pieces.append(df[start:stop])
    return pieces

# Split the dataframe into ~10 pieces so that each finished piece is written to
# disk as a backup before the next one starts.
chunk_size = len(accident_gdf) // 10 + 1
data_chunks = split_dataframe(accident_gdf, chunk_size)

# Process each chunk and save the results to a file.
# catch_warnings() confines the suppression to this loop and restores the previous
# warning filters afterwards, including when a chunk raises. The former
# filterwarnings('ignore') ... filterwarnings('default') pair stayed in "ignore"
# mode after an error and replaced the previous filter setup instead of restoring it.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    for i, chunk in enumerate(data_chunks, start=1):
        print(f"Processing chunk {i}/{len(data_chunks)}")
        # Work on a copy so the new column is not written into a slice of accident_gdf.
        chunk_copy = chunk.copy()
        chunk_copy.loc[:, 'nearest_street'] = chunk_copy.progress_apply(find_nearest_street, axis=1)
        # File numbering is 1-based: accident_data_chunk_1.csv ... accident_data_chunk_<n>.csv
        chunk_copy.to_csv(f'../data/temp/accident_data_chunk_{i}.csv', index=False)

Processing chunk 1/10


  0%|                                                                                         | 0/4511 [00:00<?, ?it/s]

100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:23<00:00, 53.98it/s]


Processing chunk 2/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:19<00:00, 56.41it/s]


Processing chunk 3/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:21<00:00, 55.43it/s]


Processing chunk 4/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:21<00:00, 55.48it/s]


Processing chunk 5/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:21<00:00, 55.22it/s]


Processing chunk 6/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:24<00:00, 53.36it/s]


Processing chunk 7/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:26<00:00, 52.20it/s]


Processing chunk 8/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:35<00:00, 47.29it/s]


Processing chunk 9/10


100%|██████████████████████████████████████████████████████████████████████████████| 4511/4511 [01:32<00:00, 48.55it/s]


Processing chunk 10/10


100%|██████████████████████████████████████████████████████████████████████████████| 4509/4509 [01:33<00:00, 48.44it/s]


In [61]:
# Re-assemble the per-chunk CSV backups into one table.
# The chunk files are numbered 1..len(data_chunks) (enumerate(..., start=1) in the
# writer loop), so the upper bound must be len(data_chunks) + 1; the former
# range(1, len(data_chunks)) silently dropped the last chunk.
all_chunks = [
    pd.read_csv(f'../data/temp/accident_data_chunk_{i}.csv')
    for i in range(1, len(data_chunks) + 1)
]
accident_df = pd.concat(all_chunks, ignore_index=True)

In [None]:
# all_chunks = [pd.read_csv(f'accident_data_chunk_{i}.csv') for i in range(1, 11 + 1)]
# accident_df = pd.concat(all_chunks, ignore_index=True)

In [62]:
# Inspect the re-assembled accident table, which now carries the nearest_street column.
accident_df

Unnamed: 0,key,objectid,bez,strasse,lor_ab_2021,ujahr,umonat,ustunde,uwochentag,ukategorie,...,istpkw,istfuss,istkrad,istgkfz,istsonstige,ustrzustand,longitude,latitude,geometry,nearest_street
0,112695-2018,112695,2,Samariterviertel,2500729,2018,1,15,4,3,...,1,1,0,0,0,1,13.475018,52.513597,POINT (13.4750178 52.51359681),Frankfurter Allee
1,112705-2018,112705,12,Ziekowstraße/Freie Scholle,12500824,2018,1,11,2,3,...,1,0,0,0,0,0,13.291022,52.587259,POINT (13.29102205 52.58725906),
2,112726-2018,112726,2,Barnimkiez,2400520,2018,1,9,3,3,...,1,1,0,0,0,0,13.420578,52.526019,POINT (13.42057818 52.52601854),Otto-Braun-Straße
3,112737-2018,112737,7,Volkspark (Rudolf-Wilde-Park),7200308,2018,1,17,2,3,...,1,1,0,0,0,0,13.348288,52.481844,POINT (13.34828776 52.48184447),Hauptstraße
4,112747-2018,112747,3,Niederschönhausen,3200206,2018,1,15,4,3,...,0,1,0,0,0,1,13.403228,52.583472,POINT (13.40322797 52.58347154),Dietzgenstraße
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45103,212640-2021,212640,1,0,1100103,2021,6,14,4,3,...,1,0,0,0,0,0,13.360415,52.505601,POINT (13.36041464 52.50560089),Genthiner Straße
45104,212639-2021,212639,2,0,2300314,2021,6,9,1,3,...,1,0,0,0,0,0,13.420077,52.502383,POINT (13.42007677 52.50238314),Adalbertstraße
45105,212638-2021,212638,9,0,9100101,2021,7,22,1,3,...,0,1,0,0,0,0,13.445723,52.493923,POINT (13.44572258 52.49392276),"['Jordanstraße', 'Lohmühlenstraße']"
45106,212637-2021,212637,2,0,2400624,2021,6,2,7,3,...,1,0,0,0,0,0,13.445424,52.512742,POINT (13.4454241 52.5127416),Marchlewskistraße


In [None]:
# accident_df.to_pickle("../data/temp/temp_adress_data.pkl")


In [68]:
# Frequency of each matched street name; dropna=False keeps the NaN bucket
# (accidents without a street name) in the result.
accident_df["nearest_street"].value_counts(dropna=False)


nearest_street
NaN                                  1263
Landsberger Allee                     433
Kurfürstendamm                        293
Sonnenallee                           286
Frankfurter Allee                     282
                                     ... 
Richard-Willstätter-Straße              1
Pilsener Straße                         1
Heubergerweg                            1
Tile-Wardenberg-Straße                  1
['Am Weingarten', 'Sigridstraße']       1
Name: count, Length: 4470, dtype: int64

In [None]:
# Accidents for which the first (OSM) search produced no street name.
# .copy() makes this an independent frame: a later cell adds a 'geometry' column
# to it, which on a filtered slice triggers pandas' SettingWithCopyWarning.
nan_entries = accident_df[pd.isna(accident_df["nearest_street"])].copy()

In [None]:
# Inspect the accidents that still lack a street name.
nan_entries

#### Second search - Get Street Names from NaN values / Using Street data from berlin website

In [None]:
# Build one point per unmatched accident from the longitude/latitude columns
# (single vectorised call instead of a row-wise apply).
nan_entries['geometry'] = gpd.points_from_xy(nan_entries['longitude'], nan_entries['latitude'])

# Convert the DataFrame to a GeoDataFrame.
# The coordinates are decimal degrees, so the correct label is WGS 84 (EPSG:4326),
# the CRS the street layer is reprojected to before the search below. The former
# "EPSG:32632" label declared the degree values to be UTM metres.
nan_entries_gdf = gpd.GeoDataFrame(nan_entries, geometry='geometry', crs="EPSG:4326")
nan_entries_gdf

In [None]:
# # Set CRS for accident data to WGS 84
# nan_entries_gdf = nan_entries_gdf.set_crs('EPSG:4326')

# Reproject the street layer to WGS 84 (EPSG:4326) so that its coordinates are
# longitude/latitude degrees, like the accident points built from the
# longitude/latitude columns.
STREETS_gdf = STREETS_gdf.to_crs(crs="EPSG:4326")

In [None]:
from shapely.ops import nearest_points
from tqdm import tqdm

# Initialize columns to store the nearest street information
nan_entries_gdf['nearest_street_idx'] = None
nan_entries_gdf['second_street_search'] = None

# Loop-invariant: the street geometries do not change between accidents.
street_geoms = STREETS_gdf.geometry

# For each accident point, find the nearest street.
# total= lets tqdm show a real progress bar (iterrows() has no length of its own).
for index, row in tqdm(nan_entries_gdf.iterrows(), total=len(nan_entries_gdf)):
    # One vectorised distance computation against all street segments replaces the
    # inner Python loop over STREETS_gdf.iterrows(). idxmin() yields the index label
    # of the first minimum, i.e. the same segment the strict `<` scan selected.
    # NOTE(review): with the street layer in EPSG:4326 these distances are in
    # degrees, as in the original loop — confirm that is accurate enough here.
    nearest_idx = street_geoms.distance(row.geometry).idxmin()

    # Update the GeoDataFrame with the nearest street information
    nan_entries_gdf.at[index, 'nearest_street_idx'] = nearest_idx
    nan_entries_gdf.at[index, 'second_street_search'] = STREETS_gdf.at[nearest_idx, 'strassenna']

In [None]:
# nan_entries_gdf.to_pickle("../data/temp/temp_adress_data2.pkl")


### Combine address data


In [6]:
# Inspect the street layer; the `strassenna` column holds the street name used by the second search.
STREETS_gdf

Unnamed: 0,element_nr,strassensc,strassenna,str_bez,strassenkl,strassen_1,strassen_2,verkehrsri,bezirk,stadtteil,verkehrseb,beginnt_be,endet_bei_,laenge,gueltig_vo,okstra_id,geometry
0,34610003_34610004.01,00002,Aalemannufer,,IV,G,STRA,B,Spandau,Hakenfelde,0,34610003,34610004,262.5000,2010-01-01,D62521E5E27544729878420C54E6C59C,"MULTILINESTRING ((13.21996 52.57307, 13.22225 ..."
1,40540001_41540003.01,00005,Abbestraße,,V,G,STRA,B,Charlottenburg-Wilmersdorf,Charlottenburg,0,40540001,41540003,182.4500,2010-01-01,275EE05309AF45DCA49E046BBA0CBBCC,"MULTILINESTRING ((13.31987 52.51527, 13.32117 ..."
2,42590002_42590001.01,00022,Afrikanische Straße,,II,G,STRA,B,Mitte,Wedding,0,42590002,42590001,65.3600,2010-01-01,18F55F73EB5346F6A7A719E970B9D4EC,"MULTILINESTRING ((13.33344 52.56096, 13.33332 ..."
3,42590003_42590004.01,00022,Afrikanische Straße,,II,G,STRA,B,Mitte,Wedding,0,42590003,42590004,61.4100,2010-01-01,AE661AB3DB344DD183550F05E15A4BD7,"MULTILINESTRING ((13.33431 52.55736, 13.33458 ..."
4,42590005_42590006.01,00022,Afrikanische Straße,,II,G,STRA,B,Mitte,Wedding,0,42590005,42590006,45.3600,2010-01-01,D47D9FE02F13481FA79966646BA478FA,"MULTILINESTRING ((13.33481 52.55642, 13.33483 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43105,53530049_53530051.01,41423,Irenenstraße,,V,G,STRA,B,Lichtenberg,Rummelsburg,0,53530049,53530051,192.8000,2021-03-04,BBA9B5EF7DB242898DDE1E53868D9227,"MULTILINESTRING ((13.49975 52.50937, 13.50007 ..."
43106,53530051_53530040.01,42456,Rosenfelder Straße,,V,G,STRA,B,Lichtenberg,Rummelsburg,0,53530051,53530040,123.1700,2021-03-04,92AD8579703C4E12B378108ED328B192,"MULTILINESTRING ((13.50249 52.50890, 13.50258 ..."
43107,53530061_53530040.01,40696,Einbecker Straße,,III,G,STRA,B,Lichtenberg,Rummelsburg,0,53530061,53530040,159.5962,2021-03-04,D8CB50FB08E34C0A8AA1A0095A40B0EF,"MULTILINESTRING ((13.50141 52.51033, 13.50335 ..."
43108,53530057_53530061.01,40696,Einbecker Straße,,III,G,STRA,B,Lichtenberg,Rummelsburg,0,53530057,53530061,52.6011,2021-03-04,2EB39A91A5AB43B3B612EC86DE4A3DD6,"MULTILINESTRING ((13.50073 52.51056, 13.50127 ..."


In [3]:
import pickle

# Load the pickled result of the first (OSM) street search.
# NOTE(review): pickle.load can execute arbitrary code; only load files written by this project.
with open ("../data/temp/temp_adress_data.pkl", "rb") as f:
    final_df = pickle.load(f)

# Display the loaded table.
final_df

Unnamed: 0,key,objectid,bez,strasse,lor_ab_2021,ujahr,umonat,ustunde,uwochentag,ukategorie,...,longitude,latitude,PLR_ID,PLR_NAME,BZR_ID,BZR_NAME,PGR_ID,PGR_NAME,geometry,nearest_street
0,112695-2018,112695,2,Samariterviertel,2500729,2018,1,15,4,3,...,13.475018,52.513597,2500729.0,Pettenkofer Straße,25007.0,Frankfurter Allee Nord,250.0,Friedrichshain Ost,POINT (13.4750178 52.51359681),Frankfurter Allee
1,112705-2018,112705,12,Ziekowstraße/Freie Scholle,12500824,2018,1,11,2,3,...,13.291022,52.587259,12500824.0,Ziekowstraße/Freie Scholle,125008.0,West 3 - Borsigwalde/Freie Scholle,1250.0,Wittenau/Borsigwalde,POINT (13.29102205 52.58725906),
2,112726-2018,112726,2,Barnimkiez,2400520,2018,1,9,3,3,...,13.420578,52.526019,2400520.0,Barnimkiez,24005.0,Karl-Marx-Allee Nord,240.0,Friedrichshain West,POINT (13.42057818 52.52601854),Otto-Braun-Straße
3,112737-2018,112737,7,Volkspark (Rudolf-Wilde-Park),7200308,2018,1,17,2,3,...,13.348288,52.481844,7200308.0,Volkspark (Rudolph-Wilde-Park),72003.0,Schöneberg Südwest,720.0,Schöneberg Süd,POINT (13.34828776 52.48184447),Hauptstraße
4,112747-2018,112747,3,Niederschönhausen,3200206,2018,1,15,4,3,...,13.403228,52.583472,3200206.0,Pastor-Niemöller-Platz,32002.0,Blankenfelde/Niederschönhausen,320.0,Nördliches Pankow,POINT (13.40322797 52.58347154),Dietzgenstraße
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50114,112174-2021,112174,4,0,4300414,2021,3,16,4,3,...,13.296345,52.511008,4300414.0,Schloßstraße,43004.0,Schloss Charlottenburg,430.0,Charlottenburg Zentrum,POINT (13.29634478 52.51100814),"['Sophie-Charlotte-Platz', 'Kaiserdamm']"
50115,111541-2021,111541,4,0,4400726,2021,1,1,6,3,...,13.289496,52.493813,4400726.0,Bismarckallee,44007.0,Grunewald,440.0,Wilmersdorf Süd,POINT (13.28949592 52.49381321),
50116,110370-2021,110370,3,0,2400520,2021,1,20,1,3,...,13.423095,52.527534,2400520.0,Barnimkiez,24005.0,Karl-Marx-Allee Nord,240.0,Friedrichshain West,POINT (13.42309463 52.52753402),Otto-Braun-Straße
50117,109998-2021,109998,7,0,7300619,2021,1,14,7,3,...,13.359259,52.474367,7300619.0,Grazer Platz,73006.0,Friedenau Ost,730.0,Friedenau,POINT (13.35925879 52.47436651),


In [12]:
# Load the pickled result of the second street search and keep only the join key
# plus the street name it found.
with open("../data/temp/temp_adress_data2.pkl", "rb") as pickle_file:
    second_search_result = pickle.load(pickle_file)
nan_df = second_search_result[["key", "second_street_search"]]

In [13]:
# Inspect the key -> second_street_search lookup table.
nan_df

Unnamed: 0,key,second_street_search
1,112705-2018,A 111 BAB Autobahnzubringer Hamburg
7,112805-2018,A 114 BAB Autobahnzubringer Prenzlau
11,112862-2018,A 100 BAB Stadtring
18,112980-2018,A 100 BAB Stadtring
21,113020-2018,A 111 BAB Autobahnzubringer Hamburg
...,...,...
50094,112216-2021,A 100 BAB Stadtring
50107,112189-2021,A 100 BAB Stadtring
50108,112187-2021,A 100 BAB Stadtring
50115,111541-2021,100 AS Kurfürstend (südl T) Einf v Aug-Vikto-S


In [14]:
# Attach the second-search street name to every accident (left join on `key`, so
# accidents without a second-search entry get NaN in that column).
merged_df = final_df.merge(nan_df, on="key", how="left")

# Fill the gaps of the first search with the second-search result, then drop the
# helper column.
merged_df = (
    merged_df
    .assign(nearest_street=merged_df['nearest_street'].fillna(merged_df['second_street_search']))
    .drop(columns=['second_street_search'])
)

In [15]:
# Inspect the merged table: nearest_street is now filled from both searches.
merged_df

Unnamed: 0,key,objectid,bez,strasse,lor_ab_2021,ujahr,umonat,ustunde,uwochentag,ukategorie,...,longitude,latitude,PLR_ID,PLR_NAME,BZR_ID,BZR_NAME,PGR_ID,PGR_NAME,geometry,nearest_street
0,112695-2018,112695,2,Samariterviertel,2500729,2018,1,15,4,3,...,13.475018,52.513597,2500729.0,Pettenkofer Straße,25007.0,Frankfurter Allee Nord,250.0,Friedrichshain Ost,POINT (13.4750178 52.51359681),Frankfurter Allee
1,112705-2018,112705,12,Ziekowstraße/Freie Scholle,12500824,2018,1,11,2,3,...,13.291022,52.587259,12500824.0,Ziekowstraße/Freie Scholle,125008.0,West 3 - Borsigwalde/Freie Scholle,1250.0,Wittenau/Borsigwalde,POINT (13.29102205 52.58725906),A 111 BAB Autobahnzubringer Hamburg
2,112726-2018,112726,2,Barnimkiez,2400520,2018,1,9,3,3,...,13.420578,52.526019,2400520.0,Barnimkiez,24005.0,Karl-Marx-Allee Nord,240.0,Friedrichshain West,POINT (13.42057818 52.52601854),Otto-Braun-Straße
3,112737-2018,112737,7,Volkspark (Rudolf-Wilde-Park),7200308,2018,1,17,2,3,...,13.348288,52.481844,7200308.0,Volkspark (Rudolph-Wilde-Park),72003.0,Schöneberg Südwest,720.0,Schöneberg Süd,POINT (13.34828776 52.48184447),Hauptstraße
4,112747-2018,112747,3,Niederschönhausen,3200206,2018,1,15,4,3,...,13.403228,52.583472,3200206.0,Pastor-Niemöller-Platz,32002.0,Blankenfelde/Niederschönhausen,320.0,Nördliches Pankow,POINT (13.40322797 52.58347154),Dietzgenstraße
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50114,112174-2021,112174,4,0,4300414,2021,3,16,4,3,...,13.296345,52.511008,4300414.0,Schloßstraße,43004.0,Schloss Charlottenburg,430.0,Charlottenburg Zentrum,POINT (13.29634478 52.51100814),"['Sophie-Charlotte-Platz', 'Kaiserdamm']"
50115,111541-2021,111541,4,0,4400726,2021,1,1,6,3,...,13.289496,52.493813,4400726.0,Bismarckallee,44007.0,Grunewald,440.0,Wilmersdorf Süd,POINT (13.28949592 52.49381321),100 AS Kurfürstend (südl T) Einf v Aug-Vikto-S
50116,110370-2021,110370,3,0,2400520,2021,1,20,1,3,...,13.423095,52.527534,2400520.0,Barnimkiez,24005.0,Karl-Marx-Allee Nord,240.0,Friedrichshain West,POINT (13.42309463 52.52753402),Otto-Braun-Straße
50117,109998-2021,109998,7,0,7300619,2021,1,14,7,3,...,13.359259,52.474367,7300619.0,Grazer Platz,73006.0,Friedenau Ost,730.0,Friedenau,POINT (13.35925879 52.47436651),"100 AS Alboinstr. (Westl.), Einf V Sachsend,Nau"


In [16]:
# Write the final table to the tableau data folder; index=False leaves the DataFrame index out of the CSV.
merged_df.to_csv("../data/tableau/accident_data.csv", index=False)