In [1]:
import requests
import pandas as pd
import eurostat

url=f"https://appsso.eurostat.ec.europa.eu/nui/print.do"
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [2]:
df = eurostat.get_data_df('tran_r_acci')
df.columns = df.columns.astype(str)
# Drop all years before 2012, keep only with unit measure Per Million Inhabitants and accident type deadly
df = df.drop(df.loc[:, '2011': ].columns, axis = 1)
df = df[df.unit == 'P_MHAB']
df = df[df.victim == 'KIL']




In [3]:
# Rename to avoid problems using \
df = df.rename(columns={'geo\\time': ' NUTS 2'})
df.head()

Unnamed: 0,victim,unit,NUTS 2,2018,2017,2016,2015,2014,2013,2012
1410,KIL,P_MHAB,AT,46.0,47.0,50.0,56.0,51.0,54.0,63.0
1411,KIL,P_MHAB,AT1,35.0,36.0,40.0,45.0,45.0,40.0,55.0
1412,KIL,P_MHAB,AT11,44.0,86.0,65.0,83.0,77.0,59.0,105.0
1413,KIL,P_MHAB,AT12,62.0,56.0,68.0,80.0,74.0,69.0,90.0
1414,KIL,P_MHAB,AT13,10.0,11.0,10.0,7.0,12.0,10.0,14.0


In [4]:
# Merge on all entries which are also in the target variable cities to extract only the interesting cities
target_cities = pd.read_csv("Cities_with_codes.csv")
road_accidents = pd.merge(target_cities, df, on=[' NUTS 2'])

# Check for missing cities
missing_cities = target_cities[-target_cities[' NUTS 2'].isin(road_accidents[' NUTS 2'])]
missing_cities

Unnamed: 0,City,City Code,NUTS 2,Country
6,Belgrade,-,RS11,RS
27,Edinburgh,UK007C1,UKM7,UK
33,Glasgow,UK004C1,UKM3,UK
58,Reykjavik,IS001C1,IS00,IS


In [5]:
# Check for missing values
print('Missing values for columns:')
def NaN_percent(df, column_name):
    row_count = df[column_name].shape[0]
    empty_values = row_count - df[column_name].count()
    return (100.0*empty_values)/row_count
for i in list(road_accidents):
    print("%s: %.2f%%" % (i, NaN_percent(road_accidents,i)))


Missing values for columns:
City: 0.00%
 City Code: 0.00%
 NUTS 2: 0.00%
 Country: 0.00%
victim: 0.00%
unit: 0.00%
2018: 0.00%
2017: 0.00%
2016: 1.30%
2015: 1.30%
2014: 1.30%
2013: 9.09%
2012: 9.09%


In [6]:
has_nan = road_accidents[road_accidents.isna().any(axis=1)]
has_nan

Unnamed: 0,City,City Code,NUTS 2,Country,victim,unit,2018,2017,2016,2015,2014,2013,2012
17,Budapest,HU001C1,HU11,HU,KIL,P_MHAB,29.0,28.0,32.0,30.0,29.0,,
24,Cork,IE002C1,IE05,IE,KIL,P_MHAB,33.0,36.0,40.0,42.0,44.0,,
25,Dublin,IE001C1,IE06,IE,KIL,P_MHAB,22.0,25.0,34.0,20.0,36.0,,
37,Kaunas,LT002C1,LT02,LT,KIL,P_MHAB,57.0,72.0,70.0,90.0,99.0,,
42,Ljubljana,SI001C1,SI04,SI,KIL,P_MHAB,34.0,41.0,57.0,58.0,55.0,,
72,Vilnius,LT001C1,LT01,LT,KIL,P_MHAB,72.0,53.0,58.0,63.0,68.0,,
73,Warsaw,PL001C1,PL91,PL,KIL,P_MHAB,50.0,61.0,,,,,


In [7]:
# Impute using padding
road_accidents.fillna(method='pad',axis=1)

Unnamed: 0,City,City Code,NUTS 2,Country,victim,unit,2018,2017,2016,2015,2014,2013,2012
0,Amsterdam,NL002C1,NL32,NL,KIL,P_MHAB,32,29,31,28,26,26,33
1,Ankara,TR001C1,TR51,TR,KIL,P_MHAB,72,73,73,75,30,32,42
2,Antwerp,BE002C1,BE2,BE,KIL,P_MHAB,47,46,52,61,63,60,60
3,Athens,EL001C1,EL30,EL,KIL,P_MHAB,48,43,53,54,47,54,59
4,Barcelona,ES002C1,ES51,ES,KIL,P_MHAB,44,38,38,39,37,36,45
5,Valencia,ES003C1,ES51,ES,KIL,P_MHAB,44,38,38,39,37,36,45
6,Belfast,UK012C1,UKN0,UK,KIL,P_MHAB,29,34,37,40,43,31,26
7,Bergen,NO002C1,NO05,NO,KIL,P_MHAB,23,26,18,16,22,46,29
8,Berlin,DE001C1,DE30,DE,KIL,P_MHAB,12,10,16,14,15,11,13
9,Birmingham,UK002C1,UKG3,UK,KIL,P_MHAB,20,20,21,20,20,22,21


In [8]:
#To be able to merge with the other data
#Transform the columns of each year to a variable year
yearly_data = dict()
deaths_in_road_accidents  = pd.DataFrame()
for year in range(2012,2019):
    yearly_data= road_accidents[ list(road_accidents.loc[:,'City':' Country']) + [f"{year}"]]
    yearly_data.insert(4, "Year", year)
    yearly_data = yearly_data.rename(columns={f"{year}": "Unemployment_Rate"})
    deaths_in_road_accidents = deaths_in_road_accidents.append(yearly_data)
deaths_in_road_accidents = deaths_in_road_accidents.reset_index(drop=True)

In [9]:
# Save as csv
deaths_in_road_accidents.to_csv(path_or_buf='deaths_in_road_accidents.csv')

In [10]:
deaths_in_road_accidents

Unnamed: 0,City,City Code,NUTS 2,Country,Year,Unemployment_Rate
0,Amsterdam,NL002C1,NL32,NL,2012,33.0
1,Ankara,TR001C1,TR51,TR,2012,42.0
2,Antwerp,BE002C1,BE2,BE,2012,60.0
3,Athens,EL001C1,EL30,EL,2012,59.0
4,Barcelona,ES002C1,ES51,ES,2012,45.0
5,Valencia,ES003C1,ES51,ES,2012,45.0
6,Belfast,UK012C1,UKN0,UK,2012,26.0
7,Bergen,NO002C1,NO05,NO,2012,29.0
8,Berlin,DE001C1,DE30,DE,2012,13.0
9,Birmingham,UK002C1,UKG3,UK,2012,21.0
