In [2]:
import requests
import pandas as pd
import eurostat

url=f"https://appsso.eurostat.ec.europa.eu/nui/print.do"

In [3]:
df = eurostat.get_data_df('lfst_r_lfu3rt')

In [4]:
# Drop all years before 2012 and columns unit and age (same for all entries)
df.columns = df.columns.astype(str)
df = df.drop(df.loc[:, '2011': ].columns, axis = 1)
# Rename to avoid problems using \
df = df.rename(columns={'geo\\time': ' NUTS 2'})

# Extract the data for both sexes (Total) and age group 15-74
df = df[df.sex == 'T']
df = df[df.age == 'Y15-74']
df = df.drop(['sex','unit','age'], axis = 1)


In [5]:
# Merge on all entries which are also in the target variable cities to extract only the interesting cities
target_cities = pd.read_csv("Cities_with_codes.csv")
unemployment_rate = pd.merge(target_cities, df, on=[' NUTS 2'])

# Check for missing cities
missing_cities = target_cities[-target_cities[' NUTS 2'].isin(unemployment_rate[' NUTS 2'])]
missing_cities

Missing cities:  Empty DataFrame
Columns: [City,  City Code,  NUTS 2,  Country]
Index: []


In [6]:
# Check for missing values
print('Missing values for columns:')
def NaN_percent(df, column_name):
    row_count = df[column_name].shape[0]
    empty_values = row_count - df[column_name].count()
    return (100.0*empty_values)/row_count
for i in list(unemployment_rate):
    print("%s: %.2f%%" % (i, NaN_percent(unemployment_rate,i)))


Missing values for columns:
City: 0.00%
 City Code: 0.00%
 NUTS 2: 0.00%
 Country: 0.00%
2019: 1.23%
2018: 1.23%
2017: 1.23%
2016: 1.23%
2015: 1.23%
2014: 1.23%
2013: 1.23%
2012: 7.41%


In [7]:
has_nan = unemployment_rate[unemployment_rate.isna().any(axis=1)]
has_nan

Unnamed: 0,City,City Code,NUTS 2,Country,2019,2018,2017,2016,2015,2014,2013,2012
7,Belgrade,-,RS11,RS,8.3,10.9,13.4,15.7,18.7,17.2,17.9,
18,Budapest,HU001C1,HU11,HU,2.5,3.1,2.9,4.3,5.1,6.0,8.5,
28,Edinburgh,UK007C1,UKM7,UK,3.4,3.6,3.7,4.8,6.0,5.8,7.5,
34,Glasgow,UK004C1,UKM3,UK,,,,,,,,9.2
40,Kaunas,LT002C1,LT02,LT,7.2,6.9,8.1,8.9,9.8,11.6,12.6,
76,Vilnius,LT001C1,LT01,LT,4.4,4.6,4.8,5.6,7.6,8.5,9.7,
77,Warsaw,PL001C1,PL91,PL,2.1,2.4,3.5,3.7,4.9,5.8,6.0,


In [7]:
Glasgow = unemployment_rate.loc[unemployment_rate['City'] == 'Glasgow']
# Whole region of Scotland is UKM
# Glasgow is 40% of the population it should be a reasonable approximation
temp  = df[df[' NUTS 2'] == 'UKM']
temp = temp.loc[:,'2019':'2013']
Glasgow.loc[:, '2019':'2013'] = temp.loc[:, '2019':'2013'].to_numpy()
unemployment_rate[unemployment_rate['City'] == 'Glasgow'] = Glasgow

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


In [8]:
# Impute the rest using padding
unemployment_rate = unemployment_rate.interpolate(method='pad',axis=1)
print('Dataframe has NaN values: ', unemployment_rate.isnull().values.any())

Dataframe has NaN values:  False


In [9]:
#To be able to merge with the other data
#Transform the columns of each year to a variable year
yearly_data = dict()
unemployment_rate_data  = pd.DataFrame()
for year in range(2012,2020):
    yearly_data= unemployment_rate[ list(unemployment_rate.loc[:,'City':' Country']) + [f"{year}"]]
    yearly_data.insert(4, "Year", year)
    yearly_data = yearly_data.rename(columns={f"{year}": "Unemployment_Rate"})
    unemployment_rate_data = unemployment_rate_data.append(yearly_data)
unemployment_rate_data = unemployment_rate_data.reset_index(drop=True)

In [10]:
unemployment_rate_data.to_csv(path_or_buf='Unemployment_Rate.csv', index=False)

In [11]:
unemployment_rate_data

Unnamed: 0,City,City Code,NUTS 2,Country,Year,Unemployment_Rate
0,Amsterdam,NL002C1,NL32,NL,2012,5.4
1,Ankara,TR001C1,TR51,TR,2012,8.3
2,Antwerp,BE002C1,BE2,BE,2012,4.5
3,Athens,EL001C1,EL30,EL,2012,25.8
4,Barcelona,ES002C1,ES51,ES,2012,22.5
...,...,...,...,...,...,...
643,Vilnius,LT001C1,LT01,LT,2019,4.4
644,Warsaw,PL001C1,PL91,PL,2019,2.1
645,Wroclaw,PL004C1,PL51,PL,2019,3.3
646,Zagreb,HR001C1,HR04,HR,2019,6.7
