In [3]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

In [4]:
# Read the raw livability export into a DataFrame (the path is relative to the working directory).
liv = pd.read_csv(filepath_or_buffer='../../../data/raw/livability_dataset.csv')

In [5]:
# Encode the textual livability labels as integers 1-9.
# The labels are listed in rank order; a label's 1-based position is its score.
# NOTE(review): 'Onbekend' ("unknown") is encoded as 1 instead of being left missing,
# so the NaN-based imputer further down will not treat it as a gap - confirm this is intended.
# NOTE(review): verify the relative order of 'Zwak', 'Onvoldoende' and
# 'Ruim onvoldoende' against the data provider's scale definition.
mapping = {
    label: score
    for score, label in enumerate(
        (
            'Onbekend',
            'Zwak',
            'Onvoldoende',
            'Ruim onvoldoende',
            'Voldoende',
            'Ruim voldoende',
            'Goed',
            'Zeer goed',
            'Uitstekend',
        ),
        start=1,
    )
}

# Apply the encoding to every year column.
# Series.map turns any label that is not a key of `mapping` into NaN.
for year in ('2002', '2008', '2012', '2014', '2016', '2018', '2020'):
    liv[year] = liv[year].map(mapping)

In [6]:
# Discard the three earliest year columns; only 2014 onwards is used in the rest of the notebook.
liv = liv.drop(columns=['2002', '2008', '2012'])

In [7]:
# Select the four year columns that carry scores; this is what the imputer is fitted on.
subset = liv.loc[:, ['2014', '2016', '2018', '2020']]

In [8]:
# Mean imputer: each NaN in a column is replaced by that column's mean.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan)

In [9]:
# Fit the imputer on the score columns so it learns the per-column mean used for filling.
imputer.fit(subset)

In [10]:
# Fill the missing values in the subset using the fitted imputer.
# By default the result comes back as an array without the DataFrame's labels,
# which is why it is wrapped in a DataFrame again afterwards.
subset_imputed = imputer.transform(subset)

In [11]:
# Wrap the imputed values in a DataFrame again.
# The row index and column labels are taken from `subset` so that later column
# assignments into `liv` align on liv's own row labels. A fresh 0..n-1 index would
# only line up while liv still carries the default index produced by read_csv.
imputed_df = pd.DataFrame(subset_imputed, index=subset.index, columns=subset.columns)

In [12]:
# Column names of the in-between years (2015, 2017, 2019) that have no score yet and must be filled.
years_to_impute = [str(year) for year in range(2015, 2020, 2)]

In [13]:
# Fill every in-between year with the row-wise mean of the imputed
# 2014/2016/2018/2020 scores. The mean does not depend on the year, so it is
# computed once and reused for each new column.
# NOTE(review): all three years receive the same value - confirm that averaging all
# four surveyed years (rather than only the two adjacent ones) is intended.
row_means = imputed_df.mean(axis=1)

for year in years_to_impute:
    liv[year] = row_means

In [14]:
# Round the filled-in scores to the nearest whole number (the columns stay float at this point).
for year in ('2015', '2017', '2019'):
    liv[year] = liv[year].round()


In [15]:
# Cast the rounded scores to integers; astype(int) raises if a value is still NaN.
for year in ('2015', '2017', '2019'):
    liv[year] = liv[year].astype(int)

In [16]:
# Put the columns in chronological order after the neighbourhood name.
# reindex keeps only the columns listed here, so any other column is removed from liv.
column_order = ['Neighbourhoods'] + [str(year) for year in range(2014, 2021)]
liv = liv.reindex(columns=column_order)



In [18]:
# Write the preprocessed scores to disk without the row index.
# NOTE(review): the file name is spelled 'prepocessed'; left unchanged because other
# code may read this exact path - confirm before renaming.
liv.to_csv('../../../data/preprocessed/prepocessed_liv.csv', index=False)
