In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd

In [5]:
# Load the source CSV. The path is relative, so it resolves against the
# kernel's working directory.
LIV_DATA_CSV = '../../data/final/original_liv_data_per_year.csv'
df = pd.read_csv(LIV_DATA_CSV)

In [6]:
# Derive the two migration features (`moving_out`, `moving_in`) and drop one
# neighbourhood, as a single chain that yields a fresh frame.
# NOTE(review): the reason for excluding 'Hoogeind' is not stated here — worth documenting.
df = (
    df
    .assign(
        # Outflow is taken directly from the 'Vertrek' column.
        moving_out=lambda frame: frame['Vertrek'],
        # Inflow is the plain average of the three listed movement columns.
        moving_in=lambda frame: (
            frame['Vestiging']
            + frame['Verhuizing binnen gridcel']
            + frame['Verhuizing']
        ) / 3,
    )
    .loc[lambda frame: frame['Neighbourhood'] != 'Hoogeind']
)


In [7]:
# Model configuration, hoisted out of the loop so it can be read and tuned in one place.
FEATURE_COLUMNS = ['green_score', 'nuisance', 'GeregistreerdeMisdrijven_1',
                   'moving_in', 'moving_out']
TARGET_COLUMN = 'Livability index'
N_TRAIN_ROWS = 5    # the first N rows of each neighbourhood train the evaluation model
RANDOM_STATE = 42   # fixed seed so both forests are reproducible
RESULT_COLUMNS = ['Feature importance', 'pred', 'actual', 'mse']

neighborhoods = df['Neighbourhood'].unique()

# Maps neighbourhood name -> [feature importances, predictions, actual values, MSE],
# in the same order as RESULT_COLUMNS.
data = {}

for neighborhood in neighborhoods:
    df_neighborhood = df[df['Neighbourhood'] == neighborhood]
    X = df_neighborhood[FEATURE_COLUMNS]
    y = df_neighborhood[TARGET_COLUMN].to_numpy()

    # Without at least one row beyond the training window the hold-out set is
    # empty and scikit-learn fails with a message that does not name the
    # neighbourhood; fail early with a message that does.
    if len(X) <= N_TRAIN_ROWS:
        raise ValueError(
            f"Neighbourhood {neighborhood!r} has {len(X)} row(s); more than "
            f"{N_TRAIN_ROWS} are required to leave a hold-out set."
        )

    # Feature importances come from a forest fitted on ALL rows of the
    # neighbourhood (hold-out rows included). This model is used only for the
    # importances, not for the predictions scored below.
    model = RandomForestRegressor(random_state=RANDOM_STATE)
    model.fit(X, y)
    importances = model.feature_importances_

    # Positional train/test split: first N_TRAIN_ROWS rows vs. the remainder.
    # NOTE(review): this assumes each neighbourhood's rows are already in
    # chronological order — confirm, or sort by the year column before splitting.
    X_train, X_test = X.iloc[:N_TRAIN_ROWS], X.iloc[N_TRAIN_ROWS:]
    y_train, y_test = y[:N_TRAIN_ROWS], y[N_TRAIN_ROWS:]

    # Separate forest, fitted on the training rows only, for out-of-sample scoring.
    model = RandomForestRegressor(random_state=RANDOM_STATE)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    data[neighborhood] = [importances, preds, y_test,
                          mean_squared_error(y_test, preds)]

# One row per neighbourhood. Building with orient='index' (instead of
# transposing a column-oriented frame) lets pandas infer a dtype per column,
# so `mse` is numeric rather than object, and an empty `data` yields an empty
# frame with the right columns instead of a length-mismatch error.
final_df = pd.DataFrame.from_dict(data, orient='index', columns=RESULT_COLUMNS)

In [8]:
# Display the per-neighbourhood results table: feature importances, predictions, actual values and MSE.
final_df

Unnamed: 0,Feature importance,pred,actual,mse
Bavel,"[0.02985074626865672, 0.4477611940298508, 0.0,...","[7.0, 7.0]","[7, 8]",0.5
Belcrum,"[0.0, 0.0, 0.0, 0.0, 0.0]","[6.0, 6.0]","[6, 6]",0.0
Biesdonk,"[0.0, 0.0, 0.0, 0.0, 0.0]","[2.0, 2.0]","[2, 2]",0.0
Blauwe Kei,"[0.21210526315789444, 0.21942982456140314, 0.2...","[7.31, 7.42]","[8, 8]",0.40625
Boeimeer,"[0.0, 0.0, 0.0, 0.0, 0.0]","[8.0, 8.0]","[8, 8]",0.0
Brabantpark,"[0.11428571428571428, 0.32857142857142857, 0.0...","[6.0, 6.0]","[6, 7]",0.5
Buitengebied Bavel,"[0.0, 0.0, 0.0, 0.0, 0.0]","[9.0, 9.0]","[9, 9]",0.0
Buitengebied Prinsenbeek,"[0.06153846153846154, 0.27692307692307694, 0.3...","[8.0, 8.0]","[8, 9]",0.5
Buitengebied Teteringen,"[0.0, 0.0, 0.0, 0.0, 0.0]","[9.0, 9.0]","[9, 9]",0.0
Buitengebied Ulvenhout,"[0.0, 0.042857142857142864, 0.1285714285714286...","[9.0, 9.0]","[9, 8]",0.5
