In [1]:
from helper import get_training_data, train_lasso_regression, plot_result_correlation, get_best_lasso_model, train_xgboost,  get_best_pca_lasso_model, train_mean_model
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn import metrics
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
from helper import prepare_socios

%matplotlib inline
%load_ext autoreload
%autoreload 2
pd.options.mode.chained_assignment = None
import warnings
warnings.filterwarnings('ignore')

## Best models

#### Unemployment rate

In [2]:
results = pd.DataFrame(columns = ['improvement_lasso', 'improvement_lasso_boosted', 'improvement_pca_lasso', 'improvement_pca_lasso_boosted', 'improvement_mean_model'])
results_mse = pd.DataFrame(columns = ['mse_naive','mse_lasso', 'mse_lasso_boosted', 'mse_pca_lasso', 'mse_pca_lasso_boosted', 'mse_mean_model'])
results_r2 = pd.DataFrame(columns = ['r2_naive', 'r2_lasso', 'r2_lasso_boosted', 'r2_pca_lasso', 'r2_pca_lasso_boosted', 'r2_mean_model'])
target = 'unemployment_rate'
dens_type = 'count'
radius = 2000

for city in ['marseille', 'lyon', 'paris']:
    country = 'FR'
    agg = get_training_data(city, country, radius, dens_type,  2015)
    
    # Lasso:
    predicts = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='predicts')
    predicts_master = predicts.copy()
    predicts_master = predicts_master.rename(columns = {'y_pred': 'y_pred_lasso'})

    # Lasso Boosted
    cols = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='used_columns')
    predicts = train_xgboost(agg, target, cols, 'predicts')
    predicts_master.loc[:, 'y_pred_lasso_boosted'] = predicts.y_pred

    # PCA Lasso
    predicts = get_best_pca_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius,output = 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso'] = predicts.y_pred

    # PCA Lasso boosted
    comps = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'components')
    cols = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'used_columns')
    reduced_data = pd.DataFrame(comps)
    reduced_data = reduced_data.join(agg.iloc[:,-5:])
    predicts = train_xgboost(reduced_data, target, cols, 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso_boosted'] = predicts.y_pred

    #predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso', 'y_pred_lasso_boosted', 'y_pred_pca_lasso', 'y_pred_pca_lasso_boosted']].mean(axis = 1)
    predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso_boosted', 'y_pred_pca_lasso_boosted']].mean(axis = 1)


    targets = ['unemployment_rate', 'income_levels', 'foreign_nationals']
    scaler = get_training_data(city, country, radius, dens_type, 2015, 'scaler')
    scaler_new = RobustScaler()
    scaler_new.center_, scaler_new.scale_ = scaler.center_[targets.index(target)], scaler.scale_[targets.index(target)]
    predicts_master = pd.DataFrame(scaler_new.inverse_transform(predicts_master), columns =predicts_master.columns)


    naive_mse = metrics.mean_squared_error(predicts_master.y_test, predicts_master.naive) 
    mse_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso)
    mse_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    mse_pca_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    mse_pca_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    mse_mean_model = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_mean_model)
    
    naive_r2 = metrics.r2_score(predicts_master.y_test, predicts_master.naive) 
    r2_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso)
    r2_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    r2_pca_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    r2_pca_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    r2_mean_model = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_mean_model)

    improvement_lasso = 100 - (mse_lasso/naive_mse)*100
    improvement_lasso_boosted = 100 - (mse_lasso_boosted /naive_mse)*100
    improvement_pca_lasso = 100 - (mse_pca_lasso/naive_mse)*100
    improvement_pca_lasso_boosted = 100 - (mse_pca_lasso_boosted/naive_mse)*100
    improvement_mean_model = 100 - (mse_mean_model/naive_mse)*100
    results.loc[city,:]=[improvement_lasso, improvement_lasso_boosted, improvement_pca_lasso, improvement_pca_lasso_boosted, improvement_mean_model]
    results_mse.loc[city,:]=[naive_mse, mse_lasso, mse_lasso_boosted, mse_pca_lasso, mse_pca_lasso_boosted, mse_mean_model]
    results_r2.loc[city,:]=[naive_r2, r2_lasso, r2_lasso_boosted, r2_pca_lasso, r2_pca_lasso_boosted, r2_mean_model]
    
for city in ['hamburg', 'berlin', 'bremen']:
    country = 'DE'
    agg = get_training_data(city, country, radius, dens_type,  2015)
    
    # Lasso:
    predicts = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='predicts')
    predicts_master = predicts.copy()
    predicts_master = predicts_master.rename(columns = {'y_pred': 'y_pred_lasso'})

    # Lasso Boosted
    cols = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='used_columns')
    predicts = train_xgboost(agg, target, cols, 'predicts')
    predicts_master.loc[:, 'y_pred_lasso_boosted'] = predicts.y_pred

    # PCA Lasso
    predicts = get_best_pca_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius,output = 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso'] = predicts.y_pred

    # PCA Lasso boosted
    comps = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'components')
    cols = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output = 'used_columns')
    reduced_data = pd.DataFrame(comps)
    reduced_data = reduced_data.join(agg.iloc[:,-5:])
    predicts = train_xgboost(reduced_data, target, cols, 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso_boosted'] = predicts.y_pred

    #predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso', 'y_pred_lasso_boosted', 'y_pred_pca_lasso', 'y_pred_pca_lasso_boosted']].mean(axis = 1)
    predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso_boosted', 'y_pred_pca_lasso_boosted']].mean(axis = 1)

    targets = ['unemployment_rate', 'income_levels', 'foreign_nationals']
    scaler = get_training_data(city, country, radius, dens_type, 2015, 'scaler')
    scaler_new = RobustScaler()
    scaler_new.center_, scaler_new.scale_ = scaler.center_[targets.index(target)], scaler.scale_[targets.index(target)]
    predicts_master = pd.DataFrame(scaler_new.inverse_transform(predicts_master), columns =predicts_master.columns)


    naive_mse = metrics.mean_squared_error(predicts_master.y_test, predicts_master.naive) 
    mse_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso)
    mse_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    mse_pca_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    mse_pca_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    mse_mean_model = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_mean_model)
    
    naive_r2 = metrics.r2_score(predicts_master.y_test, predicts_master.naive) 
    r2_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso)
    r2_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    r2_pca_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    r2_pca_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    r2_mean_model = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_mean_model)

    improvement_lasso = 100 - (mse_lasso/naive_mse)*100
    improvement_lasso_boosted = 100 - (mse_lasso_boosted /naive_mse)*100
    improvement_pca_lasso = 100 - (mse_pca_lasso/naive_mse)*100
    improvement_pca_lasso_boosted = 100 - (mse_pca_lasso_boosted/naive_mse)*100
    improvement_mean_model = 100 - (mse_mean_model/naive_mse)*100
    results.loc[city,:]=[improvement_lasso, improvement_lasso_boosted, improvement_pca_lasso, improvement_pca_lasso_boosted, improvement_mean_model]
    results_mse.loc[city,:]=[naive_mse, mse_lasso, mse_lasso_boosted, mse_pca_lasso, mse_pca_lasso_boosted, mse_mean_model]
    results_r2.loc[city,:]=[naive_r2, r2_lasso, r2_lasso_boosted, r2_pca_lasso, r2_pca_lasso_boosted, r2_mean_model]

results.loc['mean'] = results.mean()
results

shape of training data (344, 245)
0.1
0.1
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.1
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.1
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.1
shape of training data (344, 245)
shape of training data (176, 245)
0.1
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape of training data (176, 245)
shape of training data (861, 245)
0.01
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)

Unnamed: 0,improvement_lasso,improvement_lasso_boosted,improvement_pca_lasso,improvement_pca_lasso_boosted,improvement_mean_model
marseille,33.54319,35.677128,33.279281,45.926023,46.083366
lyon,29.952328,30.383002,32.423875,-1.414124,27.632138
paris,31.877935,32.860621,31.247004,30.420571,34.091667
hamburg,42.256499,48.24072,31.905818,42.414388,53.915433
berlin,30.620772,28.069698,23.014292,17.208573,27.427625
bremen,39.876859,52.317856,39.499901,54.439443,59.106715
mean,34.68793,37.924837,31.895028,31.499146,41.376157


In [3]:
results_r2

Unnamed: 0,r2_naive,r2_lasso,r2_lasso_boosted,r2_pca_lasso,r2_pca_lasso_boosted,r2_mean_model
marseille,-0.001364,0.334526,0.355894,0.331883,0.458523,0.460098
lyon,-0.001873,0.298211,0.302526,0.322973,-0.016041,0.274966
paris,-0.014116,0.309163,0.319129,0.302765,0.294384,0.331613
hamburg,-0.124421,0.35072,0.418008,0.234335,0.352495,0.481816
berlin,-0.00026,0.306027,0.28051,0.229943,0.171871,0.274088
bremen,-0.043206,0.372792,0.502577,0.368859,0.52471,0.573399


In [4]:
results_mse

Unnamed: 0,mse_naive,mse_lasso,mse_lasso_boosted,mse_pca_lasso,mse_pca_lasso_boosted,mse_mean_model
marseille,0.010888,0.007236,0.007003,0.007264,0.005887,0.00587
lyon,0.002615,0.001832,0.00182,0.001767,0.002652,0.001892
paris,0.001694,0.001154,0.001137,0.001165,0.001179,0.001117
hamburg,0.000365,0.000211,0.000189,0.000248,0.00021,0.000168
berlin,0.000859,0.000596,0.000618,0.000661,0.000711,0.000623
bremen,0.001918,0.001153,0.000915,0.001161,0.000874,0.000784


In [5]:
results_rmse = results_mse**(0.5)
results_rmse = results_rmse.rename(columns = {'mse_naive': 'rmse_naive','mse_lasso':'rmse_lasso', 'mse_lasso_boosted':'rmse_lasso_boosted', 'mse_pca_lasso':'rmse_pca_lasso', 'mse_pca_lasso_boosted':'rmse_pca_lasso_boosted', 'mse_mean_model':'rmse_mean_model'})
results_rmse

Unnamed: 0,rmse_naive,rmse_lasso,rmse_lasso_boosted,rmse_pca_lasso,rmse_pca_lasso_boosted,rmse_mean_model
marseille,0.104344,0.085062,0.083685,0.085231,0.076729,0.076617
lyon,0.051135,0.042798,0.042666,0.042036,0.051496,0.043501
paris,0.041161,0.033973,0.033727,0.03413,0.034334,0.033416
hamburg,0.0191,0.014514,0.013742,0.015762,0.014494,0.012966
berlin,0.029304,0.024409,0.024853,0.025712,0.026664,0.024964
bremen,0.043799,0.033962,0.030244,0.034068,0.029564,0.028009


In [6]:
output = pd.concat([results, results_r2, results_mse, results_rmse], axis = 1, join = 'inner')
output = output[['mse_naive', 'rmse_naive', 'r2_naive',
               'mse_lasso', 'rmse_lasso', 'r2_lasso', 'improvement_lasso',
               'mse_lasso_boosted', 'rmse_lasso_boosted', 'r2_lasso_boosted', 'improvement_lasso_boosted',
               'mse_pca_lasso', 'rmse_pca_lasso', 'r2_pca_lasso', 'improvement_pca_lasso',
               'mse_pca_lasso_boosted', 'rmse_pca_lasso_boosted', 'r2_pca_lasso_boosted', 'improvement_pca_lasso_boosted',
               'mse_mean_model', 'rmse_mean_model', 'r2_mean_model', 'improvement_mean_model']]
output.to_excel(f'output/results/results_{target}_{dens_type}_{radius}.xlsx')
output

Unnamed: 0,mse_naive,rmse_naive,r2_naive,mse_lasso,rmse_lasso,r2_lasso,improvement_lasso,mse_lasso_boosted,rmse_lasso_boosted,r2_lasso_boosted,...,r2_pca_lasso,improvement_pca_lasso,mse_pca_lasso_boosted,rmse_pca_lasso_boosted,r2_pca_lasso_boosted,improvement_pca_lasso_boosted,mse_mean_model,rmse_mean_model,r2_mean_model,improvement_mean_model
marseille,0.010888,0.104344,-0.001364,0.007236,0.085062,0.334526,33.54319,0.007003,0.083685,0.355894,...,0.331883,33.279281,0.005887,0.076729,0.458523,45.926023,0.00587,0.076617,0.460098,46.083366
lyon,0.002615,0.051135,-0.001873,0.001832,0.042798,0.298211,29.952328,0.00182,0.042666,0.302526,...,0.322973,32.423875,0.002652,0.051496,-0.016041,-1.414124,0.001892,0.043501,0.274966,27.632138
paris,0.001694,0.041161,-0.014116,0.001154,0.033973,0.309163,31.877935,0.001137,0.033727,0.319129,...,0.302765,31.247004,0.001179,0.034334,0.294384,30.420571,0.001117,0.033416,0.331613,34.091667
hamburg,0.000365,0.0191,-0.124421,0.000211,0.014514,0.35072,42.256499,0.000189,0.013742,0.418008,...,0.234335,31.905818,0.00021,0.014494,0.352495,42.414388,0.000168,0.012966,0.481816,53.915433
berlin,0.000859,0.029304,-0.00026,0.000596,0.024409,0.306027,30.620772,0.000618,0.024853,0.28051,...,0.229943,23.014292,0.000711,0.026664,0.171871,17.208573,0.000623,0.024964,0.274088,27.427625
bremen,0.001918,0.043799,-0.043206,0.001153,0.033962,0.372792,39.876859,0.000915,0.030244,0.502577,...,0.368859,39.499901,0.000874,0.029564,0.52471,54.439443,0.000784,0.028009,0.573399,59.106715


#### Foreign Nationals

In [7]:
results = pd.DataFrame(columns = ['improvement_lasso', 'improvement_lasso_boosted', 'improvement_pca_lasso', 'improvement_pca_lasso_boosted', 'improvement_mean_model'])
results_mse = pd.DataFrame(columns = ['mse_naive','mse_lasso', 'mse_lasso_boosted', 'mse_pca_lasso', 'mse_pca_lasso_boosted', 'mse_mean_model'])
results_r2 = pd.DataFrame(columns = ['r2_naive', 'r2_lasso', 'r2_lasso_boosted', 'r2_pca_lasso', 'r2_pca_lasso_boosted', 'r2_mean_model'])
target = 'foreign_nationals'
dens_type = 'count'
radius = 2000

for city in ['marseille', 'lyon', 'paris']:
    country = 'FR'
    agg = get_training_data(city, country, radius, dens_type,  2015)
    
    # Lasso:
    predicts = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='predicts')
    predicts_master = predicts.copy()
    predicts_master = predicts_master.rename(columns = {'y_pred': 'y_pred_lasso'})

    # Lasso Boosted
    cols = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='used_columns')
    predicts = train_xgboost(agg, target, cols, 'predicts')
    predicts_master.loc[:, 'y_pred_lasso_boosted'] = predicts.y_pred

    # PCA Lasso
    predicts = get_best_pca_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius,output = 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso'] = predicts.y_pred

    # PCA Lasso boosted
    comps = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'components')
    cols = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'used_columns')
    reduced_data = pd.DataFrame(comps)
    reduced_data = reduced_data.join(agg.iloc[:,-5:])
    predicts = train_xgboost(reduced_data, target, cols, 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso_boosted'] = predicts.y_pred

    #predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso', 'y_pred_lasso_boosted', 'y_pred_pca_lasso', 'y_pred_pca_lasso_boosted']].mean(axis = 1)
    predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso_boosted', 'y_pred_pca_lasso_boosted']].mean(axis = 1)


    targets = ['unemployment_rate', 'income_levels', 'foreign_nationals']
    scaler = get_training_data(city, country, radius, dens_type, 2015, 'scaler')
    scaler_new = RobustScaler()
    scaler_new.center_, scaler_new.scale_ = scaler.center_[targets.index(target)], scaler.scale_[targets.index(target)]
    predicts_master = pd.DataFrame(scaler_new.inverse_transform(predicts_master), columns =predicts_master.columns)


    naive_mse = metrics.mean_squared_error(predicts_master.y_test, predicts_master.naive) 
    mse_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso)
    mse_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    mse_pca_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    mse_pca_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    mse_mean_model = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_mean_model)
    
    naive_r2 = metrics.r2_score(predicts_master.y_test, predicts_master.naive) 
    r2_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso)
    r2_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    r2_pca_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    r2_pca_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    r2_mean_model = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_mean_model)

    improvement_lasso = 100 - (mse_lasso/naive_mse)*100
    improvement_lasso_boosted = 100 - (mse_lasso_boosted /naive_mse)*100
    improvement_pca_lasso = 100 - (mse_pca_lasso/naive_mse)*100
    improvement_pca_lasso_boosted = 100 - (mse_pca_lasso_boosted/naive_mse)*100
    improvement_mean_model = 100 - (mse_mean_model/naive_mse)*100
    results.loc[city,:]=[improvement_lasso, improvement_lasso_boosted, improvement_pca_lasso, improvement_pca_lasso_boosted, improvement_mean_model]
    results_mse.loc[city,:]=[naive_mse, mse_lasso, mse_lasso_boosted, mse_pca_lasso, mse_pca_lasso_boosted, mse_mean_model]
    results_r2.loc[city,:]=[naive_r2, r2_lasso, r2_lasso_boosted, r2_pca_lasso, r2_pca_lasso_boosted, r2_mean_model]
    
for city in ['hamburg', 'berlin', 'bremen']:
    country = 'DE'
    agg = get_training_data(city, country, radius, dens_type,  2015)
    
    # Lasso:
    predicts = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='predicts')
    predicts_master = predicts.copy()
    predicts_master = predicts_master.rename(columns = {'y_pred': 'y_pred_lasso'})

    # Lasso Boosted
    cols = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='used_columns')
    predicts = train_xgboost(agg, target, cols, 'predicts')
    predicts_master.loc[:, 'y_pred_lasso_boosted'] = predicts.y_pred

    # PCA Lasso
    predicts = get_best_pca_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius,output = 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso'] = predicts.y_pred

    # PCA Lasso boosted
    comps = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'components')
    cols = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output = 'used_columns')
    reduced_data = pd.DataFrame(comps)
    reduced_data = reduced_data.join(agg.iloc[:,-5:])
    predicts = train_xgboost(reduced_data, target, cols, 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso_boosted'] = predicts.y_pred

    #predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso', 'y_pred_lasso_boosted', 'y_pred_pca_lasso', 'y_pred_pca_lasso_boosted']].mean(axis = 1)
    predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso_boosted', 'y_pred_pca_lasso_boosted']].mean(axis = 1)

    targets = ['unemployment_rate', 'income_levels', 'foreign_nationals']
    scaler = get_training_data(city, country, radius, dens_type, 2015, 'scaler')
    scaler_new = RobustScaler()
    scaler_new.center_, scaler_new.scale_ = scaler.center_[targets.index(target)], scaler.scale_[targets.index(target)]
    predicts_master = pd.DataFrame(scaler_new.inverse_transform(predicts_master), columns =predicts_master.columns)


    naive_mse = metrics.mean_squared_error(predicts_master.y_test, predicts_master.naive) 
    mse_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso)
    mse_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    mse_pca_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    mse_pca_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    mse_mean_model = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_mean_model)

    improvement_lasso = 100 - (mse_lasso/naive_mse)*100
    improvement_lasso_boosted = 100 - (mse_lasso_boosted /naive_mse)*100
    improvement_pca_lasso = 100 - (mse_pca_lasso/naive_mse)*100
    improvement_pca_lasso_boosted = 100 - (mse_pca_lasso_boosted/naive_mse)*100
    improvement_mean_model = 100 - (mse_mean_model/naive_mse)*100
    results.loc[city,:]=[improvement_lasso, improvement_lasso_boosted, improvement_pca_lasso, improvement_pca_lasso_boosted, improvement_mean_model]
    results_mse.loc[city,:]=[naive_mse, mse_lasso, mse_lasso_boosted, mse_pca_lasso, mse_pca_lasso_boosted, mse_mean_model]
    results_r2.loc[city,:]=[naive_r2, r2_lasso, r2_lasso_boosted, r2_pca_lasso, r2_pca_lasso_boosted, r2_mean_model]
    
results.loc['mean'] = results.mean()
results

shape of training data (344, 245)
0.01
0.01
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.01
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.01
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.01
shape of training data (344, 245)
shape of training data (176, 245)
0.1
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape of training data (176, 245)
shape of training data (861, 245)
0.01
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861,

Unnamed: 0,improvement_lasso,improvement_lasso_boosted,improvement_pca_lasso,improvement_pca_lasso_boosted,improvement_mean_model
marseille,68.179343,72.485641,69.682891,68.12693,73.497129
lyon,27.478481,24.991409,34.462853,29.277679,31.846925
paris,28.463543,32.755896,26.949614,27.359803,32.597504
hamburg,39.987653,31.75193,57.138051,43.906912,46.322584
berlin,60.964426,66.075785,54.951802,49.247971,64.682828
bremen,21.765591,30.555339,18.911118,26.033816,32.970147
mean,41.13984,43.102667,43.682722,40.658852,46.986186


In [8]:
results_mse

Unnamed: 0,mse_naive,mse_lasso,mse_lasso_boosted,mse_pca_lasso,mse_pca_lasso_boosted,mse_mean_model
marseille,0.009139,0.002908,0.002514,0.002771,0.002913,0.002422
lyon,0.002496,0.00181,0.001872,0.001636,0.001765,0.001701
paris,0.002873,0.002055,0.001932,0.002099,0.002087,0.001937
hamburg,0.005245,0.003148,0.00358,0.002248,0.002942,0.002815
berlin,0.003961,0.001546,0.001344,0.001785,0.00201,0.001399
bremen,0.006663,0.005213,0.004627,0.005403,0.004929,0.004466


In [9]:
results_rmse = results_mse**(0.5)
results_rmse = results_rmse.rename(columns = {'mse_naive': 'rmse_naive','mse_lasso':'rmse_lasso', 'mse_lasso_boosted':'rmse_lasso_boosted', 'mse_pca_lasso':'rmse_pca_lasso', 'mse_pca_lasso_boosted':'rmse_pca_lasso_boosted', 'mse_mean_model':'rmse_mean_model'})
results_rmse

Unnamed: 0,rmse_naive,rmse_lasso,rmse_lasso_boosted,rmse_pca_lasso,rmse_pca_lasso_boosted,rmse_mean_model
marseille,0.095596,0.053925,0.050144,0.052636,0.05397,0.049214
lyon,0.049962,0.042548,0.043271,0.040447,0.042016,0.041246
paris,0.053601,0.045336,0.043954,0.045813,0.045684,0.044006
hamburg,0.072423,0.056104,0.05983,0.047414,0.054241,0.05306
berlin,0.062939,0.039324,0.036659,0.042244,0.044838,0.037404
bremen,0.081628,0.072201,0.068024,0.073506,0.070203,0.066831


In [10]:
output = pd.concat([results, results_r2, results_mse, results_rmse], axis = 1, join = 'inner')
output = output[['mse_naive', 'rmse_naive', 'r2_naive',
               'mse_lasso', 'rmse_lasso', 'r2_lasso', 'improvement_lasso',
               'mse_lasso_boosted', 'rmse_lasso_boosted', 'r2_lasso_boosted', 'improvement_lasso_boosted',
               'mse_pca_lasso', 'rmse_pca_lasso', 'r2_pca_lasso', 'improvement_pca_lasso',
               'mse_pca_lasso_boosted', 'rmse_pca_lasso_boosted', 'r2_pca_lasso_boosted', 'improvement_pca_lasso_boosted',
               'mse_mean_model', 'rmse_mean_model', 'r2_mean_model', 'improvement_mean_model']]
output.to_excel(f'output/results/results_{target}_{dens_type}_{radius}.xlsx')
output

Unnamed: 0,mse_naive,rmse_naive,r2_naive,mse_lasso,rmse_lasso,r2_lasso,improvement_lasso,mse_lasso_boosted,rmse_lasso_boosted,r2_lasso_boosted,...,r2_pca_lasso,improvement_pca_lasso,mse_pca_lasso_boosted,rmse_pca_lasso_boosted,r2_pca_lasso_boosted,improvement_pca_lasso_boosted,mse_mean_model,rmse_mean_model,r2_mean_model,improvement_mean_model
marseille,0.009139,0.095596,-0.026125,0.002908,0.053925,0.67348,68.179343,0.002514,0.050144,0.717668,...,0.688909,69.682891,0.002913,0.05397,0.672942,68.12693,0.002422,0.049214,0.728047,73.497129
lyon,0.002496,0.049962,-0.002119,0.00181,0.042548,0.273248,27.478481,0.001872,0.043271,0.248325,...,0.34324,34.462853,0.001765,0.042016,0.291278,29.277679,0.001701,0.041246,0.317025,31.846925
paris,0.002873,0.053601,-0.021716,0.002055,0.045336,0.269101,28.463543,0.001932,0.043954,0.312956,...,0.253633,26.949614,0.002087,0.045684,0.257824,27.359803,0.001937,0.044006,0.311338,32.597504
hamburg,0.005245,0.072423,-0.021716,0.003148,0.056104,0.269101,39.987653,0.00358,0.05983,0.312956,...,0.253633,57.138051,0.002942,0.054241,0.257824,43.906912,0.002815,0.05306,0.311338,46.322584
berlin,0.003961,0.062939,-0.021716,0.001546,0.039324,0.269101,60.964426,0.001344,0.036659,0.312956,...,0.253633,54.951802,0.00201,0.044838,0.257824,49.247971,0.001399,0.037404,0.311338,64.682828
bremen,0.006663,0.081628,-0.021716,0.005213,0.072201,0.269101,21.765591,0.004627,0.068024,0.312956,...,0.253633,18.911118,0.004929,0.070203,0.257824,26.033816,0.004466,0.066831,0.311338,32.970147


#### Income levels

In [12]:
results = pd.DataFrame(columns = ['improvement_lasso', 'improvement_lasso_boosted', 'improvement_pca_lasso', 'improvement_pca_lasso_boosted', 'improvement_mean_model'])
results_mse = pd.DataFrame(columns = ['mse_naive','mse_lasso', 'mse_lasso_boosted', 'mse_pca_lasso', 'mse_pca_lasso_boosted', 'mse_mean_model'])
results_r2 = pd.DataFrame(columns = ['r2_naive', 'r2_lasso', 'r2_lasso_boosted', 'r2_pca_lasso', 'r2_pca_lasso_boosted', 'r2_mean_model'])
target = 'income_levels'
dens_type = 'count'
radius = 2000

for city in ['marseille', 'lyon', 'paris']:
    country = 'FR'
    agg = get_training_data(city, country, radius, dens_type,  2015)
    
    # Lasso:
    predicts = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='predicts')
    predicts_master = predicts.copy()
    predicts_master = predicts_master.rename(columns = {'y_pred': 'y_pred_lasso'})

    # Lasso Boosted
    cols = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='used_columns')
    predicts = train_xgboost(agg, target, cols, 'predicts')
    predicts_master.loc[:, 'y_pred_lasso_boosted'] = predicts.y_pred

    # PCA Lasso
    predicts = get_best_pca_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius,output = 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso'] = predicts.y_pred

    # PCA Lasso boosted
    comps = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'components')
    cols = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'used_columns')
    reduced_data = pd.DataFrame(comps)
    reduced_data = reduced_data.join(agg.iloc[:,-5:])
    predicts = train_xgboost(reduced_data, target, cols, 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso_boosted'] = predicts.y_pred

    #predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso', 'y_pred_lasso_boosted', 'y_pred_pca_lasso', 'y_pred_pca_lasso_boosted']].mean(axis = 1)
    predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso_boosted', 'y_pred_pca_lasso_boosted']].mean(axis = 1)


    targets = ['unemployment_rate', 'income_levels', 'foreign_nationals']
    scaler = get_training_data(city, country, radius, dens_type, 2015, 'scaler')
    scaler_new = RobustScaler()
    scaler_new.center_, scaler_new.scale_ = scaler.center_[targets.index(target)], scaler.scale_[targets.index(target)]
    predicts_master = pd.DataFrame(scaler_new.inverse_transform(predicts_master), columns =predicts_master.columns)


    naive_mse = metrics.mean_squared_error(predicts_master.y_test, predicts_master.naive) 
    mse_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso)
    mse_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    mse_pca_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    mse_pca_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    mse_mean_model = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_mean_model)
    
    naive_r2 = metrics.r2_score(predicts_master.y_test, predicts_master.naive) 
    r2_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso)
    r2_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    r2_pca_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    r2_pca_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    r2_mean_model = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_mean_model)

    improvement_lasso = 100 - (mse_lasso/naive_mse)*100
    improvement_lasso_boosted = 100 - (mse_lasso_boosted /naive_mse)*100
    improvement_pca_lasso = 100 - (mse_pca_lasso/naive_mse)*100
    improvement_pca_lasso_boosted = 100 - (mse_pca_lasso_boosted/naive_mse)*100
    improvement_mean_model = 100 - (mse_mean_model/naive_mse)*100
    results.loc[city,:]=[improvement_lasso, improvement_lasso_boosted, improvement_pca_lasso, improvement_pca_lasso_boosted, improvement_mean_model]
    results_mse.loc[city,:]=[naive_mse, mse_lasso, mse_lasso_boosted, mse_pca_lasso, mse_pca_lasso_boosted, mse_mean_model]
    results_r2.loc[city,:]=[naive_r2, r2_lasso, r2_lasso_boosted, r2_pca_lasso, r2_pca_lasso_boosted, r2_mean_model]
    
for city in ['hamburg', 'berlin', 'bremen']:
    country = 'DE'
    agg = get_training_data(city, country, radius, dens_type,  2015)
    
    # Lasso:
    predicts = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='predicts')
    predicts_master = predicts.copy()
    predicts_master = predicts_master.rename(columns = {'y_pred': 'y_pred_lasso'})

    # Lasso Boosted
    cols = get_best_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output ='used_columns')
    predicts = train_xgboost(agg, target, cols, 'predicts')
    predicts_master.loc[:, 'y_pred_lasso_boosted'] = predicts.y_pred

    # PCA Lasso
    predicts = get_best_pca_lasso_model(agg=agg, target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius,output = 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso'] = predicts.y_pred

    # PCA Lasso boosted
    comps = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015,  density_type=dens_type, radius = radius, output = 'components')
    cols = get_best_pca_lasso_model(agg=agg,target=target, city=city, country=country, socio_year=2015, density_type=dens_type, radius = radius, output = 'used_columns')
    reduced_data = pd.DataFrame(comps)
    reduced_data = reduced_data.join(agg.iloc[:,-5:])
    predicts = train_xgboost(reduced_data, target, cols, 'predicts')
    predicts_master.loc[:,'y_pred_pca_lasso_boosted'] = predicts.y_pred

    #predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso', 'y_pred_lasso_boosted', 'y_pred_pca_lasso', 'y_pred_pca_lasso_boosted']].mean(axis = 1)
    predicts_master.loc[:, 'y_pred_mean_model'] = predicts_master[['y_pred_lasso_boosted', 'y_pred_pca_lasso_boosted']].mean(axis = 1)

    targets = ['unemployment_rate', 'income_levels', 'foreign_nationals']
    scaler = get_training_data(city, country, radius, dens_type, 2015, 'scaler')
    scaler_new = RobustScaler()
    scaler_new.center_, scaler_new.scale_ = scaler.center_[targets.index(target)], scaler.scale_[targets.index(target)]
    predicts_master = pd.DataFrame(scaler_new.inverse_transform(predicts_master), columns =predicts_master.columns)


    naive_mse = metrics.mean_squared_error(predicts_master.y_test, predicts_master.naive) 
    mse_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso)
    mse_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    mse_pca_lasso = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    mse_pca_lasso_boosted = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    mse_mean_model = metrics.mean_squared_error(predicts_master.y_test, predicts_master.y_pred_mean_model)
    
    naive_r2 = metrics.r2_score(predicts_master.y_test, predicts_master.naive) 
    r2_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso)
    r2_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_lasso_boosted)
    r2_pca_lasso = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso)
    r2_pca_lasso_boosted = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_pca_lasso_boosted)
    r2_mean_model = metrics.r2_score(predicts_master.y_test, predicts_master.y_pred_mean_model)

    improvement_lasso = 100 - (mse_lasso/naive_mse)*100
    improvement_lasso_boosted = 100 - (mse_lasso_boosted /naive_mse)*100
    improvement_pca_lasso = 100 - (mse_pca_lasso/naive_mse)*100
    improvement_pca_lasso_boosted = 100 - (mse_pca_lasso_boosted/naive_mse)*100
    improvement_mean_model = 100 - (mse_mean_model/naive_mse)*100
    results.loc[city,:]=[improvement_lasso, improvement_lasso_boosted, improvement_pca_lasso, improvement_pca_lasso_boosted, improvement_mean_model]
    results_mse.loc[city,:]=[naive_mse, mse_lasso, mse_lasso_boosted, mse_pca_lasso, mse_pca_lasso_boosted, mse_mean_model]
    results_r2.loc[city,:]=[naive_r2, r2_lasso, r2_lasso_boosted, r2_pca_lasso, r2_pca_lasso_boosted, r2_mean_model]
    
results.loc['mean'] = results.mean()
results

shape of training data (344, 245)
0.01
0.01
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.01
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.01
shape before pca: (344, 238)
number of pca components: 75
shape after pca: (344, 75)
0.01
shape of training data (344, 245)
shape of training data (176, 245)
0.1
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape before pca: (176, 238)
number of pca components: 65
shape after pca: (176, 65)
0.1
shape of training data (176, 245)
shape of training data (861, 245)
0.001
0.001
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (861, 115)
0.01
shape before pca: (861, 238)
number of pca components: 115
shape after pca: (86

Unnamed: 0,improvement_lasso,improvement_lasso_boosted,improvement_pca_lasso,improvement_pca_lasso_boosted,improvement_mean_model
marseille,73.402629,73.193619,69.83887,66.569906,73.938939
lyon,46.281495,51.623423,54.504775,58.056359,60.056861
paris,71.048726,77.464578,70.285942,72.817909,77.602627
hamburg,40.571145,32.00454,50.548295,58.390943,60.858558
berlin,24.056443,29.343344,5.055516,7.637401,26.440731
bremen,36.977137,44.684152,32.983787,38.984238,46.191425
mean,48.722929,51.385609,47.202864,50.409459,57.514857


In [13]:
results_r2

Unnamed: 0,r2_naive,r2_lasso,r2_lasso_boosted,r2_pca_lasso,r2_pca_lasso_boosted,r2_mean_model
marseille,-0.000792,0.733816,0.731724,0.69815,0.665434,0.739183
lyon,-0.000858,0.462354,0.515819,0.544657,0.580204,0.600226
paris,-0.005564,0.708876,0.773392,0.701206,0.726667,0.77478
hamburg,-0.042585,0.380404,0.29109,0.484424,0.56619,0.591917
berlin,-0.001062,0.239758,0.292683,0.049546,0.075393,0.263626
bremen,-0.205356,0.24035,0.333247,0.192216,0.264543,0.351415


In [14]:
results_mse

Unnamed: 0,mse_naive,mse_lasso,mse_lasso_boosted,mse_pca_lasso,mse_pca_lasso_boosted,mse_mean_model
marseille,361248.317935,96082.554251,96837.601254,108956.57389,120765.652453,94145.145073
lyon,244401.878031,131289.035384,118233.263105,111191.183789,102511.045104,97621.78296
paris,840140.3386,243231.329724,189329.169475,249639.784827,228367.710212,188169.365824
hamburg,667237.806043,396531.787797,453691.413046,329960.472725,277631.358826,261166.499597
berlin,731046.943231,555183.050027,516533.324729,694088.74698,675213.959313,537752.784529
bremen,626827.178684,395044.431178,346734.769259,420075.836582,382463.376578,337286.774457


In [15]:
results_rmse = results_mse**(0.5)
results_rmse = results_rmse.rename(columns = {'mse_naive': 'rmse_naive','mse_lasso':'rmse_lasso', 'mse_lasso_boosted':'rmse_lasso_boosted', 'mse_pca_lasso':'rmse_pca_lasso', 'mse_pca_lasso_boosted':'rmse_pca_lasso_boosted', 'mse_mean_model':'rmse_mean_model'})
results_rmse

Unnamed: 0,rmse_naive,rmse_lasso,rmse_lasso_boosted,rmse_pca_lasso,rmse_pca_lasso_boosted,rmse_mean_model
marseille,601.039365,309.97186,311.187405,330.085707,347.513528,306.830809
lyon,494.370183,362.338289,343.850641,333.453421,320.173461,312.444848
paris,916.591697,493.184884,435.119719,499.639655,477.878342,433.78493
hamburg,816.846256,629.7077,673.566191,574.42186,526.907353,511.044518
berlin,855.012832,745.106066,718.702529,833.119888,821.714037,733.316292
bremen,791.724181,628.525601,588.841888,648.132576,618.436235,580.763958


In [16]:
output = pd.concat([results, results_r2, results_mse, results_rmse], axis = 1, join = 'inner')
output = output[['mse_naive', 'rmse_naive', 'r2_naive',
               'mse_lasso', 'rmse_lasso', 'r2_lasso', 'improvement_lasso',
               'mse_lasso_boosted', 'rmse_lasso_boosted', 'r2_lasso_boosted', 'improvement_lasso_boosted',
               'mse_pca_lasso', 'rmse_pca_lasso', 'r2_pca_lasso', 'improvement_pca_lasso',
               'mse_pca_lasso_boosted', 'rmse_pca_lasso_boosted', 'r2_pca_lasso_boosted', 'improvement_pca_lasso_boosted',
               'mse_mean_model', 'rmse_mean_model', 'r2_mean_model', 'improvement_mean_model']]
output.to_excel(f'output/results/results_{target}_{dens_type}_{radius}.xlsx')
output

Unnamed: 0,mse_naive,rmse_naive,r2_naive,mse_lasso,rmse_lasso,r2_lasso,improvement_lasso,mse_lasso_boosted,rmse_lasso_boosted,r2_lasso_boosted,...,r2_pca_lasso,improvement_pca_lasso,mse_pca_lasso_boosted,rmse_pca_lasso_boosted,r2_pca_lasso_boosted,improvement_pca_lasso_boosted,mse_mean_model,rmse_mean_model,r2_mean_model,improvement_mean_model
marseille,361248.317935,601.039365,-0.000792,96082.554251,309.97186,0.733816,73.402629,96837.601254,311.187405,0.731724,...,0.69815,69.83887,120765.652453,347.513528,0.665434,66.569906,94145.145073,306.830809,0.739183,73.938939
lyon,244401.878031,494.370183,-0.000858,131289.035384,362.338289,0.462354,46.281495,118233.263105,343.850641,0.515819,...,0.544657,54.504775,102511.045104,320.173461,0.580204,58.056359,97621.78296,312.444848,0.600226,60.056861
paris,840140.3386,916.591697,-0.005564,243231.329724,493.184884,0.708876,71.048726,189329.169475,435.119719,0.773392,...,0.701206,70.285942,228367.710212,477.878342,0.726667,72.817909,188169.365824,433.78493,0.77478,77.602627
hamburg,667237.806043,816.846256,-0.042585,396531.787797,629.7077,0.380404,40.571145,453691.413046,673.566191,0.29109,...,0.484424,50.548295,277631.358826,526.907353,0.56619,58.390943,261166.499597,511.044518,0.591917,60.858558
berlin,731046.943231,855.012832,-0.001062,555183.050027,745.106066,0.239758,24.056443,516533.324729,718.702529,0.292683,...,0.049546,5.055516,675213.959313,821.714037,0.075393,7.637401,537752.784529,733.316292,0.263626,26.440731
bremen,626827.178684,791.724181,-0.205356,395044.431178,628.525601,0.24035,36.977137,346734.769259,588.841888,0.333247,...,0.192216,32.983787,382463.376578,618.436235,0.264543,38.984238,337286.774457,580.763958,0.351415,46.191425
