# Training models on full dataset

Here, we train the models on the full dataset, using the best hyperparameters from cross-validation. We save the models for later use in simulations. We provide model explanations, using various interpretability techniques.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor
import joblib

# Show every column and every row when a DataFrame is displayed (None = no limit).
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

## Data

In [2]:
# Load the full dataset. low_memory=False disables pandas' chunked parsing,
# which avoids mixed-type inference within a column.
data = pd.read_csv(
    'data/data.csv',
    low_memory=False,
)

In [3]:
# Encode the coastal flag as an integer; missing entries are filled with False (0).
data['Coastal?'] = (
    data['Coastal?']
    .fillna(False)
    .astype(int)
)

# Assign integer codes 0..3 to the climate-zone labels, in this fixed order.
# Labels that are not in the mapping become NaN after .map().
mapping_dict = {
    zone: code
    for code, zone in enumerate(('Arid', 'Snow', 'Temperate', 'Tropical'))
}
data['Climate Zone'] = data['Climate Zone'].map(mapping_dict)

In [4]:
# Predictor columns used by every model, in the order they are fed to the regressor.
features = [
    # Land-cover fractions
    'Built Fraction',
    'Grass Fraction',
    'Tree Fraction',
    # Albedo per land-cover class
    'Built Albedo',
    'Grass Albedo',
    'Tree Albedo',
    # Site descriptors
    'Elevation',
    'Climate Zone',
    'Coastal?',
]

## Helper Functions

In [5]:
def get_data(label):
    """Return the feature matrix and target for ``label``.

    Selects the ``features`` columns plus the ``label`` column from the
    module-level ``data`` frame and drops every row that has a missing value
    in any of them.

    Args:
        label: Name of the target column in ``data``.

    Returns:
        A ``(X, y)`` tuple: ``X`` holds the ``features`` columns and ``y`` the
        ``label`` column, both restricted to the complete rows.
    """
    columns = features + [label]
    complete_rows = data[columns].dropna().copy()
    return complete_rows[features], complete_rows[label]

In [6]:
def get_model(label, n_estimators, max_depth, filename):
    """Fit an XGBoost regressor for ``label`` on all complete rows and save it.

    The model is fitted and scored on the same data, so the printed R2 and
    RMSE are in-sample (training) metrics, not estimates of generalization.

    Args:
        label: Name of the target column, forwarded to ``get_data``.
        n_estimators: Number of boosting rounds passed to ``XGBRegressor``.
        max_depth: Maximum tree depth passed to ``XGBRegressor``.
        filename: Prefix of the saved file; the model is written to
            ``models/full/<filename>_xgb_estimator.joblib``.

    Returns:
        The fitted ``XGBRegressor``.
    """
    import os  # local import: only needed to create the output directory

    X, y = get_data(label)
    model = XGBRegressor(n_estimators=n_estimators, max_depth=max_depth,
                         importance_type='gain', random_state=1,
                         tree_method='hist', grow_policy='lossguide')
    model.fit(X, y)

    # Predict once and reuse the predictions for both metrics.
    y_pred = model.predict(X)
    print('R2: ', round(float(r2_score(y, y_pred)), 3))
    # Take the square root of the MSE explicitly: the `squared=False` keyword
    # of mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = float(np.sqrt(mean_squared_error(y, y_pred)))
    print('RMSE: ', round(rmse, 3))

    # Make sure the output directory exists so the dump cannot fail after
    # the (potentially slow) training has already finished.
    out_dir = 'models/full'
    os.makedirs(out_dir, exist_ok=True)
    joblib.dump(model, out_dir + '/' + filename + '_xgb_estimator.joblib')
    return model

## Full Models

### CUHI Day

In [9]:
# Train on the 'CUHI Day' target and save under the 'CUHI_day_summer' prefix.
model_cuhi_d = get_model(
    label='CUHI Day',
    n_estimators=100,
    max_depth=10,
    filename='CUHI_day_summer',
)

R2:  0.972
RMSE:  0.116


### CUHI Night

In [10]:
# Train on the 'CUHI Night' target and save under the 'CUHI_night_summer' prefix.
model_cuhi_n = get_model(
    label='CUHI Night',
    n_estimators=100,
    max_depth=10,
    filename='CUHI_night_summer',
)

R2:  0.955
RMSE:  0.116


### SUHI Day

In [11]:
# Train on the 'SUHI Day' target and save under the 'SUHI_day_summer' prefix.
model_suhi_d = get_model(
    label='SUHI Day',
    n_estimators=100,
    max_depth=10,
    filename='SUHI_day_summer',
)

R2:  0.955
RMSE:  0.631


### SUHI Night

In [12]:
# Train on the 'SUHI Night' target and save under the 'SUHI_night_summer' prefix.
model_suhi_n = get_model(
    label='SUHI Night',
    n_estimators=100,
    max_depth=10,
    filename='SUHI_night_summer',
)

R2:  0.929
RMSE:  0.259
