In [5]:
# library
import pandas as pd
import os 
import feather
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
from models import rf
from utils import *


In [6]:
# Read the lagged water-use table.
WU = feather.read_dataframe('Data/WU_lag.feather')
# WU2 keeps a handle on the frame exactly as loaded: the drop() below returns
# a new frame and rebinds WU, so WU2 still carries the 'Index' column that the
# output cells further down read.
WU2 = WU

# Pre-processing: remove the columns that are not fed to the model.
# ('year' used to be listed twice in this drop list; once is sufficient.)
WU = WU.drop(columns=['Index', 'year', 'ParcelID', 'Days', 'month'])

# Split on the 'Set' label, then discard the label column itself.
WU_train = WU[WU['Set'] == 'train'].drop(columns=['Set'])
WU_dev = WU[WU['Set'] == 'dev'].drop(columns=['Set'])
WU_test = WU[WU['Set'] == 'test'].drop(columns=['Set'])

# Every row that is not labelled 'test' (used to refit before test evaluation).
WU_train_all = WU[WU['Set'] != 'test'].drop(columns=['Set'])

# Names of the feature columns, in the same order as the columns of the X arrays.
predictors = list(WU_train.drop(columns=['TotalWaterUse']).columns)

# Feature matrices (X_*) and target vectors (Y_*) as plain NumPy arrays;
# 'TotalWaterUse' is the target column.
X_train = WU_train.drop(columns=['TotalWaterUse']).values
Y_train = WU_train['TotalWaterUse'].values

X_dev = WU_dev.drop(columns=['TotalWaterUse']).values
Y_dev = WU_dev['TotalWaterUse'].values

X_test = WU_test.drop(columns=['TotalWaterUse']).values
Y_test = WU_test['TotalWaterUse'].values

X_train_all = WU_train_all.drop(columns=['TotalWaterUse']).values
Y_train_all = WU_train_all['TotalWaterUse'].values


# UQRF

In [7]:
# Hyper-parameter search: rf.gridRF receives the training and dev arrays and
# returns four values. The log it prints reports NLL, tau and error per mtry.
# NOTE(review): presumably tau/mtry are the best pair found on the dev set --
# confirm in models/rf. tau and mtry are overwritten with hard-coded values in
# the next code cell, and nll is rebound again in the BMA cell.
reg, tau, mtry, nll = rf.gridRF(X_train, Y_train, X_dev, Y_dev)

mtry: 1 	 NLL: 8.590279393537461 	 tau:1.33514404296875e-05 	 erorr: 1420.8585840268313
mtry: 2 	 NLL: 8.477430370313298 	 tau:1.24359130859375e-05 	 erorr: 1271.140056485419
mtry: 3 	 NLL: 8.428652501945926 	 tau:1.1978149414062499e-05 	 erorr: 1205.9420236157875
mtry: 4 	 NLL: 8.400247253498131 	 tau:1.24359130859375e-05 	 erorr: 1172.2801864515536
mtry: 5 	 NLL: 8.388116308531147 	 tau:1.0833740234375e-05 	 erorr: 1156.209262502408
mtry: 6 	 NLL: 8.383296028375604 	 tau:9.765625e-06 	 erorr: 1144.376668369649
mtry: 7 	 NLL: 8.381235235861647 	 tau:9.422302246093749e-06 	 erorr: 1142.3509221464888
mtry: 8 	 NLL: 8.376708539759829 	 tau:9.651184082031251e-06 	 erorr: 1139.6344002065607
mtry: 9 	 NLL: 8.386674480082434 	 tau:7.0190429687499995e-06 	 erorr: 1137.4726812863444
mtry: 10 	 NLL: 8.380331119833803 	 tau:8.27789306640625e-06 	 erorr: 1136.6926548868585
mtry: 11 	 NLL: 8.38722421904126 	 tau:6.90460205078125e-06 	 erorr: 1139.5090943569296
mtry: 12 	 NLL: 8.382899472419828 	 t

In [9]:
# Dev-set run: fit on the training split with fixed hyper-parameters.
# The pair below matches the mtry = 8 row of the grid-search log printed above.
mtry = 8
tau = 9.651184082031251e-06

model = rf.rf(X_train, Y_train, mtry=mtry, tau=tau)

# The sixth return value is not used for the dev set, so it is discarded.
(y_hat, L_hat, U_hat, p_y, sig_hat, _) = model.predict_MC(
    X_dev, Y_dev, alpha=0.05
)

In [10]:
# Collect the dev-set predictions next to the observed values and save them.
# (The frame is named all_test_MF even though it holds dev rows here.)
y_true = Y_dev
all_test_MF = pd.DataFrame({
    # 'Index' is read from WU2, the frame as loaded, because WU had it dropped.
    'Index': WU2.loc[WU2['Set'] == 'dev', 'Index'].values,
    'Y': y_true,
    'y_hat': y_hat,
    # Lower bound is clipped at zero.
    'L': np.maximum(L_hat, 0),
    'U': U_hat,
    'sig_hat': sig_hat,
    'p_y': p_y,
})

# Build the path with os.path.join instead of hard-coded '\\' separators:
# on Windows the result is the same string as before, while on POSIX the
# backslashes would otherwise become part of a single file name in the
# working directory rather than separating folders.
path = os.path.join('Out_dev_MF', 'month1', 'RF_dev_1m_dist.feather')
# Make sure the target folder exists so the write does not fail on a fresh checkout.
os.makedirs(os.path.dirname(path), exist_ok=True)
feather.write_dataframe(all_test_MF, path)

In [11]:
# Test-set run: refit on every non-test row, reusing the same mtry and tau.
model = rf.rf(X_train_all, Y_train_all, mtry=mtry, tau=tau)

# Unlike the dev run, the sixth return value is kept (it is saved to disk later).
(y_hat, L_hat, U_hat, p_y, sig_hat, y_MC_mixture) = model.predict_MC(
    X_test, Y_test, alpha=0.05
)

In [12]:
# Collect the test-set predictions next to the observed values and save them.
y_true = Y_test
all_test_MF = pd.DataFrame({
    # 'Index' is read from WU2, the frame as loaded, because WU had it dropped.
    'Index': WU2.loc[WU2['Set'] == 'test', 'Index'].values,
    'Y': y_true,
    'y_hat': y_hat,
    # Lower bound is clipped at zero.
    'L': np.maximum(L_hat, 0),
    'U': U_hat,
    'sig_hat': sig_hat,
    'p_y': p_y,
})

# Paths are built with os.path.join instead of hard-coded '\\' separators:
# on Windows the strings are unchanged, while on POSIX the backslashes would
# otherwise end up inside a single file name in the working directory.
out_dir = os.path.join('Out_test_MF', 'month1')
# Make sure the target folder exists so the writes do not fail on a fresh checkout.
os.makedirs(out_dir, exist_ok=True)

path = os.path.join(out_dir, 'RF_1m_dist.feather')
feather.write_dataframe(all_test_MF, path)

# Sixth return value of predict_MC on the test set, stored as a NumPy .npy file.
path = os.path.join(out_dir, 'RF_MC.npy')
np.save(path, y_MC_mixture)

# Report

In [13]:
# Test-set metrics from the utils helper, using the lower bound clipped at zero
# and the same alpha as the prediction call. Going by the helper's name and the
# labels used when these values are written to the results file, r1..r5 are
# RMSE, NLL, NOIS, AWPI and ECPI in that order -- TODO confirm against utils.
# The helper also prints a LaTeX-style table row (shown in the cell output).
r1,r2,r3,r4,r5 = get_RMSE_NLL_NOIS_AWPI_ECPI(Y_test,y_hat,np.maximum(L_hat,0 ), U_hat,alpha=0.05)

 & 1150.9 & 8.42 & 7093.05 & 4890.41 & 0.94 & 95\% \\


In [15]:
# Append the RF entry to the one-month results file (opened in append mode).
# The NLL column is the mean negative log of p_y, not r2 from the metrics helper.
with open("Results/Results_1m.txt", "a") as myfile:
    myfile.write("RF \n")
    myfile.write(
        'RMSE %f & NLL %f & NOIS %f & AWPI %f & ECPI %f \n'
        % (r1, -np.log(p_y).mean(), r3, r4, r5)
    )

# BMA

In [16]:
# BMA input: call predict() on the same rows the current model was fitted on
# and append the third returned value (nll) to the ensemble file.
(best_tau, rmse, nll, _) = model.predict(X_train_all, Y_train_all)

with open("Ensemble/BMA_short.txt", "a") as myfile:
    myfile.write("RF, %f \n" % nll)