In [1]:
# library
import pandas as pd
import os 
import feather
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
from models import rf
from utils import *


In [2]:
# Read the full water-use table.
WU = feather.read_dataframe('Data/WU.feather')
# WU2 keeps a reference to the unmodified frame (it still has the 'Index'
# column); WU itself is rebound just below to a reduced copy.
WU2 = WU

# Pre-processing: remove the columns that must not end up in the feature matrix.
# (The original list named 'year' twice; each label is listed once here.)
WU = WU.drop(columns=['Index', 'year', 'ParcelID', 'Days', 'month'])


def _split_xy(frame, target='TotalWaterUse'):
    """Split `frame` into a feature matrix and a target vector.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that contains the `target` column plus the predictor columns.
    target : str
        Name of the response column.

    Returns
    -------
    tuple
        ``(X, y)`` as NumPy arrays: ``X`` holds every column except `target`,
        ``y`` holds the `target` column.
    """
    return frame.drop(columns=[target]).values, frame.loc[:, target].values


# Partition the rows by the 'Set' label, then discard the label column.
WU_train = WU[WU['Set'] == 'train'].drop(columns=['Set'])
WU_dev = WU[WU['Set'] == 'dev'].drop(columns=['Set'])
WU_test = WU[WU['Set'] == 'test'].drop(columns=['Set'])

# Every row that is not in the test set (used to refit before testing).
WU_train_all = WU[WU['Set'] != 'test'].drop(columns=['Set'])

# Predictor names, in the same order as the columns of the X_* arrays.
predictors = [col for col in WU_train.columns if col != 'TotalWaterUse']

X_train, Y_train = _split_xy(WU_train)
X_dev, Y_dev = _split_xy(WU_dev)
X_test, Y_test = _split_xy(WU_test)
X_train_all, Y_train_all = _split_xy(WU_train_all)


# UQRF

# Hyper-parameter search on the train/dev split via the project's rf.gridRF.
# NOTE(review): presumably returns the fitted model, the selected tau and mtry,
# and the dev NLL -- confirm against models/rf. The next cell re-assigns tau
# and mtry to fixed values, so this result is overridden when run in order.
reg, tau, mtry, nll = rf.gridRF(
    X_train,
    Y_train,
    X_dev,
    Y_dev,
)

In [3]:
# Dev-set evaluation with fixed hyper-parameters.
tau = 1.86920166015625e-06
mtry = 5

# Fit on the training split only.
model = rf.rf(X_train, Y_train, mtry=mtry, tau=tau)

# Monte-Carlo prediction on the dev split; the sixth return value is discarded.
# NOTE(review): L_hat / U_hat look like interval bounds for alpha=0.05 and p_y
# like the predictive density at the observed Y -- confirm in models/rf.
(
    y_hat,
    L_hat,
    U_hat,
    p_y,
    sig_hat,
    _,
) = model.predict_MC(X_dev, Y_dev, alpha=0.05)

In [4]:
# Assemble the dev-set predictions into one table keyed by the original 'Index'.
# WU2 is the unmodified frame, so filtering it on Set == 'dev' yields rows in the
# same order as Y_dev / the dev predictions.
y_true = Y_dev
all_test_MF = pd.DataFrame({
    'Index': WU2[WU2.loc[:, 'Set'] == 'dev'].loc[:, 'Index'].values,
    'Y': y_true,
    'y_hat': y_hat,
    'L': np.maximum(L_hat, 0),  # lower bound clipped at zero
    'U': U_hat,
    'sig_hat': sig_hat,
    'p_y': p_y,
})

# Build the path with os.path.join instead of hard-coded '\\' separators, which
# on non-Windows systems would produce a single file whose *name* contains
# backslashes. On Windows the resulting path string is unchanged.
out_dir = os.path.join('Out_dev_MF', 'month12')
os.makedirs(out_dir, exist_ok=True)  # make sure the target folder exists
path = os.path.join(out_dir, 'RF_dev_12m_dist.feather')
feather.write_dataframe(all_test_MF, path)

In [5]:
# Test-set evaluation: refit on every non-test row (train + dev), reusing the
# tau / mtry values already in scope.
model = rf.rf(X_train_all, Y_train_all, mtry=mtry, tau=tau)

# Monte-Carlo prediction on the test split; here the sixth return value is kept
# as y_MC_mixture (it is saved to disk in the following cell).
(
    y_hat,
    L_hat,
    U_hat,
    p_y,
    sig_hat,
    y_MC_mixture,
) = model.predict_MC(X_test, Y_test, alpha=0.05)

In [6]:
# Assemble the test-set predictions into one table keyed by the original 'Index'.
# WU2 is the unmodified frame, so filtering it on Set == 'test' yields rows in
# the same order as Y_test / the test predictions.
y_true = Y_test
all_test_MF = pd.DataFrame({
    'Index': WU2[WU2.loc[:, 'Set'] == 'test'].loc[:, 'Index'].values,
    'Y': y_true,
    'y_hat': y_hat,
    'L': np.maximum(L_hat, 0),  # lower bound clipped at zero
    'U': U_hat,
    'sig_hat': sig_hat,
    'p_y': p_y,
})

# Build the paths with os.path.join instead of hard-coded '\\' separators, which
# on non-Windows systems would produce files whose *names* contain backslashes.
# On Windows the resulting path strings are unchanged.
out_dir = os.path.join('Out_test_MF', 'month12')
os.makedirs(out_dir, exist_ok=True)  # make sure the target folder exists

path = os.path.join(out_dir, 'RF_12m_dist.feather')
feather.write_dataframe(all_test_MF, path)

# Persist the Monte-Carlo output returned by predict_MC alongside the table.
path = os.path.join(out_dir, 'RF_MC.npy')
np.save(path, y_MC_mixture)

# Report

In [7]:
# Test-set metrics from the project helper (imported via `from utils import *`).
# The lower bound is clipped at zero before scoring. The five results are
# reported further down as RMSE (r1), NOIS (r3), AWPI (r4) and ECPI (r5);
# r2 is presumably the helper's NLL -- confirm in utils.
r1, r2, r3, r4, r5 = get_RMSE_NLL_NOIS_AWPI_ECPI(
    Y_test,
    y_hat,
    np.maximum(L_hat, 0),
    U_hat,
    alpha=0.05,
)

 & 2562.25 & 9.19 & 13209.34 & 9258.59 & 0.93 & 95\% \\


In [8]:
# Append the RF test metrics to the shared 12-month results file.
# The NLL column is computed here as the mean negative log of p_y rather than
# taken from r2.
with open("Results/Results_12m.txt", "a") as results_file:
    results_file.write("RF \n")
    metrics = (r1, -np.log(p_y).mean(), r3, r4, r5)
    results_file.write(
        'RMSE %f & NLL %f & NOIS %f & AWPI %f & ECPI %f \n' % metrics
    )

# BMA

In [9]:
# Score the current `model` on all non-test rows and append its NLL to the
# ensemble file. In notebook order `model` is the one fitted on X_train_all.
# NOTE(review): presumably this NLL feeds the BMA weights -- confirm downstream.
best_tau, rmse, nll, _ = model.predict(X_train_all, Y_train_all)

with open("Ensemble/BMA_long.txt", "a") as bma_file:
    bma_line = "RF, %f \n" % nll
    bma_file.write(bma_line)