In [None]:
## Options
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 500)

%load_ext autoreload
%autoreload 2

In [None]:
## Imports

## Mac imports
import sys
# Local tool directory (presumably where mnv13 and PI_client live — confirm).
toolPath = r'/Users/decolvin/Box Sync/UCD_ECO_coding/mypy'
# Add the directory only once so re-running this cell does not keep
# appending duplicate entries to sys.path.
if toolPath not in sys.path:
    sys.path.append(toolPath)

## Main Imports

import mnv13 as mnv
print(mnv.version)

# Create the PI client used by the PI data-loading cells.
from PI_client import pi_client
pi = pi_client()
print("Success!")

### Local Data Loading

In [None]:
# Alternative inputs kept for reference:
# filePath = 'data/OATtest.xlsx'
# filePath = 'data/arc2yeardata.xlsx'
# data = pd.read_excel(filePath, header=0, index_col=0, parse_dates=True)
filePath = r'/Users/decolvin/Downloads/GBSF Data 061318.csv'

# First row is the header; the first column becomes the date-parsed index.
data = pd.read_csv(
    filePath,
    header=0,
    index_col=0,
    parse_dates=True,
)
print(data.columns)

### PI Data Loading

In [None]:
# Wildcard search for PI points; only the first element of the returned
# result is kept as the tag collection.
tagQuery = '*shields*kbtu*'
tags = pi.search_by_point(tagQuery)[0]
#tags += pi.search_by_point('*shields*kbtu*')[0]

print(tags)

#### Pull tags

In [None]:
# Time window for the PI pull.
startDate = '2017-01-01'
endDate = '2018-01-01'

# Sampling options for the PI pull.
interval = '1 hour'  # Can be "minute" "hour" "day"
calculation = 'calculated'  # Redundant?

# Use every tag held in `tags`.
tags_selected = tags

# The PI pull itself is currently commented out.
# data = pi.get_stream_by_point(tags_selected, start=startDate, end=endDate, interval=interval, calculation=calculation)

# Data Section

In [None]:
# Preview the first rows of the loaded data.
data.head()

In [None]:
# Name the first column "response"; that column is what the data handler receives.
data.rename(columns={data.columns[0]: "response"}, inplace=True)

# Options passed to mnv.data_handler.
inputDict = {
    'IQRmult': 4.0,
    'IQR': 'y',
    'resampleRate': 'D',  # 'D' for daily, 'H' for hourly
    'verbosity': 3,
    'sliceType': 'ranges',  # half, middate, ranges
    'midDate': '2017-01-01',  # only needed with sliceType : 'middate'
    'dateRanges': ['2016-01-01', '2018-01-01', '2018-04-01', '2018-04-01'],  # only needed with sliceType : 'ranges'
    'OATsource': 'file',  # 'self' or 'file'
    'OATname': 'OAT',  # name of the OAT column if OATsource is 'self'
}

dc = mnv.data_handler(data['response'], inputDict)
dc.default_clean()

## Re-draw the outlier plot with an explicit y-range in case the default range is unusable
cleanedSeries = dc.modifiedData[dc.com]
dc._outlier_plot(cleanedSeries, yrange=(-100, cleanedSeries.max()*1.1))

# plot 3
dc._pre_post_plot()

# Many Linear Models

In [None]:
# Inspect the columns of dc.pre, which is handed to the models below.
dc.pre.columns

In [None]:
# Settings handed to the mnv model classes.
modelDict = {
    'params': 'CDH + HDH + C(month) + C(weekday)',
    'testTrainSplit': 'random',
    'randomState': 123,
    'testSize': 0.2,
    'commodityRate': 0.056,
    'paramList': ['', 'C(month)', 'C(weekday)', 'C(hour)'],
}

# Build and run the batch of linear models on the pre/post data.
allmod = mnv.many_ols(dc.pre, dc.post, modelDict)
allmod.run_all_linear()

# Show the first five entries of the resulting stats pool, then plot.
print(allmod.statsPool[:5])
allmod.plot_pool(1)

# Single Linear Model

In [None]:
# Reuse the formula stored in the first row of the stats pool
# (presumably the best-ranked model — confirm the pool's sort order).
firstPoolRow = allmod.statsPool.iloc[0]
modelDict['params'] = firstPoolRow['params']

mod = mnv.ols_model(dc.pre, dc.post, modelDict)
mod.model_plot()

plt.show() # Show plot before Stats summary
print(mod.vif)
#mod.Fit.summary()

In [None]:
# Run the k-fold evaluation, then print the per-fold tables it stores on `mod`.
mod.kfold()
print('')
print("Stats over each fold:")
print(mod.kfoldStats)
print('')
print("Relative error over each fold")
print(mod.kfoldRelative.round(1))
print('')

# Square roots of the mean and standard deviation of entry [2] of the fold stats,
# computed once and reused by both summary lines.
# NOTE(review): `[2]` is a label lookup on an integer-labelled index and a
# positional lookup otherwise — confirm which is intended.
accuracyMean = np.sqrt(mod.kfoldStats.mean()[2])
accuracySpread = np.sqrt(mod.kfoldStats.std()[2])

print("Accuracy (Mean): %0.2f (+/- %0.2f)" % (accuracyMean, accuracySpread))
# The stray closing parenthesis in the original message has been removed.
print("Accuracy (Hi/Lo): %0.2f, %0.2f" % (accuracyMean + accuracySpread, accuracyMean - accuracySpread))

In [None]:
# Format the value inside the print call: applying `%` to the result of print()
# raises a TypeError on Python 3 because print() returns None.
print("CV CVRMSE: %.4f" % (np.sqrt(mod.kfoldRelative[5])/dc.pre.response.mean()))

In [None]:
# NOTE(review): `high` and `low` are not used in the visible part of the notebook,
# and the offsets 742 / 495.58 are unexplained — confirm they are still needed.
high = dc.pre.response + 742
low = dc.pre.response - 495.58

# If mod.Fit is a statsmodels results object, `conf_int` is a method and must be
# called before plotting; if it is already stored as data, it is used as-is.
confInt = mod.Fit.conf_int
confBounds = confInt() if callable(confInt) else confInt
confBounds.plot(figsize=(18,5))

plt.show()

### Savings

In [None]:
#rates = {'chw':0.056, 'steam':0.0157, 'elec':0.030} #kBTU prices for 2016(?)
#commodity = 'chw'

# Dollar savings: mod.postCumsum multiplied by the commodity rate, rounded to cents.
savingsDollars = round(mod.postCumsum * mod.params.commodityRate, 2)
print("Savings = $" + str(savingsDollars))
mod.savings_plot(yaxis='dollars')