In [1]:
import sys

import pandas as pd
import matplotlib.pyplot as plt

# display/HTML are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Optional: uncomment to add a local tools directory to the import path
# before importing mnv14.
# toolPath = r'/Users/decolvin/Box Sync/UCD_ECO_coding/mypy'
# sys.path.append(toolPath)

import mnv14 as mnv

# Widen the notebook page container to 90% of the browser window.
display(HTML("<style>.container { width:90% !important; }</style>"))

# pandas display limits used for every table printed below.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 500)

# Record which mnv version produced the results in this notebook.
print(mnv.version)

  from pandas.core import datetools


Version 1.4


### Local Data Loading

In [None]:
# Load the locally saved workbook: the first row is the header and the
# first column becomes the index.
filePath = 'data/gbsfkilledvif.xlsx'

# parse_dates=True asks pandas to parse the index as dates (the original
# keyword `parse_date` was a typo). `infer_datetime_format` is not a
# documented read_excel parameter and is rejected by current pandas, so it
# is no longer passed.
data = pd.read_excel(filePath, header=0, index_col=0, parse_dates=True)

# Quick check of which columns were loaded.
print(data.columns)

### PI Data Loading

In [2]:
from PI_client2 import pi_client

# Client object used for every PI query in this notebook.
pi = pi_client()

# Wildcard search for point names; only element 0 of the search result is
# kept as the tag list.
tagSearch = pi.search_by_point('*shields*kbtu*')
tags = tagSearch[0]
# To include more points, extend the list with further searches, e.g.:
#tags += pi.search_by_point('*shields*kbtu*')[0]

print(tags)

[u'Shields_ChilledWater_Demand_kBtu', u'Shields_Electricity_Demand_kBtu', u'Shields_Steam_Demand_kBtu']


#### Pull tags

In [None]:
# Query settings for the PI pull.
interval = '1 hour'  # sampling interval; units can be "minute", "hour" or "day"
calculation = 'calculated'  # TODO: possibly redundant - confirm before removing

# Start and end dates of the requested window.
startDate = '2017-01-01'
endDate = '2018-01-01'

# Uncomment to pull the tags from PI; this assigns `data`, which the
# Data Section cell reads.
# data = pi.get_stream_by_point(tags, start=startDate, end=endDate, interval=interval, calculation=calculation)

# Data Section

In [None]:
# Settings passed to mnv.data_keeper together with the raw frame.
dataParams = {
    'column': 1,
    'IQRmult': 4.0,
    'IQR': 'y',
    'resampleRate': 'D',      # 'D' for daily, 'H' for hourly
    'OATsource': 'file',      # 'self' or 'file'
    'OATname': 'OAT',         # name of the OAT column if OATsource is 'self'
    'sliceType': 'ranges',    # 'half', 'middate' or 'ranges'
    'midDate': '2017-01-01',  # only needed with sliceType 'middate'
    # only needed with sliceType 'ranges'
    'dateRanges': ['2016-06-01', '2017-06-01', '2017-08-01', '2017-12-01'],
}

dk = mnv.data_keeper(data, dataParams)
dk.default_clean()

# Plots
dk._outlier_plot()
# The upper y-limit is 10% above the largest value in the selected column
# of the modified data; the lower limit is fixed at -100.
resampledCeiling = dk.modifiedData[dk.com].max() * 1.1
dk._resampled_plot(yrange=(-100, resampledCeiling))
dk._pre_post_plot()

# Many Linear Models

In [None]:
# Settings shared by the many-model search here and the single model fitted
# in the next section.
# NOTE(review): randomState is None while testTrainSplit is 'random', so the
# split is presumably not reproducible between runs - confirm in mnv.
modelParams = {
    'params': ['CDH', 'HDH', 'month', 'hour', 'weekday'],
    'testTrainSplit': 'random',
    'randomState': None,
    'testSize': 0.2,
    'commodityRate': 0.0157,
    'paramPermuteList': ['', 'C(weekday)', 'C(month)'],
}

allmod = mnv.many_ols(dk.pre, dk.post, modelParams)
allmod.run_all_linear()

# Show the first five rows of the stats pool and plot pool entry 1.
print(allmod.statsPool[0:5])
allmod.plot_pool(1)

# Carry the 'params' of the first statsPool row into modelParams for the
# single-model fit (presumably the best-ranked model - verify the ordering
# of statsPool in mnv).
firstPoolRow = allmod.statsPool.iloc[0]
modelParams['params'] = firstPoolRow['params']

# Single Linear Model

In [None]:
# Fit one OLS model on the pre/post data using the current modelParams.
mod = mnv.ols_model(dk.pre, dk.post, modelParams)

# Model plot first, then the k-fold and VIF diagnostics.
mod.model_plot()
mod.calculate_kfold()
mod.calculate_vif()

# Show only the rows whose VIF exceeds 10.
vifTable = mod.vif
print(vifTable[vifTable['VIF'] > 10])

print('\nKfold')
for kfoldResult in (mod.kfoldStats, mod.kfoldRelative):
    print(kfoldResult)

# Show the plot before the stats summary.
plt.show()
# Kept as the final expression so the notebook displays the summary.
mod.Fit.summary()

### Savings

In [None]:
# Dollar savings: mod.postCumsum multiplied by the commodity rate, rounded
# to one decimal place.
savingsDollars = round(mod.postCumsum * mod.params.commodityRate, 1)
print("Savings = $" + str(savingsDollars))

# Savings plot with the y-axis in dollars.
mod.savings_plot(yaxis='dollars')

In [None]:
# Pass the data keeper and the fitted model to mnv's archive helper
# (presumably writes the run's data/results to disk - confirm in mnv14).
mnv.create_archive(dk, mod)
