In [None]:
# EXAMPLE 1 - Create linear regression models to predict pole transformer loading.
# Read PI data from text file published by PI Integrator for Business Analytics.

import pandas as pd             # "pandas" for managing dataframe from published .txt file.
import numpy as np              # "numpy" for statistics.
import matplotlib.pyplot as plt # "matplot.pyplot" for graphics 
import seaborn as sns           # "seaborn" for statistical data visualization. 

# Location of the published data file, relative to the current directory.
file_path = 'Pole Transformer Loads.txt'

# Load the tab-separated text export into the transformers dataframe.
poleTransformerLoads = pd.read_csv(file_path, sep="\t")

# Show only the column headers by printing a zero-row slice of the dataframe.
print(poleTransformerLoads.iloc[:0])

In [None]:
# Shorter aliases for the verbose source column names, applied in one pass
# so the columns are easier to work with.
column_aliases = {
    'Single Phase Transformer': 'Transformer',
    'Ambient Temperature': 'Temperature',
    'Relative Humidity': 'Humidity',
    'Wh Delivered Load': 'Wh Load',
    'Wh Delivered Load - 14d': 'Wh Load-14d',
    'Wh Delivered Load - 7d': 'Wh Load-7d',
    'Wind Speed': 'Wind',
}
poleTransformerLoads.rename(columns=column_aliases, inplace=True)

# Build a second dataframe holding only the columns the modelling steps use.
model_columns = ['Transformer', 'TimeStamp', 'Hour', 'Temperature', 'Humidity',
                 'Wind', 'Wh Load', 'Wh Load-7d', 'Wh Load-14d']
modellingData = poleTransformerLoads[model_columns]

# Preview the first five rows as a sanity check.
modellingData.head(5)

In [None]:
# Preview the final five rows to confirm the whole file was loaded.
modellingData.tail(n=5)

In [None]:
# Summary statistics for each column, transposed so every column reads as one row.
modellingData.describe().transpose()

In [None]:
# Generate a Pearson correlation matrix to check for the existence of good
# relationships for our model.
# Only numeric columns are passed to corr(): "Transformer" holds string
# identifiers (e.g. "PT_XYZ0358"), and pandas 2.0+ raises on non-numeric columns
# instead of silently dropping them. select_dtypes() keeps the result the same
# on older pandas versions as well.
model_corr = modellingData.select_dtypes(include="number").corr(method='pearson')
model_corr

In [None]:
# Set size of chart.
fig, ax = plt.subplots(figsize=(8,8))

# Let's suppress the redundant half of the matrix by masking the diagonal and
# upper triangle (True = hidden).
# The builtin `bool` is used because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
mask = np.zeros_like(model_corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Let's take a graphical view of these correlations; the colour scale is pinned
# to the full [-1, 1] range of a correlation coefficient.
sns.heatmap(data=model_corr, mask=mask, cmap='coolwarm', vmin=-1, vmax=1, linewidths=1, square=True,
    annot=True, ax=ax)

In [None]:
# To analyze transformers one at a time, index the dataframe by its "Transformer"
# column while also keeping that column in the data (drop=False).
modellingData = modellingData.set_index(keys="Transformer", drop=False)

# Preview the re-indexed dataframe: the row labels are now transformer names.
modellingData.head(5)

In [None]:
# Select the rows of a single transformer (here PT_XYZ0358) by index label
# and preview the first five of them.
modellingData.loc["PT_XYZ0358"].head(5)

In [None]:
# Redo the correlation matrix for just PT_XYZ0358.
# Only numeric columns are correlated: the selection still contains the string
# "Transformer" column, and pandas 2.0+ raises on non-numeric columns instead of
# silently dropping them.
model_corr = (
    modellingData.loc["PT_XYZ0358",:]
    .select_dtypes(include="number")
    .corr(method ='pearson')
)

fig, ax = plt.subplots(figsize=(8,8))

# Let's suppress the redundant half of the matrix by masking the diagonal and
# upper triangle (True = hidden).
# The builtin `bool` is used because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
mask = np.zeros_like(model_corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Let's take a graphical view of these correlations; the colour scale is pinned
# to the full [-1, 1] range of a correlation coefficient.
sns.heatmap(data=model_corr, mask=mask, cmap='coolwarm', vmin=-1, vmax=1, linewidths=1, square=True,
    annot=True, ax=ax)

In [None]:
# Linear regression estimator from the scikit-learn package.
from sklearn.linear_model import LinearRegression

# Rows belonging to transformer PT_XYZ0358 only.
transformer_0358 = modellingData.loc["PT_XYZ0358",:]

# Four-term model: the loads from 7 and 14 days earlier plus temperature and
# humidity are the predictors; the current "Wh Load" is the target.
predictors = ["Wh Load-7d", "Wh Load-14d", "Temperature", "Humidity"]
LinReg = LinearRegression()
LinReg.fit(transformer_0358[predictors], transformer_0358["Wh Load"])

# Print the fitted equation; the coefficients come back in the same order as
# the predictor columns passed to fit().
coef_7d, coef_14d, coef_temperature, coef_humidity = LinReg.coef_
print(
    "Eq:\n", coef_7d, "*", "'Wh Delivered Load - 7d' + ",
    coef_14d, "*", "'Wh Delivered Load - 14d' + ",
    coef_temperature, "*", "'Ambient Temperature' +",
    coef_humidity, "*", "'Relative Humidity' +(", LinReg.intercept_, ")",
)