## Predicting Energy Consumption from PJM (in MW)

This notebook also compares energy consumption between the East and West regions of the United States, along with each region's projected energy consumption.

#### Functions & Imports

In [45]:
import pandas as pd
import seaborn as sbn
import plotly as plt
import scipy.optimize as sp
import math as m
import numpy as np

In [46]:
def lagrange(x, y, xx):
    '''
    Lagrange Interpolating Polynomial
    Evaluates, at xx, the (n-1)th-order Lagrange interpolating polynomial that
    passes through the n data points (x[i], y[i]).

    Input:
        x => array of independent variable values (the interpolation nodes)
        y => array of dependent variable values, same length as x
        xx => value of the independent variable at which to interpolate

    Output:
        yint => interpolated value at xx

    Raises:
        ValueError => if x and y do not have the same length

    Note:
        The values in x are expected to be distinct; a repeated value makes the
        denominator (x[i] - x[j]) zero.
    '''

    n = len(x)
    if len(y) != n:
        # Raise rather than return the message: a returned string would flow into
        # later arithmetic/plotting and fail far away from the real cause.
        raise ValueError('X and Y must be the same length')

    yint = 0
    for i in range(n):
        # i-th term of the sum: y[i] * L_i(xx), where
        # L_i(xx) = product over j != i of (xx - x[j]) / (x[i] - x[j])
        term = y[i]
        for j in range(n):
            if i != j:
                term *= (xx - x[j]) / (x[i] - x[j])
        yint += term

    return yint

In [47]:
# Read the hourly West (PJMW) and East (PJME) csv files into dataframes, then show the West frame
dfw, dfe = (pd.read_csv(csvPath) for csvPath in ('PJMW_hourly.csv', 'PJME_hourly.csv'))
dfw

Unnamed: 0,Datetime,PJMW_MW
0,2002-12-31 01:00:00,5077.0
1,2002-12-31 02:00:00,4939.0
2,2002-12-31 03:00:00,4885.0
3,2002-12-31 04:00:00,4857.0
4,2002-12-31 05:00:00,4930.0
...,...,...
143201,2018-01-01 20:00:00,8401.0
143202,2018-01-01 21:00:00,8373.0
143203,2018-01-01 22:00:00,8238.0
143204,2018-01-01 23:00:00,7958.0


#### West-region

In [52]:
# Average the hourly PJMW_MW load per calendar year.
#
# Rows are grouped by the year parsed from 'Datetime' instead of by their position
# in the file, so the result does not depend on the csv being in chronological
# order and every hourly reading is counted exactly once.
# NOTE(review): the displayed frame starts at 2002-12-31 and ends at 2018-01-01,
# so the file does not appear to be sorted oldest-to-newest -- confirm.

# pd.to_numeric returns a new Series (it does not modify dfw), so keep the result
loadsPJMW = pd.to_numeric(dfw['PJMW_MW'])
rowYears = pd.to_datetime(dfw['Datetime']).dt.year

# groupby sorts its keys, so the index runs from the earliest year to the latest
yearlyAvgs = loadsPJMW.groupby(rowYears).mean().round(2)

# Drop the final year (2018): per the original analysis it has too few PJMW vals
# to give a representative yearly average
yearlyAvgs = yearlyAvgs.iloc[:-1]

usedYears = [int(y) for y in yearlyAvgs.index] # years (as ints) that have an average
avgs = yearlyAvgs.tolist() # avg PJMW vals, aligned one-to-one with usedYears

#### Regression, Interpolation, and Visualization

In [74]:
# Turn the per-year lists into arrays for the fit
years = np.array(usedYears)
avgPJMWs = np.array(avgs)

# Least-squares straight-line fit (degree-1 polynomial) of yearly average load vs. year.
# np.polyfit returns the coefficients highest power first: p = [slope, intercept]
p = np.polyfit(years, avgPJMWs, deg=1)
a, b = p # a -> slope (1st param), b -> intercept (2nd param)

In [76]:
def year_vs_PJMW_USWest(x, coeffs=None):
    '''
    Regression model of average West-region load (PJMW_MW) as a function of year

    Input:
        x => year (scalar or array) at which to evaluate the model
        coeffs => optional polynomial coefficients, highest power first (the order
                  np.polyfit returns them); defaults to the notebook's fitted (a, b)

    Output:
        predicted average PJMW_MW at x
    '''
    if coeffs is None:
        # a and b come from the degree-1 polyfit: a is the slope, b the intercept
        coeffs = (a, b)
    # For (a, b) this evaluates a*x + b, the straight line that was actually fitted
    return np.polyval(coeffs, x)

In [None]:
newYears = [] # years at which the regression equation is evaluated
newPJMWs = [] # PJMW vals predicted by the regression equation

# NOTE(review): the original loop had no body, which is a SyntaxError. Evaluating
# the fit at each observed year is the minimal completion; widen the set of years
# here to project future consumption -- confirm the intended range.
for year in usedYears:
    newYears.append(year)
    newPJMWs.append(year_vs_PJMW_USWest(year))

#### East-region

In [None]:
# Using numerical methods to create a model from each dataset
