In [1]:
# Initial Imports
import requests
import pandas as pd
import json
import numpy as np
from path import Path
from datetime import datetime
from datetime import timedelta
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Import API Key
from config import key

# Documentation:
#https://www.worldweatheronline.com/developer/my/analytics.aspx?key_id=222419

In [3]:
# Make a request to the worldweatheronline local history weather API page
def makeARequest(location, startDate, endDate, yourAPIKey):
    """Request hourly past-weather data from the World Weather Online API.

    Args:
        location: Value sent as the ``q`` query parameter.
        startDate: Value sent as the ``date`` query parameter.
        endDate: Value sent as the ``enddate`` query parameter.
        yourAPIKey: Value sent as the ``key`` query parameter.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        For any other status the status code is printed and None is returned,
        so callers must check for None before indexing into the result.
    """
    baseURL = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx"

    # Let requests build and URL-encode the query string instead of
    # interpolating raw values into the URL by hand.
    queryParameters = {
        "q": location,
        "date": startDate,
        "enddate": endDate,
        "tp": "1",  # time interval between readings
        "format": "json",
        "key": yourAPIKey,
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(baseURL, params=queryParameters, timeout=30)

    if response.status_code != 200:
        print(response.status_code)
        return None

    return response.json()

In [4]:
# Pull the wind variables from the responseJson 
def monthlyHistoricalWeather(firstDayOfMonth, lastDayOfMonth, jsonResponse):
    """Flatten the hourly wind fields of an API response into a list of records.

    Args:
        firstDayOfMonth: First requested day as a 'YYYY-MM-DD' string.
        lastDayOfMonth: Last requested day (inclusive) as a 'YYYY-MM-DD' string.
        jsonResponse: Decoded response, read as
            jsonResponse["data"]["weather"][day]["hourly"][hour].

    Returns:
        A list of dicts, 24 per day, with the keys "Date", "Time",
        "WindSpeed(mph)", "WindDirection(Degrees)", "WindDirection(Compass)"
        and "WindGust(mph)". The list is empty when the last day precedes
        the first day.

    Raises:
        ValueError: If either date string does not match '%Y-%m-%d'.
        IndexError, KeyError: If the response lacks a day, hour or field.
    """
    first = datetime.strptime(firstDayOfMonth, '%Y-%m-%d')
    last = datetime.strptime(lastDayOfMonth, '%Y-%m-%d')

    # Use the real date difference; subtracting the day-of-month numbers is
    # only correct when both dates fall in the same month.
    numberOfDays = (last - first).days
    hoursPerDay = 24

    HourlyHistoricalWeather = []

    for day in range(numberOfDays + 1):
        dailyEntry = jsonResponse["data"]["weather"][day]
        for hour in range(hoursPerDay):
            hourlyEntry = dailyEntry["hourly"][hour]
            HourlyHistoricalWeather.append({
                "Date" : dailyEntry["date"],
                "Time" : hourlyEntry["time"],
                "WindSpeed(mph)" : hourlyEntry["windspeedMiles"],
                "WindDirection(Degrees)" : hourlyEntry["winddirDegree"],
                "WindDirection(Compass)" : hourlyEntry["winddir16Point"],
                "WindGust(mph)" : hourlyEntry["WindGustMiles"]
            })

    return HourlyHistoricalWeather

In [5]:

# Store the variables in a DataFrame
def monthlyHistoricalWeatherDF(month):
    """Return a pandas DataFrame built from one month's list of hourly records."""
    return pd.DataFrame(month)

In [6]:
# Latitude and longitude of the Hackberry Wind Farm as a "lat,long" string;
# it is passed to makeARequest as the query location.
# Source: https://www.thewindpower.net/windfarm_en_4012_hackberry.php
#   Latitude: 32.776111
#   Longitude: -99.476444
latLong = "32.776111,-99.476444"

In [7]:
# January 2019: request the month, flatten the hourly wind records, build a
# DataFrame and preview its first rows.
date, enddate = "2019-01-01", "2019-01-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

January = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
JanuaryDF = monthlyHistoricalWeatherDF(month=January)
JanuaryDF.head()

Unnamed: 0,Date,Time,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph)
0,2019-01-01,0,12,126,SE,24
1,2019-01-01,100,13,89,E,23
2,2019-01-01,200,14,53,NE,23
3,2019-01-01,300,15,17,NNE,22
4,2019-01-01,400,14,18,NNE,21


In [8]:
# February 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-02-01", "2019-02-28"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

February = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
FebruaryDF = monthlyHistoricalWeatherDF(month=February)

In [9]:

# March 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-03-01", "2019-03-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

March = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
MarchDF = monthlyHistoricalWeatherDF(month=March)

In [10]:
# April 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-04-01", "2019-04-30"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

April = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
AprilDF = monthlyHistoricalWeatherDF(month=April)

In [None]:

# May 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-05-01", "2019-05-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

May = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
MayDF = monthlyHistoricalWeatherDF(month=May)

In [None]:
# June 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-06-01", "2019-06-30"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

June = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
JuneDF = monthlyHistoricalWeatherDF(month=June)

In [None]:

# July 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-07-01", "2019-07-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

July = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
JulyDF = monthlyHistoricalWeatherDF(month=July)

In [None]:
# August 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-08-01", "2019-08-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

August = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
AugustDF = monthlyHistoricalWeatherDF(month=August)

In [None]:
# September 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-09-01", "2019-09-30"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

September = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
SeptemberDF = monthlyHistoricalWeatherDF(month=September)

In [None]:
# October 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-10-01", "2019-10-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

October = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
OctoberDF = monthlyHistoricalWeatherDF(month=October)

In [None]:
# November 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-11-01", "2019-11-30"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

November = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
NovemberDF = monthlyHistoricalWeatherDF(month=November)

In [None]:
# December 2019: request the month, flatten the hourly wind records and
# build a DataFrame.
date, enddate = "2019-12-01", "2019-12-31"

responseJson = makeARequest(location=latLong, startDate=date, endDate=enddate, yourAPIKey=key)

December = monthlyHistoricalWeather(
    firstDayOfMonth=date, lastDayOfMonth=enddate, jsonResponse=responseJson
)
DecemberDF = monthlyHistoricalWeatherDF(month=December)

In [None]:
# Combine each month into a single DataFrame with a fresh 0..N-1 row index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
monthlyFrames = [
    JanuaryDF, FebruaryDF, MarchDF, AprilDF, MayDF, JuneDF,
    JulyDF, AugustDF, SeptemberDF, OctoberDF, NovemberDF, DecemberDF,
]
hourlyWeatherDF2019 = pd.concat(monthlyFrames, ignore_index=True)

# A full non-leap year of hourly readings is expected; fail loudly otherwise
# (the previous hard-coded index of length 24*365 enforced the same thing).
assert len(hourlyWeatherDF2019) == 24 * 365, "expected 24 * 365 hourly rows"
hourlyWeatherDF2019

In [None]:
# Inspect the column dtypes of the combined frame before any conversion.
hourlyWeatherDF2019.dtypes

# Performing ETL on Wind Data at Hackberry Wind Farm

In [None]:
# Parse the 'Date' column into pandas datetime values.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

In [None]:
# Confirm the dtype of 'Date' after the datetime conversion.
hourlyWeatherDF2019.dtypes

In [None]:
# Cast the 'Time' column to integers.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    Time=lambda frame: frame['Time'].astype(int)
)

In [None]:
# Divide 'Time' by 100 (the sample output shows values such as 0, 100, 200).
# This reads and overwrites the same column, so re-running the cell on its
# own divides again.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    Time=lambda frame: frame['Time'].div(100)
)

In [None]:
# Cast the divided 'Time' values back to integers.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    Time=lambda frame: frame['Time'].astype(int)
)

In [None]:
# Add a 'time' column holding 'Time' interpreted as a timedelta in hours.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    time=lambda frame: pd.to_timedelta(frame['Time'], unit='h')
)

In [None]:
# Remove any whole-day component from the 'time' timedelta column so that
# only the time-of-day part remains.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    time=lambda frame: frame['time'] - pd.to_timedelta(frame['time'].dt.days, unit='d')
)

In [None]:
# Check the dtypes after adding the timedelta 'time' column.
hourlyWeatherDF2019.dtypes

In [None]:
# Add 'time_combined' as the sum of the 'Date' column and the 'time'
# timedelta column.
hourlyWeatherDF2019 = hourlyWeatherDF2019.assign(
    time_combined=lambda frame: frame['Date'] + frame['time']
)

In [None]:
# Drop the 'Time', 'Date' and 'time' helper columns now that 'time_combined'
# has been built from them.
# The axis is named with `columns=`: passing it positionally (`drop(labels, 1)`)
# was deprecated in pandas 1.3 and removed in pandas 2.0.
hourlyWeatherDF2019 = hourlyWeatherDF2019.drop(columns=['Time', 'Date', 'time'])

In [None]:
# Rename the columns to identifier-friendly names (no parentheses).
hourlyWeatherDF2019 = hourlyWeatherDF2019.rename(
    columns={
        'time_combined': 'time',
        'WindSpeed(mph)': 'WindSpeed_mph',
        'WindDirection(Degrees)': 'WindDirection_degrees',
        'WindDirection(Compass)': 'WindDirection_compass',
        'WindGust(mph)': 'WindGust_mph',
    }
)

In [None]:
# Put the timestamp first, followed by the wind measurements.
hourlyWeatherDF2019 = hourlyWeatherDF2019.loc[
    :,
    [
        'time',
        'WindSpeed_mph',
        'WindDirection_degrees',
        'WindDirection_compass',
        'WindGust_mph',
    ],
]

In [None]:
# Cast the wind speed column to integers.
hourlyWeatherDF2019 = hourlyWeatherDF2019.astype({'WindSpeed_mph': int})

In [None]:
# Cast the wind direction (degrees) column to integers.
hourlyWeatherDF2019 = hourlyWeatherDF2019.astype({'WindDirection_degrees': int})

In [None]:
# Cast the wind gust column to integers.
hourlyWeatherDF2019 = hourlyWeatherDF2019.astype({'WindGust_mph': int})

In [None]:
# Drop the row whose index label is the first label of the frame (i.e. the
# first hourly reading).
# NOTE(review): presumably done to line the weather rows up with the
# "Hour Ending" generation data -- confirm the intent.
weather2019Clean = hourlyWeatherDF2019.drop(hourlyWeatherDF2019.index[0])

In [None]:
# Write the cleaned weather frame to CSV without the row index.
weather2019Clean.to_csv(r'Output/weather_wind.csv', index = False)

In [None]:
# Display the cleaned weather frame; the copy is not assigned to any name,
# so this cell only produces output.
weather2019Clean.copy()

# Importing and Performing ETL on Hackberry Wind Energy Data

In [None]:
# Load the Hackberry generation CSV into a DataFrame and display it.
data = "Resources/Hackberry_Generation.csv"
Hackberry_df = pd.read_csv(data)
Hackberry_df

In [None]:
# Inspect the dtypes of the raw generation data.
Hackberry_df.dtypes

In [None]:
# Dropping the 'Unit' column.
# The axis is named with `columns=`: passing it positionally (`drop(label, 1)`)
# was deprecated in pandas 1.3 and removed in pandas 2.0.
Hackberry_df = Hackberry_df.drop(columns='Unit')

In [None]:
# Shorten the 'Hour Ending' column name to 'Hour'.
Hackberry_df = Hackberry_df.rename(columns={'Hour Ending': 'Hour'})

In [None]:
# Parse 'Date' using the %Y%m%d format into pandas datetime values.
Hackberry_df = Hackberry_df.assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y%m%d')
)

In [None]:
# Checking the dtypes after the 'Date' conversion
Hackberry_df.dtypes

In [None]:
# Render 'Hour' as text, cut off its last two characters and convert what is
# left to a 64-bit integer.
Hackberry_df = Hackberry_df.assign(
    Hour=lambda frame: frame['Hour'].astype(str).str.slice(stop=-2).astype(np.int64)
)

In [None]:
# Checking the dtype of 'Hour' after the integer conversion
Hackberry_df.dtypes

In [None]:
# Add an 'hour' column holding 'Hour' interpreted as a timedelta in hours.
Hackberry_df = Hackberry_df.assign(
    hour=lambda frame: pd.to_timedelta(frame['Hour'], unit='h')
)

In [None]:
# Remove the whole-day component from the 'hour' timedelta column so that
# only the time-of-day part remains.
# NOTE(review): a value of 24 hours has a day component of 1 and therefore
# becomes 0 hours here, which would place an "hour ending 24" reading at
# 00:00 of the SAME date rather than the next day -- confirm whether the
# source data contains hour 24 and whether this is intended.
Hackberry_df['hour'] = Hackberry_df['hour'] - pd.to_timedelta(Hackberry_df['hour'].dt.days, unit='d')

In [None]:
# Dropping the original 'Hour' column; the timedelta 'hour' column replaces it.
# The axis is named with `columns=`: passing it positionally (`drop(label, 1)`)
# was deprecated in pandas 1.3 and removed in pandas 2.0.
Hackberry_df = Hackberry_df.drop(columns='Hour')

In [None]:
# Keep the date, hour and generation columns, in that order.
Hackberry_df = Hackberry_df.loc[:, ["Date", "hour", "MWH"]]

In [None]:
# Check the dtypes of the re-arranged frame.
Hackberry_df.dtypes

In [None]:
# Add 'time' as the sum of the 'Date' column and the 'hour' timedelta column.
Hackberry_df = Hackberry_df.assign(
    time=lambda frame: frame['Date'] + frame['hour']
)

In [None]:
# Reduce the frame to the combined timestamp and the generation values.
Hackberry_df = Hackberry_df.loc[:, ["time", "MWH"]]

In [None]:
# Check the dtypes of the final two-column frame.
Hackberry_df.dtypes

In [None]:
# Display the generation frame; the copy is not assigned to any name, so
# this cell only produces output.
Hackberry_df.copy()

In [None]:
# Removing rows to include only 2019 year data to include same time range as wind data.
# The rows at positions 8759 through 13870 are dropped; rows before position
# 8759 (and any at position 13871 or later) are kept.
# NOTE(review): the positions are hard-coded and depend on the exact layout
# of the input CSV -- verify them if the source file changes.
Hackberry2019 = Hackberry_df.drop(Hackberry_df.index[8759:13871])

In [None]:
# Write the 2019 generation rows to CSV without the row index.
Hackberry2019.to_csv(r'Output/Hackberry_MHW.csv', index = False)

## Merging Hackberry Energy Data with Wind Data

In [None]:
# Outer-join the weather and generation frames on their shared 'time' column,
# keeping rows that appear in only one of the two.
hackberryWindMWH = weather2019Clean.merge(Hackberry2019, on='time', how='outer')

In [None]:
# Drop the row whose index label sits at position 8760 of the merged frame.
# NOTE(review): the position is hard-coded; this raises IndexError if the
# merge yields 8760 rows or fewer -- confirm which row is meant to go.
hackberryWindMWH =hackberryWindMWH.drop(hackberryWindMWH.index[8760])

In [None]:
# Write the merged wind/generation frame to CSV without the row index.
hackberryWindMWH.to_csv(r'Output/Hackberry_Wind_MHW.csv', index = False)

In [None]:
# Display the merged frame; the copy is not assigned to any name, so this
# cell only produces output.
hackberryWindMWH.copy()

In [None]:
# Keep only the merged rows that have no missing values (the outer merge
# leaves NaN where a timestamp exists in just one of the two sources).
hackberry = hackberryWindMWH.dropna()

## Linear Regression

In [None]:
import warnings
# Silence every warning raised from this point on (deprecation warnings
# included), so problems reported by later cells will not be shown.
warnings.filterwarnings('ignore')

In [None]:
from pathlib import Path
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [None]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [None]:
# Visually inspect generated energy (MWH) against wind speed.
# The y-axis label previously read 'MHW', a typo for the plotted 'MWH' column.
plt.scatter(x=hackberry['WindSpeed_mph'], y=hackberry['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Visually inspect generated energy (MWH) against wind direction in degrees.
# The y-axis label previously read 'MHW', a typo for the plotted 'MWH' column.
plt.scatter(x=hackberry['WindDirection_degrees'], y=hackberry['MWH'])
plt.gca().set(xlabel='Wind Direction', ylabel='MWH')
plt.show()

In [None]:
# Visually inspect generated energy (MWH) against wind gust speed.
# The y-axis label previously read 'MHW', a typo for the plotted 'MWH' column.
plt.scatter(x=hackberry['WindGust_mph'], y=hackberry['MWH'])
plt.gca().set(xlabel='Wind Gust', ylabel='MWH')
plt.show()

## Conclusion: a linear regression on wind speed alone would not track the power generated; the direction of the wind needs to be included.

In [None]:
# Subset of rows whose compass wind direction is exactly 'E'.
windEast = hackberry.loc[hackberry['WindDirection_compass'] == 'E']

In [None]:
# Scatter of wind speed against MWH for the 'E' subset.
plt.scatter(x=windEast['WindSpeed_mph'], y=windEast['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'W'.
windWest = hackberry.loc[hackberry['WindDirection_compass'] == 'W']

In [None]:
# Scatter of wind speed against MWH for the 'W' subset.
plt.scatter(x=windWest['WindSpeed_mph'], y=windWest['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'S'.
windSouth = hackberry.loc[hackberry['WindDirection_compass'] == 'S']

In [None]:
# Scatter of wind speed against MWH for the 'S' subset.
plt.scatter(x=windSouth['WindSpeed_mph'], y=windSouth['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'N'.
windNorth = hackberry.loc[hackberry['WindDirection_compass'] == 'N']

In [None]:
# Scatter of wind speed against MWH for the 'N' subset.
plt.scatter(x=windNorth['WindSpeed_mph'], y=windNorth['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'NE'.
windNE = hackberry.loc[hackberry['WindDirection_compass'] == 'NE']

In [None]:
# Scatter of wind speed against MWH for the 'NE' subset.
plt.scatter(x=windNE['WindSpeed_mph'], y=windNE['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'NW'.
windNW = hackberry.loc[hackberry['WindDirection_compass'] == 'NW']

In [None]:
# Scatter of wind speed against MWH for the 'NW' subset.
plt.scatter(x=windNW['WindSpeed_mph'], y=windNW['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'NNE'.
windNNE = hackberry.loc[hackberry['WindDirection_compass'] == 'NNE']

In [None]:
# Scatter of wind speed against MWH for the 'NNE' subset.
plt.scatter(x=windNNE['WindSpeed_mph'], y=windNNE['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Subset of rows whose compass wind direction is exactly 'NNW'.
windNNW = hackberry.loc[hackberry['WindDirection_compass'] == 'NNW']

In [None]:
# Scatter of wind speed against MWH for the 'NNW' subset.
plt.scatter(x=windNNW['WindSpeed_mph'], y=windNNW['MWH'])
plt.gca().set(xlabel='Wind Speed', ylabel='MWH')
plt.show()

In [None]:
# Creating a dictionary with wind direction
#directions = {
#    "N": 1,
#   "S": 2,
#   "E": 3,
#   "W": 4,
#   "NE": 5,
#  "NW": 6,
#   "NNE": 7,
#   "NNW": 8,
#   "SE": 11,
#   "SW": 12,
#   "SSE": 13,
#    "SSW":14,
#    "WNW":15,
#    "WSW":16,
#    "ENE":17,
#    "ESE":18,
    
#}

In [None]:
#hackberry["direction"] = hackberry["WindDirection_compass"].apply(lambda x: directions[x])

In [None]:
# Write the NaN-free merged frame to CSV without the row index.
# NOTE(review): the direction-encoding code in this notebook is commented
# out, so despite the file name no encoded column is written.
hackberry.to_csv(r'Output/hackberry_encoded.csv', index = False)

In [None]:
# Read the CSV written by the previous cell back into a new DataFrame and
# display it.
data = "Output/hackberry_encoded.csv"
hackberry_encoded = pd.read_csv(data)
hackberry_encoded

In [None]:
#hackberryEncoded = hackberry_encoded.drop(["time", "WindDirection_compass", "direction"], axis=1)

In [None]:
# Keep only the numeric columns: drop the timestamp and the textual compass
# direction. ("WindDirection_compass" used to be listed twice.)
hackberryEncoded = hackberry_encoded.drop(columns=["time", "WindDirection_compass"])

In [None]:
# Round the wind speed to whole numbers and store it as integers.
hackberryEncoded = hackberryEncoded.assign(
    WindSpeed_mph=lambda frame: frame["WindSpeed_mph"].round(0).astype(int)
)

In [None]:
# Round the wind direction (degrees) to whole numbers and store it as integers.
hackberryEncoded = hackberryEncoded.assign(
    WindDirection_degrees=lambda frame: frame["WindDirection_degrees"].round(0).astype(int)
)

In [None]:
# Round the wind gust to whole numbers and store it as integers.
hackberryEncoded = hackberryEncoded.assign(
    WindGust_mph=lambda frame: frame["WindGust_mph"].round(0).astype(int)
)

In [None]:
# Round the generated energy to whole numbers and store it as integers; each
# distinct integer later acts as a class label for the classifier.
hackberryEncoded = hackberryEncoded.assign(
    MWH=lambda frame: frame["MWH"].round(0).astype(int)
)

In [None]:
# Confirm that every remaining column is now an integer dtype.
hackberryEncoded.dtypes

In [None]:
# Creating the scaler instance that is fitted on the whole encoded frame in
# the next cell
from sklearn.preprocessing import StandardScaler
data_scaler = StandardScaler()

In [None]:
# Standardise every column of the encoded frame, the MWH target included.
# The result is only inspected in the next two cells; the model further down
# is trained on the separately split, unscaled features.
hackberry_scaled = data_scaler.fit_transform(hackberryEncoded)

In [None]:
# Preview the first five scaled rows.
hackberry_scaled[:5]

In [None]:
# Mean and standard deviation of the first scaled column.
print(hackberry_scaled[:, 0].mean())
print(hackberry_scaled[:, 0].std())

In [None]:
# Feature matrix: every column of the encoded frame except the MWH target.
X = hackberryEncoded.drop(columns="MWH")
X.head()

In [None]:
# Target vector: the MWH column as a flat NumPy array.
y = hackberryEncoded["MWH"].to_numpy()
y[:5]

In [None]:
# Split the features and target into training and testing sets; the fixed
# random_state makes the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
# Fit a StandardScaler on the training features only, then apply the fitted
# scaler to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Report the shape of each training and testing split, in the order
# X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
# Fit a balanced random forest classifier on the unscaled training split,
# then show how many training rows carry each MWH value.
from imblearn.ensemble import BalancedRandomForestClassifier
brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1).fit(X_train, y_train)
Counter(y_train)

In [None]:
# Predict on the test split and compute the balanced accuracy score.
y_pred = brfc.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix of true versus predicted MWH classes on the test split.
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Print the imbalanced-learn classification report for the test split.
print(classification_report_imbalanced(y_true=y_test, y_pred=y_pred))

In [None]:
# Pair each feature importance with its column name and list the pairs from
# most to least important.
importances = brfc.feature_importances_
sorted(zip(importances, X.columns), reverse=True)