In [1]:
# Initial Imports
import requests
import pandas as pd
import json
import numpy as np
from datetime import datetime
from datetime import timedelta

In [2]:
# Import API Key
from config import key

# Documentation:
#https://www.worldweatheronline.com/developer/my/analytics.aspx?key_id=222419

In [3]:
# Make a request to the worldweatheronline local history weather API page
def makeARequest(location, startDate, endDate, yourAPIKey):
    baseURL = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx"
    timeInterval = "1"
    outputFormatToReturn = "json"

    requestURL = f"{baseURL}?q={location}&date={startDate}&enddate={endDate}&tp={timeInterval}&format={outputFormatToReturn}&key={yourAPIKey}"
    
    response = requests.get(requestURL)

    if response.status_code == 200:
        responseJson = response.json()
        return responseJson
    else:
        return print(response.status_code)

In [4]:
# Pull the wind variables from the responseJson 
def monthlyHistoricalWeather(firstDayOfMonth, lastDayOfMonth, jsonResponse):
    
    first = datetime.strptime(firstDayOfMonth, '%Y-%m-%d')
    last = datetime.strptime(lastDayOfMonth, '%Y-%m-%d')
    numberOfDays = last.day - first.day

    HourlyHistoricalWeather = []
    
    for day in np.arange(0,numberOfDays + 1,1):
        for hour in np.arange(0,24,1):
            HourlyHistoricalWeather.append({
                "Date" : jsonResponse["data"]["weather"][day]["date"],
                "Time" : jsonResponse["data"]["weather"][day]["hourly"][hour]["time"],
                "WindSpeed(mph)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["windspeedMiles"],
                "WindDirection(Degrees)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["winddirDegree"],
                "WindDirection(Compass)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["winddir16Point"],
                "WindGust(mph)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["WindGustMiles"]
            })

    return HourlyHistoricalWeather

In [5]:

# Store the variables in a DataFrame
def monthlyHistoricalWeatherDF(month):
    weatherDataFrame = pd.DataFrame(month)
    return weatherDataFrame

In [6]:
# Define the Latitude and longitude of Hackberry Wind Farm
    # https://www.thewindpower.net/windfarm_en_4012_hackberry.php
    # Latitude: 32.776111
    # Longitude: -99.476444
latLong = "32.776111,-99.476444"

In [7]:
# January
date = "2019-01-01"
enddate = "2019-01-31"

responseJson = makeARequest(latLong, date, enddate, key)

January = monthlyHistoricalWeather(date, enddate, responseJson)
JanuaryDF = monthlyHistoricalWeatherDF(January)
JanuaryDF.head()

Unnamed: 0,Date,Time,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph)
0,2019-01-01,0,12,126,SE,24
1,2019-01-01,100,13,89,E,23
2,2019-01-01,200,14,53,NE,23
3,2019-01-01,300,15,17,NNE,22
4,2019-01-01,400,14,18,NNE,21


In [8]:
# February
date = "2019-02-01"
enddate = "2019-02-28"

responseJson = makeARequest(latLong, date, enddate, key)

February = monthlyHistoricalWeather(date, enddate, responseJson)
FebruaryDF = monthlyHistoricalWeatherDF(February)

In [9]:

# March
date = "2019-03-01"
enddate = "2019-03-31"

responseJson = makeARequest(latLong, date, enddate, key)

March = monthlyHistoricalWeather(date, enddate, responseJson)
MarchDF = monthlyHistoricalWeatherDF(March)

In [10]:
# April
date = "2019-04-01"
enddate = "2019-04-30"

responseJson = makeARequest(latLong, date, enddate, key)

April = monthlyHistoricalWeather(date, enddate, responseJson)
AprilDF = monthlyHistoricalWeatherDF(April)

In [11]:

# May
date = "2019-05-01"
enddate = "2019-05-31"

responseJson = makeARequest(latLong, date, enddate, key)

May = monthlyHistoricalWeather(date, enddate, responseJson)
MayDF = monthlyHistoricalWeatherDF(May)

In [12]:
# June
date = "2019-06-01"
enddate = "2019-06-30"

responseJson = makeARequest(latLong, date, enddate, key)

June = monthlyHistoricalWeather(date, enddate, responseJson)
JuneDF = monthlyHistoricalWeatherDF(June)

In [13]:

# July
date = "2019-07-01"
enddate = "2019-07-31"

responseJson = makeARequest(latLong, date, enddate, key)

July = monthlyHistoricalWeather(date, enddate, responseJson)
JulyDF = monthlyHistoricalWeatherDF(July)

In [14]:
# August
date = "2019-08-01"
enddate = "2019-08-31"

responseJson = makeARequest(latLong, date, enddate, key)

August = monthlyHistoricalWeather(date, enddate, responseJson)
AugustDF = monthlyHistoricalWeatherDF(August)

In [15]:
# September
date = "2019-09-01"
enddate = "2019-09-30"

responseJson = makeARequest(latLong, date, enddate, key)

September = monthlyHistoricalWeather(date, enddate, responseJson)
SeptemberDF = monthlyHistoricalWeatherDF(September)

In [16]:
# October
date = "2019-10-01"
enddate = "2019-10-31"

responseJson = makeARequest(latLong, date, enddate, key)

October = monthlyHistoricalWeather(date, enddate, responseJson)
OctoberDF = monthlyHistoricalWeatherDF(October)

In [17]:
# November
date = "2019-11-01"
enddate = "2019-11-30"

responseJson = makeARequest(latLong, date, enddate, key)

November = monthlyHistoricalWeather(date, enddate, responseJson)
NovemberDF = monthlyHistoricalWeatherDF(November)

In [18]:
# December
date = "2019-12-01"
enddate = "2019-12-31"

responseJson = makeARequest(latLong, date, enddate, key)

December = monthlyHistoricalWeather(date, enddate, responseJson)
DecemberDF = monthlyHistoricalWeatherDF(December)

In [19]:
# Combine each month into a single DataFrame
hourlyWeatherDF2019 = JanuaryDF.append([FebruaryDF, MarchDF, AprilDF, MayDF, JuneDF, JulyDF, AugustDF, SeptemberDF, OctoberDF, NovemberDF, DecemberDF]) 

hourlyWeatherDF2019

Unnamed: 0,Date,Time,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph)
0,2019-01-01,0,12,126,SE,24
1,2019-01-01,100,13,89,E,23
2,2019-01-01,200,14,53,NE,23
3,2019-01-01,300,15,17,NNE,22
4,2019-01-01,400,14,18,NNE,21
...,...,...,...,...,...,...
739,2019-12-31,1900,6,175,S,12
740,2019-12-31,2000,6,176,S,13
741,2019-12-31,2100,7,176,S,14
742,2019-12-31,2200,7,176,S,15


In [20]:
hourlyWeatherDF2019.dtypes

Date                      object
Time                      object
WindSpeed(mph)            object
WindDirection(Degrees)    object
WindDirection(Compass)    object
WindGust(mph)             object
dtype: object

In [21]:
hourlyWeatherDF2019['Time'] = hourlyWeatherDF2019['Time'].astype(str).astype(int)

In [22]:
hourlyWeatherDF2019.dtypes

Date                      object
Time                       int32
WindSpeed(mph)            object
WindDirection(Degrees)    object
WindDirection(Compass)    object
WindGust(mph)             object
dtype: object

In [23]:
# Removing the last two characters from the hour column as the times are hourly
hourlyWeatherDF2019['Time'] = hourlyWeatherDF2019['Time'].astype(np.int64)

In [24]:
hourlyWeatherDF2019

Unnamed: 0,Date,Time,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph)
0,2019-01-01,0,12,126,SE,24
1,2019-01-01,100,13,89,E,23
2,2019-01-01,200,14,53,NE,23
3,2019-01-01,300,15,17,NNE,22
4,2019-01-01,400,14,18,NNE,21
...,...,...,...,...,...,...
739,2019-12-31,1900,6,175,S,12
740,2019-12-31,2000,6,176,S,13
741,2019-12-31,2100,7,176,S,14
742,2019-12-31,2200,7,176,S,15


In [25]:
hourlyWeatherDF2019.dtypes

Date                      object
Time                       int64
WindSpeed(mph)            object
WindDirection(Degrees)    object
WindDirection(Compass)    object
WindGust(mph)             object
dtype: object

In [26]:
hourlyWeatherDF2019['hour'] =pd.to_timedelta(hourlyWeatherDF2019['Time'], unit='h')

In [27]:
hourlyWeatherDF2019

Unnamed: 0,Date,Time,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph),hour
0,2019-01-01,0,12,126,SE,24,0 days 00:00:00
1,2019-01-01,100,13,89,E,23,4 days 04:00:00
2,2019-01-01,200,14,53,NE,23,8 days 08:00:00
3,2019-01-01,300,15,17,NNE,22,12 days 12:00:00
4,2019-01-01,400,14,18,NNE,21,16 days 16:00:00
...,...,...,...,...,...,...,...
739,2019-12-31,1900,6,175,S,12,79 days 04:00:00
740,2019-12-31,2000,6,176,S,13,83 days 08:00:00
741,2019-12-31,2100,7,176,S,14,87 days 12:00:00
742,2019-12-31,2200,7,176,S,15,91 days 16:00:00


In [28]:
# Creating new column called 'hour' in timedelta format without date
hourlyWeatherDF2019['hour'] = hourlyWeatherDF2019['hour'] - pd.to_timedelta(hourlyWeatherDF2019['hour'].dt.days, unit='d')

In [29]:
# Dropping the original 'Hour' column
hourlyWeatherDF2019 = hourlyWeatherDF2019.drop('Time', 1)

In [30]:
hourlyWeatherDF2019

Unnamed: 0,Date,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph),hour
0,2019-01-01,12,126,SE,24,00:00:00
1,2019-01-01,13,89,E,23,04:00:00
2,2019-01-01,14,53,NE,23,08:00:00
3,2019-01-01,15,17,NNE,22,12:00:00
4,2019-01-01,14,18,NNE,21,16:00:00
...,...,...,...,...,...,...
739,2019-12-31,6,175,S,12,04:00:00
740,2019-12-31,6,176,S,13,08:00:00
741,2019-12-31,7,176,S,14,12:00:00
742,2019-12-31,7,176,S,15,16:00:00


In [31]:
hourlyWeatherDF2019

Unnamed: 0,Date,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph),hour
0,2019-01-01,12,126,SE,24,00:00:00
1,2019-01-01,13,89,E,23,04:00:00
2,2019-01-01,14,53,NE,23,08:00:00
3,2019-01-01,15,17,NNE,22,12:00:00
4,2019-01-01,14,18,NNE,21,16:00:00
...,...,...,...,...,...,...
739,2019-12-31,6,175,S,12,04:00:00
740,2019-12-31,6,176,S,13,08:00:00
741,2019-12-31,7,176,S,14,12:00:00
742,2019-12-31,7,176,S,15,16:00:00


In [32]:
data = "Resources/Hackberry_Generation.csv"
Hackberry_df = pd.read_csv(data)
Hackberry_df

Unnamed: 0,Unit,Date,Hour Ending,MWH
0,HWF_HWFG1,20190101,100,110.487950
1,HWF_HWFG1,20190101,200,72.020225
2,HWF_HWFG1,20190101,300,67.639475
3,HWF_HWFG1,20190101,400,63.718900
4,HWF_HWFG1,20190101,500,61.264250
...,...,...,...,...
13866,HWF_HWFG1,20200731,2000,4.998600
13867,HWF_HWFG1,20200731,2100,16.390275
13868,HWF_HWFG1,20200731,2200,20.637800
13869,HWF_HWFG1,20200731,2300,13.998975


In [33]:
Hackberry_df.dtypes

Unit            object
Date             int64
Hour Ending     object
MWH            float64
dtype: object

In [34]:
# Dropping Unit Column
Hackberry_df = Hackberry_df.drop('Unit', 1)

In [35]:
# Renaming column name from 'Hour Ending' to 'Hour'
Hackberry_df.rename(columns = {'Hour Ending':'Hour'}, inplace = True)

In [36]:
# Converting Date to datetime
Hackberry_df['Date'] = pd.to_datetime(Hackberry_df['Date'], format='%Y%m%d')

In [37]:
# Checking data type
Hackberry_df.dtypes

Date    datetime64[ns]
Hour            object
MWH            float64
dtype: object

In [38]:
Hackberry_df

Unnamed: 0,Date,Hour,MWH
0,2019-01-01,100,110.487950
1,2019-01-01,200,72.020225
2,2019-01-01,300,67.639475
3,2019-01-01,400,63.718900
4,2019-01-01,500,61.264250
...,...,...,...
13866,2020-07-31,2000,4.998600
13867,2020-07-31,2100,16.390275
13868,2020-07-31,2200,20.637800
13869,2020-07-31,2300,13.998975


In [39]:
# Removing the last two characters from the hour column as the times are hourly
Hackberry_df['Hour'] = Hackberry_df['Hour'].astype(str).str[:-2].astype(np.int64)

In [40]:
# Checking Hour data type
Hackberry_df.dtypes

Date    datetime64[ns]
Hour             int64
MWH            float64
dtype: object

In [41]:
Hackberry_df['hour'] =pd.to_timedelta(Hackberry_df['Hour'], unit='h')

In [42]:
# Creating new column called 'hour' in timedelta format without date
Hackberry_df['hour'] = Hackberry_df['hour'] - pd.to_timedelta(Hackberry_df['hour'].dt.days, unit='d')

In [43]:
# Dropping the original 'Hour' column
Hackberry_df = Hackberry_df.drop('Hour', 1)

In [44]:
# Re-arranging the column headers
Hackberry_df = Hackberry_df[["Date", "hour", "MWH"]]

In [45]:
Hackberry_df.dtypes

Date     datetime64[ns]
hour    timedelta64[ns]
MWH             float64
dtype: object

In [46]:
# Combining Date and hour to a single column as they are in datetime and timedelta formats
Hackberry_df['time'] = Hackberry_df['Date'] + Hackberry_df['hour']

In [47]:
Hackberry_df

Unnamed: 0,Date,hour,MWH,time
0,2019-01-01,01:00:00,110.487950,2019-01-01 01:00:00
1,2019-01-01,02:00:00,72.020225,2019-01-01 02:00:00
2,2019-01-01,03:00:00,67.639475,2019-01-01 03:00:00
3,2019-01-01,04:00:00,63.718900,2019-01-01 04:00:00
4,2019-01-01,05:00:00,61.264250,2019-01-01 05:00:00
...,...,...,...,...
13866,2020-07-31,20:00:00,4.998600,2020-07-31 20:00:00
13867,2020-07-31,21:00:00,16.390275,2020-07-31 21:00:00
13868,2020-07-31,22:00:00,20.637800,2020-07-31 22:00:00
13869,2020-07-31,23:00:00,13.998975,2020-07-31 23:00:00


In [48]:
# Re-arranging the column headers and removing the Date and hour columns
Hackberry_df = Hackberry_df[["time", "MWH"]]

In [49]:
Hackberry_df.dtypes

time    datetime64[ns]
MWH            float64
dtype: object

In [50]:
Hackberry_df

Unnamed: 0,time,MWH
0,2019-01-01 01:00:00,110.487950
1,2019-01-01 02:00:00,72.020225
2,2019-01-01 03:00:00,67.639475
3,2019-01-01 04:00:00,63.718900
4,2019-01-01 05:00:00,61.264250
...,...,...
13866,2020-07-31 20:00:00,4.998600
13867,2020-07-31 21:00:00,16.390275
13868,2020-07-31 22:00:00,20.637800
13869,2020-07-31 23:00:00,13.998975


In [51]:
import warnings
warnings.filterwarnings('ignore')

In [52]:
from pathlib import Path
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [53]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [54]:
# Visually inspect the data
plt.scatter(df.WindSpeed(mph), df.MWH)
plt.xlabel('Wind Speed')
plt.ylabel('MWH')
plt.show()

NameError: name 'df' is not defined

In [None]:
X = df.WindSpeed(mph).values.reshape(-1, 1)

In [None]:
#Examining the first 5 entries
X[:5]

In [None]:
# Create our features
X = df.copy()
X = pd.get_dummies(df.drop('MWH', axis=1))

# Create our target
y = df['MWH'].tolist()
y[:5]

In [None]:
# Split the X and y into X_train, X_test, y_train, y_test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
Counter(y_train)