# Wind ETL

In [1]:
# Initial Imports
import requests
import pandas as pd
import json
import numpy as np
from datetime import datetime
from datetime import timedelta

In [2]:
# Import API Key
from config import key

# Documentation:
#https://www.worldweatheronline.com/developer/my/analytics.aspx?key_id=222419

## Functions

In [3]:
# Make a request to the worldweatheronline local history weather API page
def makeARequest(location, startDate, endDate, yourAPIKey):
    baseURL = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx"
    timeInterval = "1"
    outputFormatToReturn = "json"

    requestURL = f"{baseURL}?q={location}&date={startDate}&enddate={endDate}&tp={timeInterval}&format={outputFormatToReturn}&key={yourAPIKey}"
    
    response = requests.get(requestURL)

    if response.status_code == 200:
        responseJson = response.json()
        return responseJson
    else:
        return print(response.status_code)

In [4]:
# Pull the wind variables from the responseJson 
def monthlyHistoricalWeather(firstDayOfMonth, lastDayOfMonth, jsonResponse):
    
    first = datetime.strptime(firstDayOfMonth, '%Y-%m-%d')
    last = datetime.strptime(lastDayOfMonth, '%Y-%m-%d')
    numberOfDays = last.day - first.day

    HourlyHistoricalWeather = []
    
    for day in np.arange(0,numberOfDays + 1,1):
        for hour in np.arange(0,24,1):
            HourlyHistoricalWeather.append({
                "Date" : jsonResponse["data"]["weather"][day]["date"],
                "Time" : jsonResponse["data"]["weather"][day]["hourly"][hour]["time"],
                "temperature(F)" : jsonResponse["data"]["weather"][0]["hourly"][0]["tempF"],
                "weatherDescription" : jsonResponse["data"]["weather"][0]["hourly"][0]["weatherDesc"][0]["value"],
                "WindSpeed(mph)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["windspeedMiles"],
                "WindDirection(Degrees)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["winddirDegree"],
                "WindDirection(Compass)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["winddir16Point"],
                "WindGust(mph)" : jsonResponse["data"]["weather"][day]["hourly"][hour]["WindGustMiles"]
            })

    return HourlyHistoricalWeather

In [5]:
# Store the variables in a DataFrame
def monthlyHistoricalWeatherDF(month):
    weatherDataFrame = pd.DataFrame(month)
    return weatherDataFrame

## Extracting Weather Data

In [6]:
# Define the Latitude and longitude of Hackberry Wind Farm
    # https://www.thewindpower.net/windfarm_en_4012_hackberry.php
    # Latitude: 32.776111
    # Longitude: -99.476444
latLong = "32.776111,-99.476444"

In [7]:
# January
date = "2019-01-01"
enddate = "2019-01-31"

responseJson = makeARequest(latLong, date, enddate, key)

January = monthlyHistoricalWeather(date, enddate, responseJson)
JanuaryDF = monthlyHistoricalWeatherDF(January)
JanuaryDF.head()

Unnamed: 0,Date,Time,temperature(F),weatherDescription,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph)
0,2019-01-01,0,35,Clear,12,126,SE,24
1,2019-01-01,100,35,Clear,13,89,E,23
2,2019-01-01,200,35,Clear,14,53,NE,23
3,2019-01-01,300,35,Clear,15,17,NNE,22
4,2019-01-01,400,35,Clear,14,18,NNE,21


In [8]:
# February
date = "2019-02-01"
enddate = "2019-02-28"

responseJson = makeARequest(latLong, date, enddate, key)

February = monthlyHistoricalWeather(date, enddate, responseJson)
FebruaryDF = monthlyHistoricalWeatherDF(February)

In [9]:
# March
date = "2019-03-01"
enddate = "2019-03-31"

responseJson = makeARequest(latLong, date, enddate, key)

March = monthlyHistoricalWeather(date, enddate, responseJson)
MarchDF = monthlyHistoricalWeatherDF(March)

In [10]:
# April
date = "2019-04-01"
enddate = "2019-04-30"

responseJson = makeARequest(latLong, date, enddate, key)

April = monthlyHistoricalWeather(date, enddate, responseJson)
AprilDF = monthlyHistoricalWeatherDF(April)

In [11]:
# May
date = "2019-05-01"
enddate = "2019-05-31"

responseJson = makeARequest(latLong, date, enddate, key)

May = monthlyHistoricalWeather(date, enddate, responseJson)
MayDF = monthlyHistoricalWeatherDF(May)

In [12]:
# June
date = "2019-06-01"
enddate = "2019-06-30"

responseJson = makeARequest(latLong, date, enddate, key)

June = monthlyHistoricalWeather(date, enddate, responseJson)
JuneDF = monthlyHistoricalWeatherDF(June)

In [13]:
# July
date = "2019-07-01"
enddate = "2019-07-31"

responseJson = makeARequest(latLong, date, enddate, key)

July = monthlyHistoricalWeather(date, enddate, responseJson)
JulyDF = monthlyHistoricalWeatherDF(July)

In [14]:
# August
date = "2019-08-01"
enddate = "2019-08-31"

responseJson = makeARequest(latLong, date, enddate, key)

August = monthlyHistoricalWeather(date, enddate, responseJson)
AugustDF = monthlyHistoricalWeatherDF(August)

In [15]:
# September
date = "2019-09-01"
enddate = "2019-09-30"

responseJson = makeARequest(latLong, date, enddate, key)

September = monthlyHistoricalWeather(date, enddate, responseJson)
SeptemberDF = monthlyHistoricalWeatherDF(September)

In [16]:
# October
date = "2019-10-01"
enddate = "2019-10-31"

responseJson = makeARequest(latLong, date, enddate, key)

October = monthlyHistoricalWeather(date, enddate, responseJson)
OctoberDF = monthlyHistoricalWeatherDF(October)

In [17]:
# November
date = "2019-11-01"
enddate = "2019-11-30"

responseJson = makeARequest(latLong, date, enddate, key)

November = monthlyHistoricalWeather(date, enddate, responseJson)
NovemberDF = monthlyHistoricalWeatherDF(November)

In [18]:
# December
date = "2019-12-01"
enddate = "2019-12-31"

responseJson = makeARequest(latLong, date, enddate, key)

December = monthlyHistoricalWeather(date, enddate, responseJson)
DecemberDF = monthlyHistoricalWeatherDF(December)

In [19]:
# January
date = "2020-01-01"
enddate = "2020-01-31"

responseJson = makeARequest(latLong, date, enddate, key)

Jan2020 = monthlyHistoricalWeather(date, enddate, responseJson)
Jan2020DF = monthlyHistoricalWeatherDF(Jan2020)

In [20]:
# February
date = "2020-02-01"
enddate = "2020-02-29"

responseJson = makeARequest(latLong, date, enddate, key)

Feb2020 = monthlyHistoricalWeather(date, enddate, responseJson)
Feb2020DF = monthlyHistoricalWeatherDF(Feb2020)

In [21]:
# March
date = "2020-03-01"
enddate = "2020-03-31"

responseJson = makeARequest(latLong, date, enddate, key)

March2020 = monthlyHistoricalWeather(date, enddate, responseJson)
March2020DF = monthlyHistoricalWeatherDF(March2020)

In [22]:
# April
date = "2020-04-01"
enddate = "2020-04-30"

responseJson = makeARequest(latLong, date, enddate, key)

April2020 = monthlyHistoricalWeather(date, enddate, responseJson)
April2020DF = monthlyHistoricalWeatherDF(April2020)

In [23]:
# May
date = "2020-05-01"
enddate = "2020-05-31"

responseJson = makeARequest(latLong, date, enddate, key)

May2020 = monthlyHistoricalWeather(date, enddate, responseJson)
May2020DF = monthlyHistoricalWeatherDF(May2020)

In [24]:
# June
date = "2020-06-01"
enddate = "2020-06-30"

responseJson = makeARequest(latLong, date, enddate, key)

June2020 = monthlyHistoricalWeather(date, enddate, responseJson)
June2020DF = monthlyHistoricalWeatherDF(June2020)

In [25]:
# July
date = "2020-07-01"
enddate = "2020-07-31"

responseJson = makeARequest(latLong, date, enddate, key)

July2020 = monthlyHistoricalWeather(date, enddate, responseJson)
July2020DF = monthlyHistoricalWeatherDF(July2020)

In [26]:
# Combine each month into a single DataFrame
#hourlyWeatherDF2019 = JanuaryDF.append([FebruaryDF, MarchDF, AprilDF, MayDF, JuneDF, JulyDF, AugustDF, SeptemberDF, OctoberDF, NovemberDF, DecemberDF]) 
hourlyWeatherDF = JanuaryDF.append([FebruaryDF, MarchDF, AprilDF, MayDF, JuneDF, JulyDF, AugustDF, SeptemberDF, OctoberDF, NovemberDF, DecemberDF, Jan2020DF, Feb2020DF, March2020DF, April2020DF, May2020DF, June2020DF, July2020DF]) 
index = np.arange(0,24*578,1)
hourlyWeatherDF = hourlyWeatherDF.set_index(index)
hourlyWeatherDF

Unnamed: 0,Date,Time,temperature(F),weatherDescription,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph)
0,2019-01-01,0,35,Clear,12,126,SE,24
1,2019-01-01,100,35,Clear,13,89,E,23
2,2019-01-01,200,35,Clear,14,53,NE,23
3,2019-01-01,300,35,Clear,15,17,NNE,22
4,2019-01-01,400,35,Clear,14,18,NNE,21
...,...,...,...,...,...,...,...,...
13867,2020-07-31,1900,82,Patchy rain possible,8,104,ESE,11
13868,2020-07-31,2000,82,Patchy rain possible,8,78,ENE,12
13869,2020-07-31,2100,82,Patchy rain possible,7,52,NE,13
13870,2020-07-31,2200,82,Patchy rain possible,7,55,NE,13


In [27]:
hourlyWeatherDF.dtypes

Date                      object
Time                      object
temperature(F)            object
weatherDescription        object
WindSpeed(mph)            object
WindDirection(Degrees)    object
WindDirection(Compass)    object
WindGust(mph)             object
dtype: object

# Performing Transformation on Wind Weather Data

In [28]:
def cleaningDataFrame_datetime(df):
    df['Date'] = pd.to_datetime(df['Date'])
    df['Time'] = df['Time'].astype(int)
    df['Time'] = (df['Time']/100).astype(int)
    df['Time'] = df['Time'].astype('timedelta64[h]')
    df['Time'] = df['Time'] - pd.to_timedelta(df['Time'].dt.days, unit='d')
    df['Date_Time'] = df['Date'] + df['Time']
    df = df.drop(['Time', 'Date'], 1)
    return df

In [29]:
def cleaningDataFrame_wind(df):
    df.rename(columns = {'temperature(F)':'temperature_F','WindSpeed(mph)':'WindSpeed_mph', 'WindDirection(Degrees)':'WindDirection_degrees',   'WindDirection(Compass)':'WindDirection_compass', 'WindGust(mph)':'WindGust_mph'}, inplace = True)
    df['temperature_F'] = df['temperature_F'].astype(int)
    df['weatherDescription'] = df['weatherDescription'].astype(str)
    df['WindSpeed_mph'] = df['WindSpeed_mph'].astype(int)
    df['WindDirection_degrees'] = df['WindDirection_degrees'].astype(int)
    df['WindGust_mph'] = df['WindGust_mph'].astype(int)
    #df = df.drop(df.index[0])
    return df

In [30]:
cleanDateTime_hourlyWeatherDF = cleaningDataFrame_datetime(hourlyWeatherDF)
cleanDateTime_hourlyWeatherDF.head()

Unnamed: 0,temperature(F),weatherDescription,WindSpeed(mph),WindDirection(Degrees),WindDirection(Compass),WindGust(mph),Date_Time
0,35,Clear,12,126,SE,24,2019-01-01 00:00:00
1,35,Clear,13,89,E,23,2019-01-01 01:00:00
2,35,Clear,14,53,NE,23,2019-01-01 02:00:00
3,35,Clear,15,17,NNE,22,2019-01-01 03:00:00
4,35,Clear,14,18,NNE,21,2019-01-01 04:00:00


In [31]:
cleaned_hourlyWeatherDF = cleaningDataFrame_wind(cleanDateTime_hourlyWeatherDF)
print(cleaned_hourlyWeatherDF.shape)
cleaned_hourlyWeatherDF.head()

(13872, 7)


Unnamed: 0,temperature_F,weatherDescription,WindSpeed_mph,WindDirection_degrees,WindDirection_compass,WindGust_mph,Date_Time
0,35,Clear,12,126,SE,24,2019-01-01 00:00:00
1,35,Clear,13,89,E,23,2019-01-01 01:00:00
2,35,Clear,14,53,NE,23,2019-01-01 02:00:00
3,35,Clear,15,17,NNE,22,2019-01-01 03:00:00
4,35,Clear,14,18,NNE,21,2019-01-01 04:00:00


In [32]:
cleaned_hourlyWeatherDF.dtypes

temperature_F                     int32
weatherDescription               object
WindSpeed_mph                     int32
WindDirection_degrees             int32
WindDirection_compass            object
WindGust_mph                      int32
Date_Time                datetime64[ns]
dtype: object

In [33]:
# Uncomment to upload data
#windDF.to_csv(r'Output/weather_wind.csv', index = False)

# Extracting and Transoforming the Hackberry Wind Energy Data

In [34]:
data = "Resources/Hackberry_Generation.csv"
Hackberry_df = pd.read_csv(data)
print(Hackberry_df.shape)
Hackberry_df.head()

(13871, 4)


Unnamed: 0,Unit,Date,Hour Ending,MWH
0,HWF_HWFG1,20190101,100,110.48795
1,HWF_HWFG1,20190101,200,72.020225
2,HWF_HWFG1,20190101,300,67.639475
3,HWF_HWFG1,20190101,400,63.7189
4,HWF_HWFG1,20190101,500,61.26425


In [35]:
def cleanRenewableFarmData(df):
    '''
    This function cleans the raw farm data.
    '''
    df = df.drop('Unit', 1)
    df.rename(columns = {'Hour Ending':'Hour'}, inplace = True)
    df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d')
    df['Hour'] = df['Hour'].astype(str).str[:-2].astype(np.int64)
    df['hour'] = pd.to_timedelta(df['Hour'], unit='h')
    df['hour'] = df['hour'] - pd.to_timedelta(df['hour'].dt.days, unit='d')
    df = df.drop('Hour', 1)
    df = df[["Date", "hour", "MWH"]]
    df['Date_Time'] = df['Date'] + df['hour']
    df = df[["Date_Time", "MWH"]]
    return df

In [36]:
cleaned_Hackberry_df = cleanRenewableFarmData(Hackberry_df)
print(cleaned_Hackberry_df.shape)
cleaned_Hackberry_df

(13871, 2)


Unnamed: 0,Date_Time,MWH
0,2019-01-01 01:00:00,110.487950
1,2019-01-01 02:00:00,72.020225
2,2019-01-01 03:00:00,67.639475
3,2019-01-01 04:00:00,63.718900
4,2019-01-01 05:00:00,61.264250
...,...,...
13866,2020-07-31 20:00:00,4.998600
13867,2020-07-31 21:00:00,16.390275
13868,2020-07-31 22:00:00,20.637800
13869,2020-07-31 23:00:00,13.998975


In [37]:
# check data types to make sure they are correct
cleaned_Hackberry_df.dtypes

Date_Time    datetime64[ns]
MWH                 float64
dtype: object

In [38]:
# Uncomment to upload
# cleaned_Hackberry_df.to_csv(r'Output/Hackberry_MWH.csv', index = False)

## Merging Hackberry Energy Data with Wind Weather Data

In [39]:
Hackberry_Wind_MWH = pd.merge(cleaned_hourlyWeatherDF, cleaned_Hackberry_df, on='Date_Time', how='outer')
Hackberry_Wind_MWH

Unnamed: 0,temperature_F,weatherDescription,WindSpeed_mph,WindDirection_degrees,WindDirection_compass,WindGust_mph,Date_Time,MWH
0,35,Clear,12,126,SE,24,2019-01-01 00:00:00,5.009100
1,35,Clear,13,89,E,23,2019-01-01 01:00:00,110.487950
2,35,Clear,14,53,NE,23,2019-01-01 02:00:00,72.020225
3,35,Clear,15,17,NNE,22,2019-01-01 03:00:00,67.639475
4,35,Clear,14,18,NNE,21,2019-01-01 04:00:00,63.718900
...,...,...,...,...,...,...,...,...
13868,82,Patchy rain possible,8,104,ESE,11,2020-07-31 19:00:00,10.764125
13869,82,Patchy rain possible,8,78,ENE,12,2020-07-31 20:00:00,4.998600
13870,82,Patchy rain possible,7,52,NE,13,2020-07-31 21:00:00,16.390275
13871,82,Patchy rain possible,7,55,NE,13,2020-07-31 22:00:00,20.637800


In [50]:
# Uncomment to upload csv
Hackberry_Wind_MWH.to_csv(r'Output/Hackberry_Wind_MWH.csv', index = False)

# Mongo


In [41]:
# import dependencies
import config
import pymongo
import pandas as pd
import json

In [42]:
# set string variables
DEFAULT_DATABASE = 'wind_solar_data' 
USERNAME = config.USERNAME
PASSWORD = config.PASSWORD

#create connection to database
client = pymongo.MongoClient(f"mongodb+srv://{USERNAME}:{PASSWORD}@austin-green-energy.pwzpm.mongodb.net/{DEFAULT_DATABASE}?retryWrites=true&w=majority")
try:
    client.server_info()
    print("Mongodb connected")
except:
    print("The Mongodb failed to connect. Check username/password in connection string.")


Mongodb connected


In [51]:
# Uploading the wind data to the Database

# select database
db = client.get_database('wind_solar_data')
# select collection
collection = db.wind_data

# pull the csv from file
wind_data = wind_data = pd.read_csv('.\Output\Hackberry_Wind_MWH.csv')  
# turn the CSV into a JSON
wind_data_json = json.loads(wind_data.to_json(orient='records'))

# remove what is in the collection cureently
collection.remove()
# insert the new JSON data into the database
collection.insert(wind_data_json)

f95bc0fb207a563b6c8528b'),
 ObjectId('5f95bc0fb207a563b6c8528c'),
 ObjectId('5f95bc0fb207a563b6c8528d'),
 ObjectId('5f95bc0fb207a563b6c8528e'),
 ObjectId('5f95bc0fb207a563b6c8528f'),
 ObjectId('5f95bc0fb207a563b6c85290'),
 ObjectId('5f95bc0fb207a563b6c85291'),
 ObjectId('5f95bc0fb207a563b6c85292'),
 ObjectId('5f95bc0fb207a563b6c85293'),
 ObjectId('5f95bc0fb207a563b6c85294'),
 ObjectId('5f95bc0fb207a563b6c85295'),
 ObjectId('5f95bc0fb207a563b6c85296'),
 ObjectId('5f95bc0fb207a563b6c85297'),
 ObjectId('5f95bc0fb207a563b6c85298'),
 ObjectId('5f95bc0fb207a563b6c85299'),
 ObjectId('5f95bc0fb207a563b6c8529a'),
 ObjectId('5f95bc0fb207a563b6c8529b'),
 ObjectId('5f95bc0fb207a563b6c8529c'),
 ObjectId('5f95bc0fb207a563b6c8529d'),
 ObjectId('5f95bc0fb207a563b6c8529e'),
 ObjectId('5f95bc0fb207a563b6c8529f'),
 ObjectId('5f95bc0fb207a563b6c852a0'),
 ObjectId('5f95bc0fb207a563b6c852a1'),
 ObjectId('5f95bc0fb207a563b6c852a2'),
 ObjectId('5f95bc0fb207a563b6c852a3'),
 ObjectId('5f95bc0fb207a563b6c852a4')

In [52]:
# select database
db = client.get_database('wind_solar_data')
# select collection
collection = db.wind_data

# pull collection into dataframe
wind_df = pd.DataFrame(list(collection.find()))
wind_df

Unnamed: 0,_id,temperature_F,weatherDescription,WindSpeed_mph,WindDirection_degrees,WindDirection_compass,WindGust_mph,Date_Time,MWH
0,5f95bc0fb207a563b6c850a4,35,Clear,12,126,SE,24,2019-01-01 00:00:00,5.009100
1,5f95bc0fb207a563b6c850a5,35,Clear,13,89,E,23,2019-01-01 01:00:00,110.487950
2,5f95bc0fb207a563b6c850a6,35,Clear,14,53,NE,23,2019-01-01 02:00:00,72.020225
3,5f95bc0fb207a563b6c850a7,35,Clear,15,17,NNE,22,2019-01-01 03:00:00,67.639475
4,5f95bc0fb207a563b6c850a8,35,Clear,14,18,NNE,21,2019-01-01 04:00:00,63.718900
...,...,...,...,...,...,...,...,...,...
13868,5f95bc0fb207a563b6c886d0,82,Patchy rain possible,8,104,ESE,11,2020-07-31 19:00:00,10.764125
13869,5f95bc0fb207a563b6c886d1,82,Patchy rain possible,8,78,ENE,12,2020-07-31 20:00:00,4.998600
13870,5f95bc0fb207a563b6c886d2,82,Patchy rain possible,7,52,NE,13,2020-07-31 21:00:00,16.390275
13871,5f95bc0fb207a563b6c886d3,82,Patchy rain possible,7,55,NE,13,2020-07-31 22:00:00,20.637800
