In [1]:
#import libraries
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
import matplotlib.pyplot as plt

In [2]:
# Load the cleaned dataset and discard the columns this notebook does not use.
file_path = 'clean_data.csv'
unused_columns = ['Daily MWH', 'Unnamed: 0', 'HashedIndex', 'Combined', 'Year', 'Month', 'Day']
data = pd.read_csv(file_path).drop(columns=unused_columns)

# Store the target column as float regardless of how the CSV parser typed it.
data['Demand'] = data['Demand'].astype(float)

In [3]:
# Preview the first five rows to confirm the load and the column drop worked.
data.head(n=5)

Unnamed: 0,Demand,Fuel,Date,Time,Temp Max Dallas North Central,Temp Avg Dallas North Central,TempMin Dallas North Central,Dew Point Max Dallas North Central,Dew Point Avg Dallas North Central,Dew Point Min Dallas North Central,...,Humidity Percent Max Midland Far West,Humidity Percent Avg Midland Far West,Humidity Percent Min Midland Far West,Wind Speed Max Midland Far West,Wind Speed Avg Midland Far West,Wind Speed Min Midland Far West,Pressures Inch Max Midland Far West,Pressure InchAvg Midland Far West,Pressure Inch Min Midland Far West,Precipitation Total Midland Far West
0,3376.329577,Coal,2007-01-01,00:00:00,52.0,41.3,34,29,27.0,25,...,88,65.0,41,13,8.0,3,27.3,27.3,27.2,0.0
1,3368.077206,Coal,2007-01-01,00:15:00,52.0,41.3,34,29,27.0,25,...,88,65.0,41,13,8.0,3,27.3,27.3,27.2,0.0
2,3365.003007,Coal,2007-01-01,00:30:00,52.0,41.3,34,29,27.0,25,...,88,65.0,41,13,8.0,3,27.3,27.3,27.2,0.0
3,3364.70059,Coal,2007-01-01,00:45:00,52.0,41.3,34,29,27.0,25,...,88,65.0,41,13,8.0,3,27.3,27.3,27.2,0.0
4,3337.219223,Coal,2007-01-01,01:00:00,52.0,41.3,34,29,27.0,25,...,88,65.0,41,13,8.0,3,27.3,27.3,27.2,0.0


In [23]:
# Spot check: select every row recorded on 2011-06-02.
# NOTE(review): the execution count on this cell (23) is out of sequence with its
# neighbours, and the saved output shows only the header row, i.e. no rows matched.
q = data.loc[data['Date'] == '2011-06-02']
q

Unnamed: 0,Demand,Fuel,Date,Time,Temp Max Dallas North Central,Temp Avg Dallas North Central,TempMin Dallas North Central,Dew Point Max Dallas North Central,Dew Point Avg Dallas North Central,Dew Point Min Dallas North Central,...,Humidity Percent Max Midland Far West,Humidity Percent Avg Midland Far West,Humidity Percent Min Midland Far West,Wind Speed Max Midland Far West,Wind Speed Avg Midland Far West,Wind Speed Min Midland Far West,Pressures Inch Max Midland Far West,Pressure InchAvg Midland Far West,Pressure Inch Min Midland Far West,Precipitation Total Midland Far West


Separate data by fuel type

In [4]:
# Collect the distinct values of the 'Fuel' column, in order of first appearance.
# NOTE(review): the saved output lists both ' Oth' (with a leading space) and
# 'Other' - possibly the same category spelled two ways; confirm upstream.
fuel_types = data.loc[:, 'Fuel'].unique()
print(fuel_types)

['Coal' 'Gas' 'Hydro' 'Nuclear' ' Oth' 'Wind' 'Other' 'Biomass' 'Solar']


In [5]:
# Build one DataFrame per fuel type, keyed as "<lowercased fuel type>_data".
# Each frame holds only the rows for that fuel; the now-constant 'Fuel' column
# is dropped from it.
dataframes = {
    f"{fuel_type.lower()}_data": data[data["Fuel"] == fuel_type].drop(columns='Fuel')
    for fuel_type in fuel_types
}

In [6]:
# Sanity check: list the dictionary keys to confirm that one DataFrame was
# stored per fuel type, each keyed as "<lowercased fuel type>_data".
dataframes.keys()

dict_keys(['coal_data', 'gas_data', 'hydro_data', 'nuclear_data', ' oth_data', 'wind_data', 'other_data', 'biomass_data', 'solar_data'])

In [7]:
# Bind the coal subset to a short name for easier reference. This is the same
# object that is stored in `dataframes`, not a copy.
coal_data = dataframes['coal_data']

In [8]:
# Handle missing values (if any) by carrying the last valid observation forward.
# DataFrame.ffill is used instead of fillna(method='ffill') because the `method`
# argument of fillna is deprecated in recent pandas releases.
# inplace=True is kept so that the frame stored under dataframes['coal_data']
# (the same object as `coal_data`) is updated as well.
coal_data.ffill(inplace=True)

In [9]:
# Save features to a list for scaling
# features = dataframes['coal_data'].columns

# feature_list = []
# for feature in features:
#     feature_list.append(feature)

# print(feature_list)

In [10]:
#features = dataframes['coal_data'][['Demand', 'Fuel', 'Date', 'Time', 'Temp Max Dallas North Central', 'Temp Avg Dallas North Central', 'TempMin Dallas North Central', 'Dew Point Max Dallas North Central', 'Dew Point Avg Dallas North Central', 'Dew Point Min Dallas North Central', 'Humidity Percent Max Dallas North Central', 'Humidity Percent Avg Dallas North Central', 'Humidity Percent Min Dallas North Central', 'Wind Speed Max Dallas North Central', 'Wind Speed Avg Dallas North Central', 'Wind Speed Min Dallas North Central', 'Pressures Inch Max Dallas North Central', 'Pressure InchAvg Dallas North Central', 'Pressure Inch Min Dallas North Central', 'Precipitation Total Dallas North Central', 'Temp Max Abilene West', 'Temp Avg Abilene West', 'TempMin Abilene West', 'Dew Point Max Abilene West', 'Dew Point Avg Abilene West', 'Dew Point Min Abilene West', 'Humidity Percent Max Abilene West', 'Humidity Percent Avg Abilene West', 'Humidity Percent Min Abilene West', 'Wind Speed Max Abilene West', 'Wind Speed Avg Abilene West', 'Wind Speed Min Abilene West', 'Pressures Inch Max Abilene West', 'Pressure InchAvg Abilene West', 'Pressure Inch Min Abilene West', 'Precipitation Total Abilene West', 'Temp Max Austin  South Central', 'Temp Avg Austin  South Central', 'TempMin Austin  South Central', 'Dew Point Max Austin  South Central', 'Dew Point Avg Austin  South Central', 'Dew Point Min Austin  South Central', 'Humidity Percent Max Austin  South Central', 'Humidity Percent Avg Austin  South Central', 'Humidity Percent Min Austin  South Central', 'Wind Speed Max Austin  South Central', 'Wind Speed Avg Austin  South Central', 'Wind Speed Min Austin  South Central', 'Pressures Inch Max Austin  South Central', 'Pressure InchAvg Austin  South Central', 'Pressure Inch Min Austin  South Central', 'Precipitation Total Austin  South Central', 'Temp Max Tyler East', 'Temp Avg Tyler East', 'TempMin Tyler East', 'Dew Point Max Tyler East', 'Dew Point Avg Tyler East', 'Dew Point Min 
Tyler East', 'Humidity Percent Max Tyler East', 'Humidity Percent Avg Tyler East', 'Humidity Percent Min Tyler East', 'Wind Speed Max Tyler East', 'Wind Speed Avg Tyler East', 'Wind Speed Min Tyler East', 'Pressures Inch Max Tyler East', 'Pressure InchAvg Tyler East', 'Pressure Inch Min Tyler East', 'Precipitation Total Tyler East', 'Temp Max Houston Coast', 'Temp Avg Houst Coast', 'TempMin Houston Coast', 'Dew Point Max Houston Coast', 'Dew Point Avg Houston Coast', 'Dew Point Min Houston Coast', 'Humidity Percent Max Houston Coast', 'Humidity Percent Avg Houston Coast', 'Humidity Percent Min Houston Coast', 'Wind Speed Max Houston Coast', 'Wind Speed Avg Houston Coast', 'Wind Speed Min Houston Coast', 'Pressures Inch Max Houston Coast', 'Pressure InchAvg Houston Coast', 'Pressure Inch Min Houston Coast', 'Precipitation Total Houston Coast', 'Temp Max Corpus Christi South', 'Temp Avg  Corpus Christi South', 'TempMin  Corpus Christi South', 'Dew Point Max  Corpus Christi South', 'Dew Point Avg  Corpus Christi South', 'Dew Point Min  Corpus Christi South', 'Humidity Percent Max Corpus Christi South', 'Humidity Percent Avg Corpus Christi South', 'Humidity Percent Min Corpus Christi South', 'Wind Speed Max Corpus Christi South', 'Wind Speed Avg Corpus Christi South', 'Wind Speed Min Corpus Christi South', 'Pressures Inch Max Corpus Christi South', 'Pressure InchAvg Corpus Christi South', 'Pressure Inch Min Corpus Christi South', 'Precipitation Total Corpus Christi South', 'Temp Max Lubbock North', 'Temp Avg Lubbock North', 'TempMin Lubbock North', 'Dew Point Max Lubbock North', 'Dew Point Avg Lubbock North', 'Dew Point Min Lubbock North', 'Humidity Percent Max Lubbock North', 'Humidity Percent Avg Lubbock North', 'Humidity Percent Min Lubbock North', 'Wind Speed Max Lubbock North', 'Wind Speed Avg Lubbock North', 'Wind Speed Min Lubbock North', 'Pressures Inch Max Lubbock North', 'Pressure InchAvg Lubbock North', 'Pressure Inch Min Lubbock North', 'Precipitation Total 
Lubbock North', 'Temp Max Midland Far West', 'Temp Avg Midland Far West', 'TempMin Midland Far West', 'Dew Point Max Midland Far West', 'Dew Point Avg Midland Far West', 'Dew Point Min Midland Far West', 'Humidity Percent Max Midland Far West', 'Humidity Percent Avg Midland Far West', 'Humidity Percent Min Midland Far West', 'Wind Speed Max Midland Far West', 'Wind Speed Avg Midland Far West', 'Wind Speed Min Midland Far West', 'Pressures Inch Max Midland Far West', 'Pressure InchAvg Midland Far West', 'Pressure Inch Min Midland Far West', 'Precipitation Total Midland Far West']]

In [11]:
# Normalize the data

# Combine the 'Date' and 'Time' string columns and parse them in one pass into a
# timezone-aware UTC datetime column. utc=True localizes the naive timestamps to
# UTC directly, so no second to_datetime / tz_localize step is needed.
coal_data['Datetime'] = pd.to_datetime(coal_data['Date'] + ' ' + coal_data['Time'], utc=True)

# Convert the datetime to a numeric feature: whole seconds since the Unix epoch.
# Subtracting the epoch and floor-dividing by one second gives the same result
# whatever unit (ns, us, s, ...) pandas uses to store the datetime column, which
# a raw astype('int64') // 10**9 does not guarantee.
unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')
coal_data['Timestamp'] = (coal_data['Datetime'] - unix_epoch) // pd.Timedelta(seconds=1)

# Scale every numeric column (the 'Demand' target included) into [0, 1].
# NOTE(review): the scaler is fitted on all rows before the train/test split, so
# the test rows influence the min/max used for scaling - consider fitting on the
# training rows only.
num_col = coal_data.select_dtypes(include=['float64', 'int64', 'float32'])
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(num_col)

In [12]:
# Split the scaled rows into train and test sets, holding out the final 20%.
# shuffle=False keeps the rows in their original order. The rows are 15-minute
# readings, so a shuffled split would put near-identical neighbouring readings
# on both sides of the split and make the validation loss look better than it is.
# NOTE(review): assumes the rows are already in chronological order - confirm.
train_data, test_data = train_test_split(scaled_features, test_size=0.2, shuffle=False)

# The slicing below treats column 0 as the target, so fail loudly if the numeric
# frame that was scaled does not start with 'Demand'.
assert num_col.columns[0] == 'Demand', "expected 'Demand' to be the first scaled column"

# Training labels are the first column; training features are every other column.
train_labels = train_data[:, 0]
train_features = train_data[:, 1:]

# Same label/feature layout for the held-out rows.
test_labels = test_data[:, 0]
test_features = test_data[:, 1:]

In [13]:
# Add a length-1 time-step axis so each 2-D (samples, features) array becomes
# the 3-D (samples, 1, features) layout passed to the LSTM layers.
train_features = train_features[:, np.newaxis, :]
test_features = test_features[:, np.newaxis, :]




In [14]:
# Define the LSTM model: two stacked 50-unit LSTM layers feeding one dense
# output unit. The first LSTM returns the full sequence so the second LSTM
# receives 3-D input.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(1, train_features.shape[2])),
    LSTM(units=50),
    Dense(1),
])

# model = Sequential()

# First LSTM layer with more units and return_sequences=True for stacking
# model.add(LSTM(units=50, return_sequences=True, input_shape=(1, train_features.shape[2])))
# model.add(Dropout(0.2))  # Dropout for regularization

# # Additional LSTM Layer
# model.add(LSTM(units=50, return_sequences=True))
# model.add(Dropout(0.2))  # Dropout for regularization

# # Another LSTM Layer
# model.add(LSTM(units=50, return_sequences=True))
# model.add(Dropout(0.2))  # Dropout for regularization

# # Final LSTM Layer - return_sequences is False by default
# model.add(LSTM(units=50))
# model.add(Dropout(0.2))  # Dropout for regularization

# Adding more Dense layers
# model.add(Dense(units=50, activation='relu'))
# model.add(Dense(units=25, activation='relu'))

# Output Layer
# model.add(Dense(1))

In [15]:
# Compile the model: Adam optimizer (learning rate 0.01) minimising mean squared error.
optimizer = Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='mean_squared_error')


In [16]:
# Train for 25 epochs in mini-batches of 32, reporting loss on the held-out
# split after every epoch.
model.fit(
    x=train_features,
    y=train_labels,
    epochs=25,
    batch_size=32,
    validation_data=(test_features, test_labels),
    verbose=1,
)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x24598366910>