**Data Import**

In [1]:
import os
import numpy
import pandas
import time
import random
import matplotlib
import sklearn.linear_model
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import scipy.stats
import matplotlib.offsetbox as offsetbox
from matplotlib.ticker import StrMethodFormatter
import imageio
import PIL
from sklearn.model_selection import train_test_split
import torch
from itertools import product


def saver(fname):
    plt.savefig(fname+".png",bbox_inches="tight")

def legend(pos="bottom",ncol=3,extra=False):
    if pos=="bottom":
        extra = 0.15 if extra else 0
        plt.legend(bbox_to_anchor=(0.5,-0.2-extra), loc='upper center',facecolor="lightgray",ncol=ncol)
    elif pos=="side":
        plt.legend(bbox_to_anchor=(1.1,0.5), loc='center left',facecolor="lightgray",ncol=1)

def textbox(txt,fname=None):
    plt.figure(figsize=(1,1))
    plt.gca().add_artist(offsetbox.AnchoredText("\n".join(txt), loc="center",prop=dict(size=30)))
    plt.axis('off')
    if fname is not None:
        saver(fname)
    plt.show()
    plt.close()

In [2]:
def getfile(location_pair,**kwargs): #tries to get local version and then defaults to google drive version
    (loc,gdrive)=location_pair
    try:
        out=pandas.read_csv(loc,**kwargs)
    except FileNotFoundError:
        print("local file not found; accessing Google Drive")
        loc = 'https://drive.google.com/uc?export=download&id='+gdrive.split('/')[-2]
        out=pandas.read_csv(loc,**kwargs)
    return out

In [3]:
raw_data=("US_Gas_Data with season.csv","https://drive.google.com/file/d/1_U6DmeQvqGuLhSmAk_EHWnBnFL1nxjiF/view?usp=sharing")

In [4]:
data_raw=getfile(raw_data)
data_raw.head()

local file not found; accessing Google Drive


Unnamed: 0,Date for US Imports and Exports,Weekly U.S. Exports of Crude Oil and Petroleum Products (Thousand Barrels per Day),Weekly U.S. Exports of Crude Oil (Thousand Barrels per Day),Weekly U.S. Exports of Total Petroleum Products (Thousand Barrels per Day),Date for Retail Gas Price,Weekly U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon),Season
0,9-Nov-01,10772,9639,1133,12-Nov-01,1.224,autumn
1,16-Nov-01,10243,8879,1364,19-Nov-01,1.208,autumn
2,23-Nov-01,9576,8187,1389,26-Nov-01,1.168,autumn
3,30-Nov-01,11170,9856,1314,3-Dec-01,1.149,autumn
4,7-Dec-01,9885,8966,919,10-Dec-01,1.136,winter


In [5]:
data_raw.to_pickle("./pickeddata.pkl")

In [6]:
data_new = pandas.read_pickle("./pickeddata.pkl")
data_new

Unnamed: 0,Date for US Imports and Exports,Weekly U.S. Exports of Crude Oil and Petroleum Products (Thousand Barrels per Day),Weekly U.S. Exports of Crude Oil (Thousand Barrels per Day),Weekly U.S. Exports of Total Petroleum Products (Thousand Barrels per Day),Date for Retail Gas Price,Weekly U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon),Season
0,9-Nov-01,10772,9639,1133,12-Nov-01,1.224,autumn
1,16-Nov-01,10243,8879,1364,19-Nov-01,1.208,autumn
2,23-Nov-01,9576,8187,1389,26-Nov-01,1.168,autumn
3,30-Nov-01,11170,9856,1314,3-Dec-01,1.149,autumn
4,7-Dec-01,9885,8966,919,10-Dec-01,1.136,winter
...,...,...,...,...,...,...,...
1137,25-Aug-23,-1684,2089,-3773,28-Aug-23,3.931,summer
1138,1-Sep-23,-2593,1838,-4432,4-Sep-23,3.925,autumn
1139,8-Sep-23,431,4492,-4061,11-Sep-23,3.941,autumn
1140,15-Sep-23,-2290,1450,-3741,18-Sep-23,4.001,autumn


In [7]:
season = {'spring': 0, 'summer': 1, 'autumn':2, 'winter': 3}
data_new.Season = [season[i] for i in data_new.Season]
data_new

Unnamed: 0,Date for US Imports and Exports,Weekly U.S. Exports of Crude Oil and Petroleum Products (Thousand Barrels per Day),Weekly U.S. Exports of Crude Oil (Thousand Barrels per Day),Weekly U.S. Exports of Total Petroleum Products (Thousand Barrels per Day),Date for Retail Gas Price,Weekly U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon),Season
0,9-Nov-01,10772,9639,1133,12-Nov-01,1.224,2
1,16-Nov-01,10243,8879,1364,19-Nov-01,1.208,2
2,23-Nov-01,9576,8187,1389,26-Nov-01,1.168,2
3,30-Nov-01,11170,9856,1314,3-Dec-01,1.149,2
4,7-Dec-01,9885,8966,919,10-Dec-01,1.136,3
...,...,...,...,...,...,...,...
1137,25-Aug-23,-1684,2089,-3773,28-Aug-23,3.931,1
1138,1-Sep-23,-2593,1838,-4432,4-Sep-23,3.925,2
1139,8-Sep-23,431,4492,-4061,11-Sep-23,3.941,2
1140,15-Sep-23,-2290,1450,-3741,18-Sep-23,4.001,2


Data Preprocessing

In [8]:
XY=data_new[['Date for US Imports and Exports','Weekly U.S. Exports of Crude Oil and Petroleum Products (Thousand Barrels per Day)','Season','Weekly U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon)']].dropna(axis='index')
(feature1,featurename1)=('Date for US Imports and Exports',"Year")
(feature2,featurescale,featurename2)=('Weekly U.S. Exports of Crude Oil and Petroleum Products (Thousand Barrels per Day)',1000,"Exports")
(feature3,featurename3)=('Season','Season')
(label,labelname)=('Weekly U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon)',"Prices (Dollars per Gallon)")

XY.columns=[featurename1,featurename2,featurename3,labelname]
XY[featurename1]=pandas.to_datetime(XY[featurename1]).dt.date

print(XY)

X=XY[featurename1].squeeze()   #only date
Y=XY[labelname].squeeze()
print(X)
print(Y)

            Year  Exports  Season  Prices (Dollars per Gallon)
0     2001-11-09    10772       2                        1.224
1     2001-11-16    10243       2                        1.208
2     2001-11-23     9576       2                        1.168
3     2001-11-30    11170       2                        1.149
4     2001-12-07     9885       3                        1.136
...          ...      ...     ...                          ...
1137  2023-08-25    -1684       1                        3.931
1138  2023-09-01    -2593       2                        3.925
1139  2023-09-08      431       2                        3.941
1140  2023-09-15    -2290       2                        4.001
1141  2023-09-22    -1706       2                        3.963

[1142 rows x 4 columns]
0       2001-11-09
1       2001-11-16
2       2001-11-23
3       2001-11-30
4       2001-12-07
           ...    
1137    2023-08-25
1138    2023-09-01
1139    2023-09-08
1140    2023-09-15
1141    2023-09-22
Name: Year

In [9]:
X = X.values
Y = Y.values

In [10]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from keras.preprocessing.sequence import TimeseriesGenerator

In [11]:
split_percent = 0.80
split = int(split_percent*len(Y))
y_train = Y[:split]
y_test = Y[split:]

In [12]:
x_train = X[:split]
x_test = X[split:]

In [None]:
x_train=np.arange(1,914)  # generate numbers for x axis plot as date but in the future may replaced with real datetime.

In [None]:
x_test=np.arange(914,1143)

In [13]:
look_back = 10

train_generator = TimeseriesGenerator(y_train, y_train, length=look_back, batch_size=20)
test_generator = TimeseriesGenerator(y_test, y_test, length=look_back, batch_size=1)

In [22]:
model = Sequential()
model.add(
    LSTM(10,
        activation='relu',
        input_shape=(look_back,1))
)
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

num_epochs = 25
model.fit_generator(train_generator, epochs=num_epochs, verbose=1)

Epoch 1/25



`Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.



Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x7e324c0425f0>

In [23]:
prediction = model.predict_generator(test_generator)

y_train = y_train.reshape((-1))
y_test = y_test.reshape((-1))
prediction = prediction.reshape((-1))

trace1 = go.Scatter(
    x = x_train,
    y = y_train,
    mode = 'lines',
    name = 'training Data'
)
trace2 = go.Scatter(
    x = x_test[10:],    # prediction takes 30 previous values so should plot delayed 30 datapoints
    y = prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = x_test,
    y = y_test,
    mode='lines',
    name = 'Ground Truth'
)
layout = go.Layout(
    title = "gasoline price",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "price"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()


`Model.predict_generator` is deprecated and will be removed in a future version. Please use `Model.predict`, which supports generators.



In [25]:
MSE_numpy = np.mean(np.square(np.subtract(y_test[10:],prediction)))
print ("MSE using Numpy:  ", MSE_numpy)
MAE_numpy = np.mean(np.abs(np.subtract(y_test[10:],prediction)))
print ("MAE using Numpy:  ", MAE_numpy)

MSE using Numpy:   0.021191712883487115
MAE using Numpy:   0.1002143348754813
