In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive so that files stored in Drive
# can be accessed through the Colab runtime's file system
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os

# Folder on the mounted Drive that holds the project's dataset files;
# making it the working directory lets later cells use bare file names
PROJECT_DATASET_DIR = "/content/drive/MyDrive/CS547/DeepDiveProject/Dataset"
os.chdir(PROJECT_DATASET_DIR)

In [3]:
import numpy as np
import pandas as pd
import pickle

In [4]:
# Load the weekly gasoline price series from the CSV in the working directory
gasoline_prices_csv = "gasoline_prices.csv"
gasoline_prices = pd.read_csv(gasoline_prices_csv)

# Show the raw frame as loaded (long original column header, string dates)
gasoline_prices

Unnamed: 0,Date,Weekly U.S. All Grades All Formulations Retail Gasoline Prices (Dollars per Gallon)
0,"Apr 05, 1993",1.068
1,"Apr 12, 1993",1.079
2,"Apr 19, 1993",1.079
3,"Apr 26, 1993",1.086
4,"May 03, 1993",1.086
...,...,...
1642,"Sep 23, 2024",3.311
1643,"Sep 30, 2024",3.303
1644,"Oct 07, 2024",3.260
1645,"Oct 14, 2024",3.294


In [5]:
# Swap the verbose CSV headers for short column names
gasoline_prices.columns = ["Date", "Gasoline Prices (Dollars per Gallon)"]

# Parse the date strings into timestamps and, in the same step, collapse
# each timestamp into the weekly period that contains it, so the column
# identifies a whole week rather than a single day
gasoline_prices["Date"] = pd.to_datetime(gasoline_prices["Date"]).dt.to_period("W")

In [6]:
# Display the processed gasoline prices: the columns have been renamed and
# "Date" now holds weekly periods instead of date strings
gasoline_prices

Unnamed: 0,Date,Gasoline Prices (Dollars per Gallon)
0,1993-04-05/1993-04-11,1.068
1,1993-04-12/1993-04-18,1.079
2,1993-04-19/1993-04-25,1.079
3,1993-04-26/1993-05-02,1.086
4,1993-05-03/1993-05-09,1.086
...,...,...
1642,2024-09-23/2024-09-29,3.311
1643,2024-09-30/2024-10-06,3.303
1644,2024-10-07/2024-10-13,3.260
1645,2024-10-14/2024-10-20,3.294


In [7]:
# Load the weekly net imports series from the CSV in the working directory
net_imports_csv = "net_imports.csv"
net_imports = pd.read_csv(net_imports_csv)

# Show the raw frame as loaded (long original column header, string dates)
net_imports

Unnamed: 0,Date,Weekly U.S. Net Imports of Crude Oil and Petroleum Products (Thousand Barrels per Day)
0,"Feb 08, 1991",5653
1,"Feb 15, 1991",5349
2,"Feb 22, 1991",4711
3,"Mar 01, 1991",4678
4,"Mar 08, 1991",6102
...,...,...
1754,"Sep 20, 2024",-2798
1755,"Sep 27, 2024",-2829
1756,"Oct 04, 2024",-3015
1757,"Oct 11, 2024",-3412


In [8]:
# Swap the verbose CSV headers for short column names
net_imports.columns = ["Date", "Net Imports (Thousand Barrels per Day)"]

# Parse the date strings into timestamps and, in the same step, collapse
# each timestamp into the weekly period that contains it, so the column
# identifies a whole week rather than a single day
net_imports["Date"] = pd.to_datetime(net_imports["Date"]).dt.to_period("W")

In [9]:
# Display the processed net imports: the columns have been renamed and
# "Date" now holds weekly periods instead of date strings
net_imports

Unnamed: 0,Date,Net Imports (Thousand Barrels per Day)
0,1991-02-04/1991-02-10,5653
1,1991-02-11/1991-02-17,5349
2,1991-02-18/1991-02-24,4711
3,1991-02-25/1991-03-03,4678
4,1991-03-04/1991-03-10,6102
...,...,...
1754,2024-09-16/2024-09-22,-2798
1755,2024-09-23/2024-09-29,-2829
1756,2024-09-30/2024-10-06,-3015
1757,2024-10-07/2024-10-13,-3412


In [10]:
# Inner-join the two weekly series on their shared "Date" period column,
# keeping only the weeks that appear in both frames
working_data = gasoline_prices.merge(net_imports, on="Date", how="inner")

# Show the combined frame
working_data

Unnamed: 0,Date,Gasoline Prices (Dollars per Gallon),Net Imports (Thousand Barrels per Day)
0,1993-04-05/1993-04-11,1.068,7970
1,1993-04-12/1993-04-18,1.079,6990
2,1993-04-19/1993-04-25,1.079,7729
3,1993-04-26/1993-05-02,1.086,8437
4,1993-05-03/1993-05-09,1.086,7086
...,...,...,...
1641,2024-09-16/2024-09-22,3.307,-2798
1642,2024-09-23/2024-09-29,3.311,-2829
1643,2024-09-30/2024-10-06,3.303,-3015
1644,2024-10-07/2024-10-13,3.260,-3412


In [11]:
# Number of leading rows of the merged data kept as a small debugging subset
DEBUGGING_ROWS = 200

# Take an explicit copy of the slice so the debugging subset is an
# independent frame: later edits to it cannot touch working_data and do not
# trigger pandas' SettingWithCopyWarning
debugging_data = working_data.iloc[:DEBUGGING_ROWS].copy()

In [12]:
# Persist both processed frames as pickle files in the working directory
# (debugging subset first, then the full merged data)
for frame_to_save, pickle_name in (
    (debugging_data, "debugging_dataset.pkl"),
    (working_data, "working_dataset.pkl"),
):
    frame_to_save.to_pickle(pickle_name)

In [13]:
# Round-trip check: reload both frames from the pickle files written above.
# pd.read_pickle is the counterpart of DataFrame.to_pickle and opens and
# closes the file itself, unlike pickle.load(open(...)), which leaves the
# file handle open.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# this notebook (or another trusted source) produced.
debugging_data = pd.read_pickle("debugging_dataset.pkl")
working_data = pd.read_pickle("working_dataset.pkl")

In [14]:
# Display the debugging subset reloaded from debugging_dataset.pkl
debugging_data

Unnamed: 0,Date,Gasoline Prices (Dollars per Gallon),Net Imports (Thousand Barrels per Day)
0,1993-04-05/1993-04-11,1.068,7970
1,1993-04-12/1993-04-18,1.079,6990
2,1993-04-19/1993-04-25,1.079,7729
3,1993-04-26/1993-05-02,1.086,8437
4,1993-05-03/1993-05-09,1.086,7086
...,...,...,...
195,1996-12-30/1997-01-05,1.274,6961
196,1997-01-06/1997-01-12,1.272,8398
197,1997-01-13/1997-01-19,1.287,8219
198,1997-01-20/1997-01-26,1.287,9227


In [15]:
# Display the full merged data reloaded from working_dataset.pkl
working_data

Unnamed: 0,Date,Gasoline Prices (Dollars per Gallon),Net Imports (Thousand Barrels per Day)
0,1993-04-05/1993-04-11,1.068,7970
1,1993-04-12/1993-04-18,1.079,6990
2,1993-04-19/1993-04-25,1.079,7729
3,1993-04-26/1993-05-02,1.086,8437
4,1993-05-03/1993-05-09,1.086,7086
...,...,...,...
1641,2024-09-16/2024-09-22,3.307,-2798
1642,2024-09-23/2024-09-29,3.311,-2829
1643,2024-09-30/2024-10-06,3.303,-3015
1644,2024-10-07/2024-10-13,3.260,-3412
