In [1]:
# Import dependencies
import pandas as pd
import datetime as dt

In [2]:
# Load the raw NYMEX Crude Oil Historical Futures Prices data.
# file_path already ends with "/", so strip it before joining; otherwise the
# resulting path contains a doubled separator ("raw_datasets//NYMEX_...").
file_path = "../Resources/raw_datasets/"
futures_price_file = f"{file_path.rstrip('/')}/NYMEX_Crude_Oil_Historical_Futures_Prices.csv"

# Skip the first two lines of the file; the column header is read from the third.
futures_price_df = pd.read_csv(futures_price_file, skiprows=2)
futures_price_df.head()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract 1 (Dollars per Barrel)","Cushing, OK Crude Oil Future Contract 2 (Dollars per Barrel)","Cushing, OK Crude Oil Future Contract 3 (Dollars per Barrel)","Cushing, OK Crude Oil Future Contract 4 (Dollars per Barrel)",Unnamed: 5
0,"Mar 30, 1983",,,29.35,,
1,"Mar 31, 1983",,,29.24,,
2,"Apr 04, 1983",29.44,,29.1,,
3,"Apr 05, 1983",29.71,,29.35,,
4,"Apr 06, 1983",29.92,,29.5,,


In [3]:
# List every column label that came in from the CSV
list(futures_price_df.columns)

['Date',
 'Cushing, OK Crude Oil Future Contract 1 (Dollars per Barrel)',
 'Cushing, OK Crude Oil Future Contract 2 (Dollars per Barrel)',
 'Cushing, OK Crude Oil Future Contract 3 (Dollars per Barrel)',
 'Cushing, OK Crude Oil Future Contract 4 (Dollars per Barrel)',
 'Unnamed: 5']

In [4]:
# Keep only the date and the Contract 1 price column
columns_to_keep = [
    "Date",
    "Cushing, OK Crude Oil Future Contract 1 (Dollars per Barrel)",
]
futures_price_df = futures_price_df[columns_to_keep]

futures_price_df.head()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract 1 (Dollars per Barrel)"
0,"Mar 30, 1983",
1,"Mar 31, 1983",
2,"Apr 04, 1983",29.44
3,"Apr 05, 1983",29.71
4,"Apr 06, 1983",29.92


In [5]:
# Give the two remaining columns shorter labels (applied positionally)
renamed_columns = ["Date", "Cushing, OK Crude Oil Future Contract ($/Bar.)"]
futures_price_df.columns = renamed_columns
futures_price_df.head()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
0,"Mar 30, 1983",
1,"Mar 31, 1983",
2,"Apr 04, 1983",29.44
3,"Apr 05, 1983",29.71
4,"Apr 06, 1983",29.92


In [6]:
# Inspect the dtype pandas assigned to each column
futures_price_df.dtypes

Date                                               object
Cushing, OK Crude Oil Future Contract ($/Bar.)    float64
dtype: object

In [7]:
# Count the rows that were loaded
futures_price_df.shape[0]

9743

In [8]:
# Count the missing values in each column
futures_price_df.isna().sum()

Date                                              1
Cushing, OK Crude Oil Future Contract ($/Bar.)    6
dtype: int64

In [9]:
# Discard every row that has a missing value in any column
futures_price_df = futures_price_df.dropna(axis="index", how="any")

In [10]:
# Re-count missing values per column to confirm none remain
futures_price_df.isna().sum()

Date                                              0
Cushing, OK Crude Oil Future Contract ($/Bar.)    0
dtype: int64

In [11]:
# Parse the Date strings into pandas datetime values
parsed_dates = pd.to_datetime(futures_price_df["Date"])
futures_price_df = futures_price_df.assign(Date=parsed_dates)
futures_price_df.head()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
2,1983-04-04,29.44
3,1983-04-05,29.71
4,1983-04-06,29.92
5,1983-04-07,30.17
6,1983-04-08,30.38


In [12]:
# Keep only the rows dated from 1986-01-01 through 2021-10-31 (both ends inclusive)
start_date = "1986-01-01"
end_date = "2021-10-31"
in_date_range = futures_price_df["Date"].between(start_date, end_date)
futures_price_df = futures_price_df[in_date_range]
futures_price_df.head()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
691,1986-01-02,25.56
692,1986-01-03,25.97
693,1986-01-06,26.57
694,1986-01-07,26.2
695,1986-01-08,25.93


In [13]:
# Show the last rows to confirm where the filtered date range ends
futures_price_df.tail()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
9688,2021-10-25,83.76
9689,2021-10-26,84.65
9690,2021-10-27,82.66
9691,2021-10-28,82.81
9692,2021-10-29,83.57


In [14]:
# Average the price within each calendar month.
# Build the grouping key: the year-month period of each row's Date.
avg_future_contract = futures_price_df["Date"].dt.to_period("M")

# numeric_only=True restricts the mean to the numeric price column. pandas 2.0
# changed the groupby default to numeric_only=False, which would also aggregate
# the datetime "Date" column and keep it in the result, where it collides with
# the "Date" index when the index is later turned back into a column.
futures_price_df = futures_price_df.groupby(avg_future_contract).mean(numeric_only=True)
futures_price_df.head()

Unnamed: 0_level_0,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
Date,Unnamed: 1_level_1
1986-01,22.976818
1986-02,15.457368
1986-03,12.615
1986-04,12.753636
1986-05,15.264286


In [15]:
# Round the monthly averages to one decimal place
futures_price_df = futures_price_df.round(1)
futures_price_df.head()

Unnamed: 0_level_0,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
Date,Unnamed: 1_level_1
1986-01,23.0
1986-02,15.5
1986-03,12.6
1986-04,12.8
1986-05,15.3


In [16]:
# Replace the monthly period index with text labels such as "Jan-1986"
month_labels = futures_price_df.index.strftime("%b-%Y")
futures_price_df.index = month_labels
futures_price_df.head()

Unnamed: 0_level_0,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
Date,Unnamed: 1_level_1
Jan-1986,23.0
Feb-1986,15.5
Mar-1986,12.6
Apr-1986,12.8
May-1986,15.3


In [17]:
# Move the Date index back into a regular column.
# The result is reassigned rather than modified with inplace=True, matching how
# the other cells rebind futures_price_df and keeping the step chainable.
futures_price_df = futures_price_df.reset_index()
futures_price_df.head()

Unnamed: 0,Date,"Cushing, OK Crude Oil Future Contract ($/Bar.)"
0,Jan-1986,23.0
1,Feb-1986,15.5
2,Mar-1986,12.6
3,Apr-1986,12.8
4,May-1986,15.3


In [18]:
# Write the cleaned monthly averages to a new CSV file, leaving out the row index
cleaned_csv_path = "../Resources/clean_datasets/Cleaned_NYMEX_Crude_Oil_Historical_Futures_Prices.csv"
futures_price_df.to_csv(cleaned_csv_path, index=False)