In [2]:
# Import libraries
import numpy as np
import pandas as pd

In [3]:
# St. Louis Fed (FRED) text file with the full daily price history of the
# DCOILWTICO series (WTI crude oil spot price at Cushing, Oklahoma)
historical_price_url = 'https://fred.stlouisfed.org/data/DCOILWTICO.txt'

In [4]:
# Download the text file and parse it as comma-separated text; the metadata
# preamble arrives as ordinary rows and is stripped in a later cell
raw_data = pd.read_csv(filepath_or_buffer=historical_price_url)

In [5]:
# Peek at the top of the file to count the preamble rows that precede the price data
raw_data.iloc[:15]

Unnamed: 0,Title: Crude Oil Prices: West Texas Intermediate (WTI) - Cushing,Oklahoma
0,Series ID: DCOILWTICO,
1,Source: U.S. Energy Information A...,
2,Release: Spot Prices (Not a Press ...,
3,Seasonal Adjustment: Not Seasonally Adjusted,
4,Frequency: Daily,
5,Units: Dollars per Barrel,
6,Date Range: 1986-01-02 to 2022-05-16,
7,Last Updated: 2022-05-18 12:11 PM CDT,
8,Notes: Definitions,Sources and Explanatory Notes
9,(http://www.eia.doe.gov/d...,


In [6]:
# Skip the 10 metadata rows (index 0-9) so the frame starts at the 'DATE VALUE' header row
df = raw_data.iloc[10:]
df

Unnamed: 0,Title: Crude Oil Prices: West Texas Intermediate (WTI) - Cushing,Oklahoma
10,DATE VALUE,
11,1986-01-02 25.56,
12,1986-01-03 26.00,
13,1986-01-06 26.53,
14,1986-01-07 25.85,
...,...,...
9494,2022-05-10 99.74,
9495,2022-05-11 105.50,
9496,2022-05-12 106.15,
9497,2022-05-13 110.52,


In [7]:
# Name the two columns, then discard the 'DATE VALUE' header row and the unused
# second column ('OK' -- it appears to exist only because the title line contains a comma)
df.columns = ['Date Value', 'OK']
prices_df = df.iloc[1:].drop('OK', axis='columns')

prices_df

Unnamed: 0,Date Value
11,1986-01-02 25.56
12,1986-01-03 26.00
13,1986-01-06 26.53
14,1986-01-07 25.85
15,1986-01-08 25.87
...,...
9494,2022-05-10 99.74
9495,2022-05-11 105.50
9496,2022-05-12 106.15
9497,2022-05-13 110.52


### We are going to need to break up the 'Date Value' column into 2 columns 
We will then convert the 'Value' column to a float.
Then we will convert the 'Date' column to datetime and make it the index.

In [8]:
# The 'Value' text uses almost-empty strings ('       .') instead of nulls, so
# .astype(float) cannot be used on the column directly.
# Trigger the failure on purpose to learn which exception type the converter
# must handle. The error is caught and printed (rather than left to propagate)
# so that Restart Kernel -> Run All does not stop at this cell.
try:
    float('       .')
except Exception as err:  # broad on purpose: the point is to discover the type
    print(f'{type(err).__name__}: {err}')

ValueError: could not convert string to float: '       .'

In [9]:
# Convert a raw 'Value' entry to a float, or to a null when it has no numerical equivalent
def float_or_null(x):
    """Return ``float(x)``, or ``np.nan`` when ``x`` cannot be converted.

    Parameters
    ----------
    x : object
        Value to convert, e.g. the text ``'3.65'`` or the placeholder
        ``'       .'``.

    Returns
    -------
    float
        The parsed number, or ``np.nan`` if the conversion fails.
    """
    try:
        return float(x)
    except (ValueError, TypeError):
        # ValueError: text with no numeric meaning, such as '       .'
        # TypeError: input float() does not accept at all, such as None
        return np.nan


In [10]:
# Sanity-check the converter on one numeric string and one placeholder string
for sample in ('3.65', '       .'):
    print(float_or_null(sample))

3.65
nan


In [11]:
# Split the fixed-width text: the first 10 characters are the date, the rest is the price
prices_df['Date'] = prices_df['Date Value'].str[:10]
prices_df['Value'] = prices_df['Date Value'].str[10:].map(float_or_null)

In [12]:
# Inspect the frame now that the 'Date' and 'Value' columns have been added
prices_df

Unnamed: 0,Date Value,Date,Value
11,1986-01-02 25.56,1986-01-02,25.56
12,1986-01-03 26.00,1986-01-03,26.00
13,1986-01-06 26.53,1986-01-06,26.53
14,1986-01-07 25.85,1986-01-07,25.85
15,1986-01-08 25.87,1986-01-08,25.87
...,...,...,...
9494,2022-05-10 99.74,2022-05-10,99.74
9495,2022-05-11 105.50,2022-05-11,105.50
9496,2022-05-12 106.15,2022-05-12,106.15
9497,2022-05-13 110.52,2022-05-13,110.52


In [13]:
# Count the missing prices to confirm the placeholder entries became nulls
prices_df['Value'].isna().sum()

323

In [14]:
# Confirm that the 'Value' column is now float
prices_df.dtypes

Date Value     object
Date           object
Value         float64
dtype: object

In [15]:
# Build the date-indexed price series.
# The rename is positional: it relies on the column order
# 'Date Value', 'Date', 'Value' produced by the earlier cells.
prices_df.columns = ['Drop', 'Date', 'Oil Price']
prices_df['Date'] = pd.to_datetime(prices_df['Date'])
historical_prices_df = (
    prices_df
    .drop(columns=['Drop'])   # the original combined text is no longer needed
    .set_index(['Date'])
    .sort_index()
)

historical_prices_df

Unnamed: 0_level_0,Oil Price
Date,Unnamed: 1_level_1
1986-01-02,25.56
1986-01-03,26.00
1986-01-06,26.53
1986-01-07,25.85
1986-01-08,25.87
...,...
2022-05-10,99.74
2022-05-11,105.50
2022-05-12,106.15
2022-05-13,110.52


In [16]:
# Roll the daily prices up to monthly averages, labelled by month start ('MS'),
# rounded to 2 decimal places
monthly_historical_prices = (
    historical_prices_df
    .resample('MS')
    .mean()
    .round(2)
)

In [17]:
# Display the monthly average prices
monthly_historical_prices

Unnamed: 0_level_0,Oil Price
Date,Unnamed: 1_level_1
1986-01-01,22.93
1986-02-01,15.45
1986-03-01,12.61
1986-04-01,12.84
1986-05-01,15.38
...,...
2022-01-01,83.22
2022-02-01,91.64
2022-03-01,108.50
2022-04-01,101.78
