In [1]:
# Dependencies
import requests
import json
import pandas as pd


## WTI and WCS dataset

In [2]:
# URL for GET requests to retrieve vehicle data
url = "https://api.economicdata.alberta.ca/api/data?code=1da37895-ed56-405e-81de-26231ffc6472"
# Print the response object to the console
print(requests.get(url))
# Retrieving data and converting it into JSON
print(requests.get(url).json())
# Pretty Print the output of the JSON
response = requests.get(url).json()
print(json.dumps(response, indent=4, sort_keys=True))

<Response [200]>
[{'Date': '1986-01-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 22.93}, {'Date': '1986-02-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 15.46}, {'Date': '1986-03-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 12.61}, {'Date': '1986-04-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 12.84}, {'Date': '1986-05-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 15.38}, {'Date': '1986-06-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 13.43}, {'Date': '1986-07-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 11.59}, {'Date': '1986-08-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 15.1}, {'Date': '1986-09-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 14.87}, {'Date': '1986-10-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 14.9}, {'Date': '1986-11-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US/bbl', 'Value': 15.22}, {'Date': '1986-12-01T00:00:00', 'Type ': 'WTI', 'Unit': '$US

In [3]:
# Retrieve data and convert it into JSON
df = pd.DataFrame(response) 
df.tail()

Unnamed: 0,Date,Type,Unit,Value
705,2024-11-01T00:00:00,WTI,$US/bbl,69.95
706,2024-12-01T00:00:00,WCS,$US/bbl,57.76
707,2024-12-01T00:00:00,WTI,$US/bbl,70.12
708,2025-01-01T00:00:00,WCS,$US/bbl,62.86
709,2025-01-01T00:00:00,WTI,$US/bbl,75.74


In [4]:
#Cleaning up the database
df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' to datetime
df['Type'] = df['Type '].str.strip()  # Remove trailing spaces in 'Type '
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
WCS = df[df['Type'] == 'WCS'][['Year', 'Month', 'Unit', 'Value']]
WTI = df[df['Type'] == 'WTI'][['Year', 'Month', 'Unit', 'Value']]

#rename the columns
WCS.rename(columns={'Value': 'WCS'}, inplace=True)
WTI.rename(columns={'Value': 'WTI'}, inplace=True)

#merge the WSC and WTI
df_crude_oil = pd.merge(WCS,WTI, on=['Year','Month'], how='outer')[['Year','Month','WCS','WTI']]

df_crude_oil.tail()

Unnamed: 0,Year,Month,WCS,WTI
464,2024,9,55.9,70.24
465,2024,10,57.86,71.99
466,2024,11,57.56,69.95
467,2024,12,57.76,70.12
468,2025,1,62.86,75.74


## Interest rate and inflation rate

### clean up inflation rate and interest rate in Canada

In [5]:
# loading the Canada interest rate and inflation rate 
inlfation_cad = pd.read_csv("Resources/cad/canada inflation rate_CPI.csv")
interest_cad = pd.read_csv("Resources/cad/canada interest rate.csv")


In [6]:
inlfation_cad.head()


Unnamed: 0,REF_DATE,GEO,DGUID,Alternative measures,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2014-01,Canada,2016A000011124,Measure of core inflation based on a factor mo...,Percent,239,units,0,v108785713,1.1,1.3,,,,1
1,2014-02,Canada,2016A000011124,Measure of core inflation based on a factor mo...,Percent,239,units,0,v108785713,1.1,1.3,,,,1
2,2014-03,Canada,2016A000011124,Measure of core inflation based on a factor mo...,Percent,239,units,0,v108785713,1.1,1.4,,,,1
3,2014-04,Canada,2016A000011124,Measure of core inflation based on a factor mo...,Percent,239,units,0,v108785713,1.1,1.4,,,,1
4,2014-05,Canada,2016A000011124,Measure of core inflation based on a factor mo...,Percent,239,units,0,v108785713,1.1,1.4,,,,1


In [7]:
inlfation_cad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   REF_DATE              266 non-null    object 
 1   GEO                   266 non-null    object 
 2   DGUID                 266 non-null    object 
 3   Alternative measures  266 non-null    object 
 4   UOM                   266 non-null    object 
 5   UOM_ID                266 non-null    int64  
 6   SCALAR_FACTOR         266 non-null    object 
 7   SCALAR_ID             266 non-null    int64  
 8   VECTOR                266 non-null    object 
 9   COORDINATE            266 non-null    float64
 10  VALUE                 266 non-null    float64
 11  STATUS                0 non-null      float64
 12  SYMBOL                0 non-null      float64
 13  TERMINATED            0 non-null      float64
 14  DECIMALS              266 non-null    int64  
dtypes: float64(5), int64(3)

In [8]:
#clean up the inflation rate table
inlfation_cad['REF_DATE'] = pd.to_datetime(inlfation_cad['REF_DATE'])
inlfation_cad['Year']=inlfation_cad['REF_DATE'].dt.year
inlfation_cad['Month']=inlfation_cad['REF_DATE'].dt.month
df_inflation_cad = inlfation_cad[['Year', 'Month', 'VALUE']]

#rename the column
df_inflation_cad.rename(columns={'VALUE': 'Inflation'}, inplace=True)

df_inflation_cad.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_inflation_cad.rename(columns={'VALUE': 'Inflation'}, inplace=True)


Unnamed: 0,Year,Month,Inflation
0,2014,1,1.3
1,2014,2,1.3
2,2014,3,1.4
3,2014,4,1.4
4,2014,5,1.4


In [9]:
# check first five rows of interest rate data
interest_cad.head()

Unnamed: 0,REF_DATE,GEO,DGUID,Financial market statistics,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2014-01-01,Canada,2016A000011124,Overnight money market financing,Percent,239,units,0,v39050,1.12,,..,,,4
1,2014-01-02,Canada,2016A000011124,Overnight money market financing,Percent,239,units,0,v39050,1.12,0.9995,,,,4
2,2014-01-03,Canada,2016A000011124,Overnight money market financing,Percent,239,units,0,v39050,1.12,1.0016,,,,4
3,2014-01-04,Canada,2016A000011124,Overnight money market financing,Percent,239,units,0,v39050,1.12,,..,,,4
4,2014-01-05,Canada,2016A000011124,Overnight money market financing,Percent,239,units,0,v39050,1.12,,..,,,4


In [10]:
#clean the interest rate data and build monthly average rate dataframe
# Extract the relevant data
cad_10_bond = interest_cad[interest_cad['Financial market statistics'] == 'Government of Canada benchmark bond yields, 10 year']
cad_rate = interest_cad[interest_cad['Financial market statistics'] == 'Overnight money market financing']

# Merge the two DataFrames on the 'REF_DATE' to align them
merged_df = pd.merge(cad_10_bond[['REF_DATE', 'VALUE']], cad_rate[['REF_DATE', 'VALUE']], on='REF_DATE', suffixes=('_10_year', '_overnight'))

# Rename the columns for clarity
merged_df.columns = ['Date', 'Ten_Year_Bond_Yield', 'Overnight_Rate']

# Create the final DataFrame
df_rate_daily = merged_df


In [11]:
df_rate_daily.head()

Unnamed: 0,Date,Ten_Year_Bond_Yield,Overnight_Rate
0,2014-01-01,,
1,2014-01-02,2.74,0.9995
2,2014-01-03,2.75,1.0016
3,2014-01-04,,
4,2014-01-05,,


In [12]:

# update the daily value to monthly average
df_rate_daily['Date'] = pd.to_datetime(df_rate_daily['Date'])

# Set the observation_date as the index
df_rate_daily.set_index('Date', inplace=True)

# Now resample by month and calculate the mean
monthly_average = df_rate_daily.resample('M').mean()

# Create a new DataFrame with monthly averages and reset the index
df_monthly_average = monthly_average.reset_index()

df_monthly_average ['Year']=df_monthly_average['Date'].dt.year
df_monthly_average ['Month']=df_monthly_average['Date'].dt.month

# Display the new DataFrame
df_monthly_average.head()

  monthly_average = df_rate_daily.resample('M').mean()


Unnamed: 0,Date,Ten_Year_Bond_Yield,Overnight_Rate,Year,Month
0,2014-01-31,2.534091,0.999745,2014,1
1,2014-02-28,2.42,0.998158,2014,2
2,2014-03-31,2.454286,0.996481,2014,3
3,2014-04-30,2.444762,0.998171,2014,4
4,2014-05-31,2.315238,0.997471,2014,5


In [13]:
#Merge the inflation and interest rate and create new dataframe
df_cad_rates=pd.merge(df_inflation_cad,df_monthly_average, on=['Year','Month'], how='outer')
df_cad_rates = df_cad_rates[['Year', 'Month', 'Inflation', 'Ten_Year_Bond_Yield', 'Overnight_Rate']]

df_cad_rates.head()

Unnamed: 0,Year,Month,Inflation,Ten_Year_Bond_Yield,Overnight_Rate
0,2014,1,1.3,2.534091,0.999745
1,2014,1,121.3,2.534091,0.999745
2,2014,2,1.3,2.42,0.998158
3,2014,2,122.1,2.42,0.998158
4,2014,3,1.4,2.454286,0.996481


### clean up inflation rate and interest rate in US

In [14]:
# loading the interest rate and inflation rate 
inflation_us = pd.read_csv("Resources/fred/daily.csv")
Fed_rate = pd.read_csv("Resources/fred/daily,_7-day.csv")
Fed_rate_10 = pd.read_csv("Resources/fred/monthly.csv")



In [15]:
Fed_rate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4798 entries, 0 to 4797
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   observation_date  4798 non-null   object 
 1   DFF               4798 non-null   float64
dtypes: float64(1), object(1)
memory usage: 75.1+ KB


In [16]:
# update the daily value to monthly average for inflation rate and Fed_rate
inflation_us['observation_date'] = pd.to_datetime(inflation_us['observation_date'])
Fed_rate['observation_date'] = pd.to_datetime(Fed_rate['observation_date'])

# Set the observation_date as the index
inflation_us.set_index('observation_date', inplace=True)
Fed_rate.set_index('observation_date', inplace=True)

# calculate the monthly average
inflation_us_monthly = inflation_us.resample('M').mean()
fed_rate_monthly = Fed_rate.resample('M').mean()



  inflation_us_monthly = inflation_us.resample('M').mean()
  fed_rate_monthly = Fed_rate.resample('M').mean()


In [17]:
# Create Year and Month columns from the index
inflation_us_monthly['Year'] = inflation_us_monthly.index.year
inflation_us_monthly['Month'] = inflation_us_monthly.index.month

fed_rate_monthly['Year'] = fed_rate_monthly.index.year
fed_rate_monthly['Month'] = fed_rate_monthly.index.month



In [18]:
Fed_rate_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   observation_date  157 non-null    object 
 1   IRLTLT01USM156N   157 non-null    float64
dtypes: float64(1), object(1)
memory usage: 2.6+ KB


In [19]:
#prepare the date before merge
Fed_rate_10['date'] = pd.to_datetime(Fed_rate_10['observation_date'])
Fed_rate_10['Year']=Fed_rate_10['date'].dt.year
Fed_rate_10['Month']=Fed_rate_10['date'].dt.month

# Merge all interest rate and inflation rate
df_fed_rates = pd.merge(inflation_us_monthly, fed_rate_monthly, on=['Year', 'Month'], how='outer')
df_fed_rates = pd.merge(df_fed_rates, Fed_rate_10, on=['Year', 'Month'], how='outer')

#rename the columns
df_fed_rates.rename(columns={'DFF': 'Federal_Rate', 'T10YIE': 'Inflation', 'IRLTLT01USM156N': 'Ten_Year_Bond_Yield' }, inplace=True)

df_fed_rates = df_fed_rates[['Year', 'Month', 'Inflation', 'Ten_Year_Bond_Yield', 'Federal_Rate']]


In [20]:
df_fed_rates.head()

Unnamed: 0,Year,Month,Inflation,Ten_Year_Bond_Yield,Federal_Rate
0,2012,1,2.0745,1.97,0.083333
1,2012,2,2.222,1.97,0.103793
2,2012,3,2.314091,2.17,0.126774
3,2012,4,2.266667,2.05,0.139333
4,2012,5,2.144545,1.8,0.155806


## Save the cleaned dataset

In [21]:
df_crude_oil.to_csv('Cleaned_Data/df_crude_oil.csv',index=False, encoding='utf-8')
df_cad_rates.to_csv('Cleaned_Data/cad_rates.csv',index=False, encoding='utf-8')
df_fed_rates.to_csv('Cleaned_Data/fed_rates.csv',index=False, encoding='utf-8')