In [13]:
# **************************
# Author: Sebastien Vezina
# Date: 05-Nov-2020
# **************************

import os
import requests
import json
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
from functools import partial

In [81]:
# Location of the raw ("dirty") 5-year CAD market data CSV, relative to this notebook.
csvpath = Path('../../../02-Data') / 'Dirty Data' / 'Market Data' / 'CAD_5yr_drt.csv'

In [82]:
# Load the raw CAD data. The date column is read as plain strings here and is
# converted to Timestamps later, just before the date-window slice.
# (`infer_datetime_format` was dropped: it is deprecated in pandas 2.x and has
# no effect unless `parse_dates` is also given.)
cad_data = pd.read_csv(csvpath)

In [83]:
# Preview the first five rows of the raw frame.
cad_data.head(n=5)

Unnamed: 0,date,1. open,2. high,3. low,4. close
0,2014-11-24,0.8893,0.8907,0.8835,0.8859
1,2014-11-25,0.8856,0.8901,0.8832,0.888
2,2014-11-26,0.8881,0.8904,0.8847,0.8887
3,2014-11-27,0.8883,0.8899,0.8802,0.8816
4,2014-11-28,0.8812,0.882,0.8735,0.8755


In [84]:
# Discard the open/high/low price columns; only the date and the close remain.
unused_price_columns = ['1. open', '2. high', '3. low']
cad_data = cad_data.drop(columns=unused_price_columns)

In [85]:
# Preview the first five rows after dropping the unused price columns.
cad_data.head(n=5)

Unnamed: 0,date,4. close
0,2014-11-24,0.8859
1,2014-11-25,0.888
2,2014-11-26,0.8887
3,2014-11-27,0.8816
4,2014-11-28,0.8755


In [86]:
# Relabel the two remaining columns by position: first -> 'Date', second -> 'Close'.
column_labels = ['Date', 'Close']
cad_data.columns = column_labels

# Print dtypes and non-null counts to confirm the frame's shape after renaming.
cad_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1557 entries, 0 to 1556
Data columns (total 2 columns):
Date     1557 non-null object
Close    1557 non-null float64
dtypes: float64(1), object(1)
memory usage: 24.4+ KB


In [87]:
# Keep only rows strictly between 2015-11-01 and 2020-11-01 (both bounds exclusive).
# ISO-8601 literals are used because pandas parses "01/11/2015" month-first,
# i.e. as 11 January 2015 rather than 1 November 2015.
# NOTE(review): the earlier comment gave 2015-10-01 as the start; the day-first
# reading of the original literal (and the "5yr" file name) gives 2015-11-01.
# Confirm the intended start date.
start_date = pd.Timestamp("2015-11-01")
end_date = pd.Timestamp("2020-11-01")

# Convert the Date column from strings to Timestamps so it can be compared with the bounds
cad_data['Date'] = pd.to_datetime(cad_data['Date'])

# Boolean mask for the window. The bounds are compared directly: the previous
# `f(...)` wrapper referred to a name that is not defined in the visible cells
# and raised NameError on a fresh kernel.
in_window = (cad_data['Date'] > start_date) & (cad_data['Date'] < end_date)

# Slice and promote Date to the index in one step, producing a new frame
# rather than mutating a filtered slice in place.
cad_data = cad_data.loc[in_window].set_index("Date")

# Show a short preview instead of dumping the whole frame
cad_data.head()

In [88]:
# Write the cleaned CAD close series (indexed by Date) to the clean-data folder.
cad_clean_csv = '../../../02-Data/Clean Data/Market Data/CAD_5yr_cln.csv'
cad_data.to_csv(path_or_buf=cad_clean_csv)

In [93]:
#-------------------------------------------------------------------------------------------------------

In [95]:
# Load the raw S&P/TSX 60 history, using its "Date" column as the index.
# NOTE(review): the variables are named "TSX500" but the file is the TSX 60 index.
# Per the original note the data should span 2015-10-01 to 2020-11-01 -- not verified here.
tsx_raw_dir = Path("../../../02-Data/Dirty Data/Market Data")
SP_TSX500_file = tsx_raw_dir / "SPTSX60 Historical.csv"
SP_TSX500_data = pd.read_csv(SP_TSX500_file, index_col="Date")
SP_TSX500_data

In [96]:
# Move "Date" out of the index and back into a regular column
SP_TSX500_data = SP_TSX500_data.reset_index()

# Parse the Date strings into datetime values (column position is unchanged)
parsed_dates = pd.to_datetime(SP_TSX500_data['Date'])
SP_TSX500_data = SP_TSX500_data.assign(Date=parsed_dates)

# Keep only the Price column, indexed by Date
SP_TSX500_data_sliced = SP_TSX500_data.set_index('Date')[['Price']]

SP_TSX500_data_sliced.head(n=5)

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2020-10-30,928.9
2020-10-29,934.83
2020-10-28,930.9
2020-10-27,954.81
2020-10-26,959.91


In [98]:
# Save the cleaned TSX 60 price series to CSV.
# The frame written is SP_TSX500_data_sliced; previously this cell wrote
# `cad_data`, which put the CAD series into the TSX output file.
# NOTE(review): the file name spells "SPTXS60" (not "SPTSX60"); it is kept
# unchanged in case other notebooks read this exact path.
SP_TSX500_data_sliced.to_csv('../../../02-Data/Clean Data/Market Data/SPTXS60_5yrs_cln.csv')