In [1]:
# **************************
# Author: Sebastien Vezina
# Date: 05-Nov-2020
# **************************
import os
import requests
import json
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
from functools import partial

from alpha_vantage.foreignexchange import ForeignExchange

# Install package before importing: pip install alpha-vantage
# https://pypi.org/project/alpha-vantage/

In [2]:
# Load .env enviroment variables
load_dotenv()
alpha_vantage_key = os.getenv("ALPHAVANTAGE")

In [3]:
fx = ForeignExchange(key=alpha_vantage_key,output_format='pandas')
cad_data, meta_data = fx.get_currency_exchange_daily(from_symbol='CAD', to_symbol='USD', outputsize='full')

cad_data.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-11-06,0.7641,0.7657,0.7637,0.7645
2020-11-05,0.7607,0.7674,0.7584,0.766
2020-11-04,0.7596,0.7634,0.7515,0.7609
2020-11-03,0.7562,0.7629,0.7551,0.7601
2020-11-02,0.7485,0.7565,0.7474,0.7561


In [4]:
# Keep only Date and Close columns
cad_data.drop(['1. open', '2. high', '3. low'], axis=1, inplace=True)

# Rename columns
cad_data.reset_index(inplace=True)
cad_data.columns = ['Date', 'Close']

cad_data.head()

Unnamed: 0,Date,Close
0,2020-11-06,0.7645
1,2020-11-05,0.766
2,2020-11-04,0.7609
3,2020-11-03,0.7601
4,2020-11-02,0.7561


In [5]:
# Select only from 2015-10-01 to 2020-11-01
start_date = "01/11/2015"
end_date = "01/11/2020"

f = partial(pd.to_datetime, dayfirst=True)
cad_data = cad_data[(cad_data['Date'] > f(start_date)) & (cad_data['Date'] < f(end_date))]



# Add extra Year column
cad_data['Year'] = pd.DatetimeIndex(cad_data['Date']).year

# Change column order 
cad_data = cad_data[['Date', 'Year', 'Close']]

# Set index back
cad_data.set_index("Date", inplace = True)

# Save to CSV file
cad_data.to_csv('../../02-Data/Clean Data/Market Data/CAD_5yrs.csv')

cad_data

Unnamed: 0_level_0,Year,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-10-30,2020,0.7499
2020-10-29,2020,0.7501
2020-10-28,2020,0.7503
2020-10-27,2020,0.7581
2020-10-26,2020,0.7565
...,...,...
2015-11-06,2015,0.7515
2015-11-05,2015,0.7591
2015-11-04,2015,0.7601
2015-11-03,2015,0.7652


In [6]:
# GSPTSE from 2015-10-01 to 2020-11-01
GSPTSE_file = Path("../../02-Data/Dirty Data/Market Data/GSPTSE.csv")
GSPTSE_data = pd.read_csv(GSPTSE_file, index_col="date")
GSPTSE_data

Unnamed: 0_level_0,open,high,low,close,adj close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-11-02,13531.000000,13632.700195,13515.900391,13623.000000,13623.000000,172222200
2015-11-03,13643.299805,13740.400391,13609.599609,13710.299805,13710.299805,213866400
2015-11-04,13745.799805,13790.099609,13637.299805,13661.799805,13661.799805,234638000
2015-11-05,13652.200195,13664.500000,13534.900391,13558.799805,13558.799805,225689200
2015-11-06,13562.200195,13593.700195,13481.500000,13553.299805,13553.299805,202938700
...,...,...,...,...,...,...
2020-10-26,16208.000000,16224.200195,15990.099609,16079.599609,16079.599609,219833500
2020-10-27,16084.700195,16104.599609,15999.200195,16020.900391,16020.900391,188804700
2020-10-28,15835.099609,15835.099609,15575.099609,15586.599609,15586.599609,277461700
2020-10-29,15633.599609,15738.200195,15514.599609,15670.700195,15670.700195,205663200


In [7]:
# Remove useles column
GSPTSE_data.reset_index(inplace=True)
GSPTSE_data = GSPTSE_data[['date', 'adj close']]

# Add extra Year column
GSPTSE_data['year'] = GSPTSE_data['date'].str.slice(0, 4)

# Change column order 
GSPTSE_data = GSPTSE_data[['date', 'year', 'adj close']]

GSPTSE_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  GSPTSE_data['year'] = GSPTSE_data['date'].str.slice(0, 4)


Unnamed: 0,date,year,adj close
0,2015-11-02,2015,13623.000000
1,2015-11-03,2015,13710.299805
2,2015-11-04,2015,13661.799805
3,2015-11-05,2015,13558.799805
4,2015-11-06,2015,13553.299805
...,...,...,...
1250,2020-10-26,2020,16079.599609
1251,2020-10-27,2020,16020.900391
1252,2020-10-28,2020,15586.599609
1253,2020-10-29,2020,15670.700195


In [8]:
# Save to CSV file
cad_data.to_csv('../../02-Data/Clean Data/Market Data/GSPTSE_5yrs.csv')