In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
import datetime as dt

## AUD/USD Data Cleaning

In [2]:
# Read the AUD/USD quote file from the local data folder
csv_path = Path("Fin Data/AUDUSD.csv")
usd_aud = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the last five rows
usd_aud.tail(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
2821,7/02/2006,0.7399,0.7402,0.7448,0.7388,0.7413,26017,-0.002
2822,6/02/2006,0.7419,0.7422,0.75,0.741,0.7497,25226,-0.0072
2823,3/02/2006,0.7491,0.7494,0.7539,0.7472,0.7532,21137,-0.004
2824,2/02/2006,0.7531,0.7535,0.7555,0.7503,0.7525,25236,0.0006
2825,1/02/2006,0.7525,0.7529,0.7588,0.7522,0.7581,27553,-0.0061


In [3]:
# Discard every quote field except the date and the Bid price
usd_aud = usd_aud.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
usd_aud.tail(n=5)

Unnamed: 0,Exchange Date,Bid
2821,7/02/2006,0.7399
2822,6/02/2006,0.7419
2823,3/02/2006,0.7491
2824,2/02/2006,0.7531
2825,1/02/2006,0.7525


In [4]:
# Report the frame's dimensions as (rows, columns)
len(usd_aud.index), len(usd_aud.columns)

(2826, 2)

In [5]:
# Boolean mask: True wherever a value is missing
usd_aud.isna()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [6]:
# Count missing values per column
usd_aud.isna().sum()

Exchange Date    0
Bid              0
dtype: int64

In [7]:
# Flag rows that exactly repeat an earlier row
usd_aud.duplicated(keep='first')

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## AUD/EUR Data Cleaning

In [8]:
# Read the AUD/EUR quote file from the local data folder
csv_path = Path("Fin Data/AUDEUR.csv")
aud_eur = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the first five rows
aud_eur.head(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,30/11/2016,0.6971,0.698,0.7043,0.6952,0.7024,306983.0,-0.0055
1,29/11/2016,0.7026,0.7029,0.7065,0.7016,0.7044,289319.0,-0.0019
2,28/11/2016,0.7045,0.7054,0.7073,0.698,0.7007,263349.0,0.0029
3,25/11/2016,0.7016,0.7047,0.7051,0.6996,0.7016,287934.0,0.0001
4,24/11/2016,0.7015,0.7024,0.7026,0.6977,0.6993,275598.0,0.0024


In [9]:
# Discard every quote field except the date and the Bid price
aud_eur = aud_eur.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
aud_eur

Unnamed: 0,Exchange Date,Bid
0,30/11/2016,0.6971
1,29/11/2016,0.7026
2,28/11/2016,0.7045
3,25/11/2016,0.7016
4,24/11/2016,0.7015
...,...,...
2821,7/02/2006,0.6177
2822,6/02/2006,0.6199
2823,3/02/2006,0.6228
2824,2/02/2006,0.6225


In [10]:
# Boolean mask: True wherever a value is missing
aud_eur.isna()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [11]:
# Count missing values per column
aud_eur.isna().sum()

Exchange Date    0
Bid              0
dtype: int64

In [12]:
# Flag rows that exactly repeat an earlier row
aud_eur.duplicated(keep='first')

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## AUD/GBP Data Cleaning

In [13]:
# Read the AUD/GBP quote file from the local data folder
csv_path = Path("Fin Data/AUDGBP.csv")
aud_gbp = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the first five rows
aud_gbp.head(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,30/11/2016,0.5902,0.5909,0.6006,0.5892,0.599,314106.0,-0.0088
1,29/11/2016,0.599,0.5994,0.6042,0.5952,0.6021,292461.0,-0.0033
2,28/11/2016,0.6023,0.6031,0.6039,0.5943,0.5954,265222.0,0.0069
3,25/11/2016,0.5954,0.598,0.6,0.5936,0.5943,288133.0,0.0009
4,24/11/2016,0.5945,0.5952,0.5957,0.5918,0.5932,280617.0,0.0013


In [14]:
# Discard every quote field except the date and the Bid price
aud_gbp = aud_gbp.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
aud_gbp

Unnamed: 0,Exchange Date,Bid
0,30/11/2016,0.5902
1,29/11/2016,0.5990
2,28/11/2016,0.6023
3,25/11/2016,0.5954
4,24/11/2016,0.5945
...,...,...
2821,7/02/2006,0.4243
2822,6/02/2006,0.4245
2823,3/02/2006,0.4250
2824,2/02/2006,0.4231


In [15]:
# Boolean mask: True wherever a value is missing
aud_gbp.isna()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [16]:
# Count missing values per column
aud_gbp.isna().sum()

Exchange Date    0
Bid              0
dtype: int64

In [17]:
# Flag rows that exactly repeat an earlier row
aud_gbp.duplicated(keep='first')

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## AUD/HKD Data Cleaning

In [18]:
# Read the AUD/HKD quote file from the local data folder
csv_path = Path("Fin Data/AUDHKD.csv")
aud_hdk = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the first five rows
aud_hdk.head(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,30/11/2016,5.7257,5.7314,5.8146,5.7147,5.8037,153605.0,-0.078
1,29/11/2016,5.8037,5.8061,5.8145,5.76,5.8009,142490.0,0.0027
2,28/11/2016,5.801,5.8069,5.8118,5.7561,5.7592,127159.0,0.0387
3,25/11/2016,5.7623,5.7857,5.792,5.7346,5.7417,140939.0,0.0183
4,24/11/2016,5.744,5.7473,5.7529,5.7064,5.7275,134490.0,0.0196


In [19]:
# Discard every quote field except the date and the Bid price
aud_hdk = aud_hdk.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
aud_hdk

Unnamed: 0,Exchange Date,Bid
0,30/11/2016,5.7257
1,29/11/2016,5.8037
2,28/11/2016,5.8010
3,25/11/2016,5.7623
4,24/11/2016,5.7440
...,...,...
2821,7/02/2006,5.7407
2822,6/02/2006,5.7552
2823,3/02/2006,5.8105
2824,2/02/2006,5.8420


In [20]:
# Boolean mask: True wherever a value is missing
aud_hdk.isna()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [21]:
# Count missing values per column
aud_hdk.isna().sum()

Exchange Date    0
Bid              0
dtype: int64

In [22]:
# Flag rows that exactly repeat an earlier row
aud_hdk.duplicated(keep='first')

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## AUD/NZD Data Cleaning

In [23]:
# Read the AUD/NZD quote file from the local data folder
csv_path = Path("Fin Data/AUDNZD.csv")
aud_nzd = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the first five rows
aud_nzd.head(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,30/11/2016,0.9584,0.9596,0.9601,0.9515,0.9515,9905.0,0.0069
1,29/11/2016,0.9515,0.9526,0.9538,0.9443,0.9451,9313.0,0.0063
2,28/11/2016,0.9452,0.9455,0.949,0.9437,0.9454,8772.0,0.0013
3,25/11/2016,0.9439,0.9447,0.9457,0.9421,0.9448,3722.0,-0.0007
4,24/11/2016,0.9446,0.9459,0.9494,0.9434,0.9479,8142.0,-0.0029


In [24]:
# Discard every quote field except the date and the Bid price
aud_nzd = aud_nzd.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
aud_nzd

Unnamed: 0,Exchange Date,Bid
0,30/11/2016,0.9584
1,29/11/2016,0.9515
2,28/11/2016,0.9452
3,25/11/2016,0.9439
4,24/11/2016,0.9446
...,...,...
2821,7/02/2006,0.9173
2822,6/02/2006,0.9173
2823,3/02/2006,0.9168
2824,2/02/2006,0.9158


In [25]:
# Boolean mask: True wherever a value is missing
aud_nzd.isna()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [26]:
# Count missing values per column
aud_nzd.isna().sum()

Exchange Date    0
Bid              0
dtype: int64

In [27]:
# Flag rows that exactly repeat an earlier row
aud_nzd.duplicated(keep='first')

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## AUD/SGD Data Cleaning

In [28]:
# Read the AUD/SGD quote file from the local data folder
csv_path = Path("Fin Data/AUDSGD.csv")
aud_sgd = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the first five rows
aud_sgd.head(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,30/11/2016,1.058,1.0597,1.0673,1.0562,1.066,196583.0,-0.008
1,29/11/2016,1.066,1.0671,1.0683,1.0619,1.0648,181546.0,0.0011
2,28/11/2016,1.0649,1.0662,1.068,1.0564,1.0603,165029.0,0.0034
3,25/11/2016,1.0615,1.0664,1.0653,1.0588,1.0602,181718.0,0.0009
4,24/11/2016,1.0606,1.0614,1.0618,1.0542,1.0572,170027.0,0.004


In [29]:
# Discard every quote field except the date and the Bid price
aud_sgd = aud_sgd.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
aud_sgd

Unnamed: 0,Exchange Date,Bid
0,30/11/2016,1.0580
1,29/11/2016,1.0660
2,28/11/2016,1.0649
3,25/11/2016,1.0615
4,24/11/2016,1.0606
...,...,...
2821,7/02/2006,1.2043
2822,6/02/2006,1.2085
2823,3/02/2006,1.2239
2824,2/02/2006,1.2298


In [30]:
# Check the AUD/SGD frame for null values.
# This cell belongs to the SGD section, so it must inspect aud_sgd
# (inspecting aud_nzd here would leave the SGD data unchecked).
aud_sgd.isnull()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [31]:
# Determine the number of nulls per column in the AUD/SGD frame.
# This cell belongs to the SGD section, so it must count nulls in aud_sgd
# (counting on aud_nzd here would leave the SGD data unchecked).
aud_sgd.isnull().sum()

Exchange Date    0
Bid              0
dtype: int64

In [32]:
# Flag duplicated rows in the AUD/SGD frame.
# This cell belongs to the SGD section, so it must test aud_sgd
# (testing aud_nzd here would leave the SGD data unchecked).
aud_sgd.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## AUD/CAD Data Cleaning

In [33]:
# Read the AUD/CAD quote file from the local data folder
csv_path = Path("Fin Data/AUDCAD.csv")
aud_cad = pd.read_csv(filepath_or_buffer=csv_path, header=0)
# Preview the first five rows
aud_cad.head(n=5)

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,30/11/2016,0.9919,0.9927,1.0062,0.9904,1.0046,22853.0,-0.0131
1,29/11/2016,1.005,1.0055,1.0062,1.0003,1.0029,20904.0,0.0019
2,28/11/2016,1.0031,1.004,1.0094,1.0003,1.0032,18951.0,-0.0032
3,25/11/2016,1.0063,1.0073,1.0066,0.9981,0.9985,22213.0,0.0074
4,24/11/2016,0.9989,1.0,1.0002,0.9943,0.9955,14382.0,0.0036


In [34]:
# Discard every quote field except the date and the Bid price
aud_cad = aud_cad.drop(columns=['Ask', 'High', 'Low', 'Open', 'Refresh Rate', 'BidNet'])
aud_cad

Unnamed: 0,Exchange Date,Bid
0,30/11/2016,0.9919
1,29/11/2016,1.0050
2,28/11/2016,1.0031
3,25/11/2016,1.0063
4,24/11/2016,0.9989
...,...,...
2821,7/02/2006,0.8533
2822,6/02/2006,0.8504
2823,3/02/2006,0.8579
2824,2/02/2006,0.8618


In [35]:
# Boolean mask: True wherever a value is missing
aud_cad.isna()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
2821,False,False
2822,False,False
2823,False,False
2824,False,False


In [36]:
# Count missing values per column
aud_cad.isna().sum()

Exchange Date    0
Bid              0
dtype: int64

In [37]:
# Flag rows that exactly repeat an earlier row
aud_cad.duplicated(keep='first')

0       False
1       False
2       False
3       False
4       False
        ...  
2821    False
2822    False
2823    False
2824    False
2825    False
Length: 2826, dtype: bool

## Concat the data together

In [38]:
# Place the per-currency frames side by side, keeping only row labels present in all of them.
# NOTE(review): rows are matched on the frames' row index (position in each file), not on
# 'Exchange Date' — this assumes every file lists the same dates in the same order; TODO confirm.
# NOTE(review): the HKD frame (aud_hdk) is not part of this list — confirm that is intended.
currency_data = pd.concat(
    [usd_aud, aud_eur, aud_gbp, aud_nzd, aud_sgd, aud_cad],
    axis=1,
    join="inner",
)
# Use the USD frame's dates as the row labels for the combined table
currency_data = currency_data.set_index(usd_aud['Exchange Date'])
# Every source frame contributed its own 'Exchange Date' column; remove all of them
currency_data = currency_data.drop(columns=['Exchange Date'])
currency_data

Unnamed: 0_level_0,Bid,Bid,Bid,Bid,Bid,Bid
Exchange Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
30/11/2016,0.7382,0.6971,0.5902,0.9584,1.0580,0.9919
29/11/2016,0.7483,0.7026,0.5990,0.9515,1.0660,1.0050
28/11/2016,0.7479,0.7045,0.6023,0.9452,1.0649,1.0031
25/11/2016,0.7430,0.7016,0.5954,0.9439,1.0615,1.0063
24/11/2016,0.7406,0.7015,0.5945,0.9446,1.0606,0.9989
...,...,...,...,...,...,...
7/02/2006,0.7399,0.6177,0.4243,0.9173,1.2043,0.8533
6/02/2006,0.7419,0.6199,0.4245,0.9173,1.2085,0.8504
3/02/2006,0.7491,0.6228,0.4250,0.9168,1.2239,0.8579
2/02/2006,0.7531,0.6225,0.4231,0.9158,1.2298,0.8618


In [39]:
# Amend the column names so each 'Bid' column carries its currency code
# (order must match the order of the frames passed to the concat step)
columns = [
    'USD',
    'EUR',
    'GBP',
    'NZD',
    'SGD',
    'CAD',
]
currency_data.columns = columns
currency_data.head(n=5)

Unnamed: 0_level_0,USD,EUR,GBP,NZD,SGD,CAD
Exchange Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
30/11/2016,0.7382,0.6971,0.5902,0.9584,1.058,0.9919
29/11/2016,0.7483,0.7026,0.599,0.9515,1.066,1.005
28/11/2016,0.7479,0.7045,0.6023,0.9452,1.0649,1.0031
25/11/2016,0.743,0.7016,0.5954,0.9439,1.0615,1.0063
24/11/2016,0.7406,0.7015,0.5945,0.9446,1.0606,0.9989


In [41]:
# Write the combined table to disk; the date index is written as the first column
currency_data.to_csv('currency_data.csv', index=True)
