In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
import datetime as dt

## USD/AUD Data Cleaning

In [2]:
csv_path = Path("Fin Data/AUDUSD.csv")
usd_aud = pd.read_csv(csv_path, header=0)
usd_aud.tail()

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
5213,16-May-2002,0.5484,0.5489,0.5505,0.547,0.5489,,-0.0004
5214,15-May-2002,0.5488,0.5493,0.5493,0.5443,0.5483,,0.0011
5215,14-May-2002,0.5477,0.5482,0.5483,0.5444,0.5468,,0.0006
5216,13-May-2002,0.5471,0.5476,0.5479,0.5435,0.5439,,0.0029
5217,10-May-2002,0.5442,0.5447,0.5451,0.5418,0.5433,,


In [3]:
# Drop unused columns
usd_aud.drop(columns=['Ask','High','Low','Open','Refresh Rate','BidNet'], axis=1, inplace=True) 
usd_aud.tail()

Unnamed: 0,Exchange Date,Bid
5213,16-May-2002,0.5484
5214,15-May-2002,0.5488
5215,14-May-2002,0.5477
5216,13-May-2002,0.5471
5217,10-May-2002,0.5442


In [4]:
# Check total rows and columns
usd_aud.shape

(5218, 2)

In [5]:
usd_aud = usd_aud.drop(labels=[5217], axis=0)

In [6]:
# Check for null values
usd_aud.isnull()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
5212,False,False
5213,False,False
5214,False,False
5215,False,False


In [7]:
# Determine number of nulls
usd_aud.isnull().sum()

Exchange Date    0
Bid              0
dtype: int64

In [8]:
usd_aud.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
5212    False
5213    False
5214    False
5215    False
5216    False
Length: 5217, dtype: bool

## AUD/EUR data cleaning

In [9]:
csv_path = Path("Fin Data/AUDEUR.csv")
aud_eur = pd.read_csv(csv_path, header=0)
aud_eur.head()

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,10-May-2022,0.6592,0.6595,0.6603,0.6541,0.6587,63404.0,0.0005
1,09-May-2022,0.6587,0.6591,0.6707,0.6573,0.6694,177221.0,-0.0118
2,06-May-2022,0.6705,0.6709,0.676,0.6677,0.6744,200889.0,-0.0039
3,05-May-2022,0.6744,0.6749,0.6836,0.6729,0.6835,198266.0,-0.0091
4,04-May-2022,0.6835,0.6839,0.6848,0.6735,0.6741,157781.0,0.0094


In [10]:
aud_eur.drop(columns=['Ask','High','Low','Open','Refresh Rate','BidNet'], axis=1, inplace=True) 
aud_eur

Unnamed: 0,Exchange Date,Bid
0,10-May-2022,0.6592
1,09-May-2022,0.6587
2,06-May-2022,0.6705
3,05-May-2022,0.6744
4,04-May-2022,0.6835
...,...,...
5212,16-May-2002,0.6014
5213,15-May-2002,0.6012
5214,14-May-2002,0.6068
5215,13-May-2002,0.6001


In [11]:
# Check for null values
aud_eur.isnull()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
5212,False,False
5213,False,False
5214,False,False
5215,False,False


In [12]:
# Determine number of nulls
aud_eur.isnull().sum()

Exchange Date    0
Bid              0
dtype: int64

In [13]:
aud_eur.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
5212    False
5213    False
5214    False
5215    False
5216    False
Length: 5217, dtype: bool

## GBP/AUD Data Cleaning

In [14]:
csv_path = Path("Fin Data/AUDGBP.csv")
aud_gbp = pd.read_csv(csv_path, header=0)
aud_gbp.head()

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,10-May-2022,0.5639,0.5641,0.5647,0.5599,0.5639,89237.0,0.0001
1,09-May-2022,0.5638,0.5642,0.5735,0.5629,0.5735,215522.0,-0.0097
2,06-May-2022,0.5735,0.5738,0.5774,0.5727,0.575,253153.0,-0.0015
3,05-May-2022,0.575,0.5753,0.5819,0.5732,0.5752,245991.0,-0.0002
4,04-May-2022,0.5752,0.5755,0.5762,0.5672,0.5675,200667.0,0.0077


In [15]:
aud_gbp.drop(columns=['Ask','High','Low','Open','Refresh Rate','BidNet'], axis=1, inplace=True) 
aud_gbp

Unnamed: 0,Exchange Date,Bid
0,10-May-2022,0.5639
1,09-May-2022,0.5638
2,06-May-2022,0.5735
3,05-May-2022,0.5750
4,04-May-2022,0.5752
...,...,...
5212,16-May-2002,0.3765
5213,15-May-2002,0.3756
5214,14-May-2002,0.3777
5215,13-May-2002,0.3754


In [16]:
# Check for null values
aud_gbp.isnull()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
5212,False,False
5213,False,False
5214,False,False
5215,False,False


In [17]:
# Determine number of nulls
aud_gbp.isnull().sum()

Exchange Date    0
Bid              0
dtype: int64

In [18]:
aud_gbp.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
5212    False
5213    False
5214    False
5215    False
5216    False
Length: 5217, dtype: bool

## AUD/JPY Data Cleaning

In [19]:
csv_path = Path("Fin Data/AUDJPY.csv")
jpy_aud = pd.read_csv(csv_path, header=0)
jpy_aud.head()

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,10-May-2022,90.97,91.1,91.02,89.75,90.56,986.0,0.45
1,09-May-2022,90.52,90.57,92.3,90.46,92.2,1715.0,-1.77
2,06-May-2022,92.29,92.42,92.97,92.1,92.54,1832.0,-0.3
3,05-May-2022,92.59,92.62,93.98,92.3,93.63,678.0,-1.07
4,04-May-2022,93.66,93.7,93.66,92.23,92.32,653.0,1.34


In [20]:
jpy_aud.drop(columns=['Ask','High','Low','Open','Refresh Rate','BidNet'], axis=1, inplace=True) 
jpy_aud

Unnamed: 0,Exchange Date,Bid
0,10-May-2022,90.97
1,09-May-2022,90.52
2,06-May-2022,92.29
3,05-May-2022,92.59
4,04-May-2022,93.66
...,...,...
5212,16-May-2002,70.23
5213,15-May-2002,70.19
5214,14-May-2002,70.34
5215,13-May-2002,69.83


In [21]:
# Check for null values
jpy_aud.isnull()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
5212,False,False
5213,False,False
5214,False,False
5215,False,False


In [22]:
# Determine number of nulls
jpy_aud.isnull().sum()

Exchange Date    0
Bid              0
dtype: int64

In [23]:
jpy_aud.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
5212    False
5213    False
5214    False
5215    False
5216    False
Length: 5217, dtype: bool

## BTC/AUD Data Cleaning

In [24]:
csv_path = Path("Fin Data/BTCAUD.csv")
btc_aud = pd.read_csv(csv_path, header=0)
btc_aud.head()

Unnamed: 0,Exchange Date,Bid,Ask,High,Low,Open,Refresh Rate,BidNet
0,10-May-2022,45648.2457,45696.8916,45663.4345,42875.685,44475.2192,111893.0,1176.16
1,09-May-2022,44472.0857,44530.8843,49002.5992,43614.7871,48732.5611,271751.0,-6473.0054
2,06-May-2022,50945.0911,50978.5785,52047.5587,49824.735,51231.8009,231781.0,-286.7098
3,05-May-2022,51231.8009,51272.0996,55265.1649,50161.2858,54780.5892,225114.0,-3548.7883
4,04-May-2022,54780.5892,54822.4181,55327.2475,52951.9707,53216.6855,207177.0,1563.9037


In [25]:
btc_aud.drop(columns=['Ask','High','Low','Open','Refresh Rate','BidNet'], axis=1, inplace=True) 
btc_aud

Unnamed: 0,Exchange Date,Bid
0,10-May-2022,45648.2457
1,09-May-2022,44472.0857
2,06-May-2022,50945.0911
3,05-May-2022,51231.8009
4,04-May-2022,54780.5892
...,...,...
1932,12-Dec-2014,432.5818
1933,11-Dec-2014,422.5405
1934,10-Dec-2014,420.8464
1935,09-Dec-2014,423.6982


In [26]:
# Check for null values
btc_aud.isnull()

Unnamed: 0,Exchange Date,Bid
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
1932,False,False
1933,False,False
1934,False,False
1935,False,False


In [27]:
# Determine number of nulls
btc_aud.isnull().sum()

Exchange Date    0
Bid              1
dtype: int64

In [28]:
btc_aud.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
1932    False
1933    False
1934    False
1935    False
1936    False
Length: 1937, dtype: bool

## Concat the data together

In [29]:
# Concat the data together
currency_data = pd.concat([usd_aud, aud_eur, aud_gbp, jpy_aud], axis="columns", join="inner")
currency_data.set_index(usd_aud['Exchange Date'], inplace=True)
currency_data.drop(columns=['Exchange Date','Exchange Date','Exchange Date'], axis=1, inplace=True)
currency_data

Unnamed: 0_level_0,Bid,Bid,Bid,Bid
Exchange Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10-May-2022,0.6976,0.6592,0.5639,90.97
09-May-2022,0.6955,0.6587,0.5638,90.52
06-May-2022,0.7077,0.6705,0.5735,92.29
05-May-2022,0.7111,0.6744,0.5750,92.59
04-May-2022,0.7262,0.6835,0.5752,93.66
...,...,...,...,...
17-May-2002,0.5525,0.6014,0.3765,70.23
16-May-2002,0.5484,0.6012,0.3756,70.19
15-May-2002,0.5488,0.6068,0.3777,70.34
14-May-2002,0.5477,0.6001,0.3754,69.83


In [30]:
# Ammend column names to equal corresponding currency
columns = ['USD','EUR','GBP','JPY']
currency_data.columns = columns
currency_data.head()

Unnamed: 0_level_0,USD,EUR,GBP,JPY
Exchange Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10-May-2022,0.6976,0.6592,0.5639,90.97
09-May-2022,0.6955,0.6587,0.5638,90.52
06-May-2022,0.7077,0.6705,0.5735,92.29
05-May-2022,0.7111,0.6744,0.575,92.59
04-May-2022,0.7262,0.6835,0.5752,93.66


In [31]:
# Save the dataframe as a csv file
currency_data.to_csv('currency_data.csv')
btc_aud.to_csv('BTC_data.csv')