In [None]:
# DREAMTEAM PROJECT 1 - DATA CLEAN NOTEBOOK

In [None]:
# # Datasets to be used
# 1. Trading cards (high liquidity cards) - 
# 2. Bitcoin - https://api.alternative.me/v2/ticker/Bitcoin/?convert=CAD
# 3. Government bond (baseline) - https://au.investing.com/rates-bonds/australia-10-year-bond-yield-historical-data
# 4. ASX200 (general stock market) - https://www.google.com/finance/quote/XJO:INDEXASX
# 5. Property - ABS data in 8 capital cities- https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/residential-property-price-indexes-eight-capital-cities/latest-release

In [10]:
# Intialising imports
import os
import requests
import pandas as pd
import json
import datetime as dt
from pathlib import Path

%matplotlib inline

In [11]:
# Importing Bitcoin current price
# Bitcoin API URL
btc_url = "https://api.alternative.me/v2/ticker/Bitcoin/?convert=AUD"

# Fetch current BTC price
btc_response = requests.get(btc_url)
btc_content = btc_response.content
btc_data = btc_response.json()
#print (json.dumps(btc_data, indent=4))
btc_price_current = float (btc_data["data"]["1"]["quotes"]["USD"]["price"])
print (btc_price_current)

39853.0


In [29]:
# Importing Bitcoin historical data

# Set the file path
btc_path=Path("btc_historical.csv")

# Reading the data
btc_df=pd.read_csv(btc_path, index_col='Date', parse_dates=True, infer_datetime_format=True)
btc_df.index = btc_df.index.date

# Ensuring data loaded correctly
btc_df.head()

Unnamed: 0,Close
2015-11-19,454.097675
2015-11-20,445.560711
2015-11-22,447.867676
2015-11-23,449.365998
2015-11-24,442.422744


In [26]:
# Cleaning Bitcoin historical data

# Count nulls
btc_df.isnull().mean()

# Drop nulls
btc_df = btc_df.dropna()

# Checking nulls are dropped
btc_df.isnull().sum()

Close    0
dtype: int64

In [40]:
# Importing and cleaning ASX200 historical data

# Set the file path
asx200_path=Path("asx200_historical.csv")

# Reading the data
asx200_df=pd.read_csv(asx200_path, index_col='Date', parse_dates=True, infer_datetime_format=True)
asx200_df.index = asx200_df.index.date

# Ensuring data loaded correctly
asx200_df.head()

Unnamed: 0,Close
2011-05-20,4732.2
2011-05-23,4643.0
2011-05-24,4628.8
2011-05-25,4584.7
2011-05-26,4660.2


In [46]:
asx200_df.isnull().mean()

Close    0.0
dtype: float64

In [53]:
# Importing and cleaning bonds historical data

# Set the file path
bonds_path=Path("asx_bonds.csv")

# Reading the data
bonds_df=pd.read_csv(bonds_path, index_col='Date', parse_dates=True, infer_datetime_format=True)
bonds_df.index = bonds_df.index.date

# Ensuring data loaded correctly
bonds_df.head()

Unnamed: 0,Close
2012-07-27,25.0
2012-08-08,24.57
2012-09-12,24.92
2012-09-19,24.78
2012-09-24,24.71


In [49]:
bonds_df.isnull().mean()

Close    0.0
dtype: float64

In [69]:
# Importing and cleaning property historical data

# Set the file path
property_path=Path("property_history.csv")

# Reading the data
property_df=pd.read_csv(property_path, index_col='Date', parse_dates=True, infer_datetime_format=True)
property_df.index = property_df.index.date

# Ensuring data loaded correctly
property_df.head()


Unnamed: 0,Sydney,Melbourne,Brisbane,Adelaide,Perth,Hobart,Darwin,Canberra,Weighted Average of eight Capital Cities
2003-09-01,85.3,60.7,64.2,62.2,48.3,61.2,40.5,68.3,69.0
2003-12-01,88.2,62.1,69.4,63.9,50.6,66.5,43.3,70.9,71.6
2004-03-01,87.0,60.8,70.7,64.8,52.5,68.7,45.5,69.9,71.3
2004-06-01,84.4,60.9,71.7,65.9,53.7,71.8,45.2,70.1,70.6
2004-09-01,84.2,60.9,71.2,67.1,54.7,72.3,46.8,68.6,70.6


In [70]:
property_df.dropna()
property_df.isnull().mean()

Sydney                                      0.0
Melbourne                                   0.0
Brisbane                                    0.0
Adelaide                                    0.0
Perth                                       0.0
Hobart                                      0.0
Darwin                                      0.0
Canberra                                    0.0
Weighted Average of eight Capital Cities    0.0
dtype: float64

In [71]:
# All Nulls removed from data sets

print (f"BTC is {btc_df.isnull().sum()}")
print (f"asx200 is {asx200_df.isnull().sum()}")
print (f"bonds is {bonds_df.isnull().sum()}")
print (f"property is {property_df.isnull().sum()}")

BTC is Close    0
dtype: int64
asx200 is Close    0
dtype: int64
bonds is Close    0
dtype: int64
property is Sydney                                      0
Melbourne                                   0
Brisbane                                    0
Adelaide                                    0
Perth                                       0
Hobart                                      0
Darwin                                      0
Canberra                                    0
Weighted Average of eight Capital Cities    0
dtype: int64
