# Objectives

- Use the gold price as the target variable and collect a variety of economic factors as features to predict it

# Import

In [3]:
import pandas as pd
from datetime import datetime

# Read Data

## Gold Price Data
1. Download gold price data from https://www.gold.org/goldhub/data/gold-prices in `xlsx` format
2. Select monthly average gold price data in USD and save as **`PRICE-MONTHLY.csv`**

In [4]:
# Load the monthly gold price file, parsing the DATE column as timestamps.
price = pd.read_csv('PRICE-MONTHLY.csv', parse_dates=['DATE'])

# Build an integer YYYYMM key (e.g. 1978-12-31 -> 197812) so the datasets can
# be joined by month even though they report different days of the month.
price['YearMonth'] = price['DATE'].apply(lambda ts: ts.year * 100 + ts.month)
price.head()

Unnamed: 0,DATE,PRICE,YearMonth
0,1978-12-31,207.8,197812
1,1979-01-31,227.3,197901
2,1979-02-28,245.7,197902
3,1979-03-30,242.1,197903
4,1979-04-30,239.2,197904


In [7]:
# Count missing values per column of the gold price frame.
price.isna().sum()

DATE         0
PRICE        0
YearMonth    0
dtype: int64

In [15]:
# Show the first and last dates covered by the gold price data.
display(price.loc[:, 'DATE'].min())
price.loc[:, 'DATE'].max()

Timestamp('1978-12-31 00:00:00')

Timestamp('2019-09-30 00:00:00')

## Nominal Effective Exchange Rate Data
1. Download nominal effective exchange rate data from https://www.bis.org/statistics/eer.htm in `csv` format
2. Select the daily US nominal effective exchange rate (it is aggregated to monthly averages below) and save as **`NEER.csv`**

In [8]:
# Load the daily NEER series, parsing the DATE column as timestamps.
neer = pd.read_csv('NEER.csv', parse_dates=['DATE'])

# Collapse the daily observations into one row per month (month-end label)
# by averaging, then restore DATE as a regular column.
neer_monthly = (
    neer
    .resample('M', on='DATE')
    .mean()
    .reset_index()
)

# Integer YYYYMM key used to join the datasets by month.
neer_monthly['YearMonth'] = neer_monthly['DATE'].apply(lambda ts: ts.year * 100 + ts.month)
neer_monthly.head()

Unnamed: 0,DATE,NEER,YearMonth
0,1983-10-31,152.060952,198310
1,1983-11-30,153.582273,198311
2,1983-12-31,155.408636,198312
3,1984-01-31,156.589545,198401
4,1984-02-29,154.926667,198402


In [18]:
# Count missing values per column of the monthly NEER frame.
neer_monthly.isna().sum()

DATE         0
NEER         0
YearMonth    0
dtype: int64

In [16]:
# Show the first and last dates covered by the daily NEER data.
display(neer.loc[:, 'DATE'].min())
neer.loc[:, 'DATE'].max()

Timestamp('1983-10-03 00:00:00')

Timestamp('2019-09-30 00:00:00')

## Effective Federal Funds Rate Data
1. Download monthly effective federal funds rate data from https://fred.stlouisfed.org/series/FEDFUNDS and save as **`FFR-MONTHLY.csv`**

In [11]:
# Load the monthly effective federal funds rate, parsing DATE as timestamps.
ffr = pd.read_csv('FFR-MONTHLY.csv', parse_dates=['DATE'])
# Integer YYYYMM key (e.g. 1954-07-01 -> 195407) used to join the datasets by month.
ffr['YearMonth'] = ffr['DATE'].apply(lambda ts: ts.year * 100 + ts.month)
ffr.head()

Unnamed: 0,DATE,FEDFUNDS,YearMonth
0,1954-07-01,0.8,195407
1,1954-08-01,1.22,195408
2,1954-09-01,1.06,195409
3,1954-10-01,0.85,195410
4,1954-11-01,0.83,195411


In [12]:
# Count missing values per column of the federal funds rate frame.
ffr.isna().sum()

DATE         0
FEDFUNDS     0
YearMonth    0
dtype: int64

In [19]:
# Show the first and last dates covered by the federal funds rate data.
display(ffr.loc[:, 'DATE'].min())
ffr.loc[:, 'DATE'].max()

Timestamp('1954-07-01 00:00:00')

Timestamp('2019-09-01 00:00:00')

## Consumer Price Index Data
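1. Download consumer price index data from https://fred.stlouisfed.org/series/CPIAUCSL and save as **`CPI-MONTHLY.csv`** (note: the file loaded below contains a `CPIHOSNS` column rather than `CPIAUCSL`, so verify which FRED series was actually downloaded)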

In [21]:
# Load the monthly consumer price index file, parsing DATE as timestamps.
cpi = pd.read_csv('CPI-MONTHLY.csv', parse_dates=['DATE'])
# Integer YYYYMM key (e.g. 1967-01-01 -> 196701) used to join the datasets by month.
cpi['YearMonth'] = cpi['DATE'].apply(lambda ts: ts.year * 100 + ts.month)
cpi.head()

Unnamed: 0,DATE,CPIHOSNS,YearMonth
0,1967-01-01,30.5,196701
1,1967-02-01,30.5,196702
2,1967-03-01,30.5,196703
3,1967-04-01,30.6,196704
4,1967-05-01,30.7,196705


In [22]:
# Count missing values per column of the CPI frame.
cpi.isna().sum()

DATE         0
CPIHOSNS     0
YearMonth    0
dtype: int64

In [23]:
# Show the first and last dates covered by the CPI data.
display(cpi.loc[:, 'DATE'].min())
cpi.loc[:, 'DATE'].max()

Timestamp('1967-01-01 00:00:00')

Timestamp('2019-08-01 00:00:00')

## Dow Jones Industrial Average Data
1. Download Dow Jones industrial average data from https://finance.yahoo.com/quote/%5EDJI/history?p=%5EDJI and save as **`DJIA-MONTHLY.csv`**

In [33]:
# Load the monthly Dow Jones Industrial Average file; note this file's date
# column is named 'Date' (not 'DATE' as in the other datasets).
djia = pd.read_csv('DJIA-MONTHLY.csv', parse_dates=['Date'])
# create YearMonth column to easily merge data
# The key must come from djia's own 'Date' column. The earlier version read
# it from `dfi`, a name that is never defined in this notebook, so the cell
# raised NameError on a fresh kernel.
djia['YearMonth'] = djia['Date'].map(lambda x: 100*x.year + x.month)
djia.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,YearMonth
0,1985-01-01,1277.719971,1305.099976,1266.890015,1286.77002,1286.77002,44450000,198501
1,1985-02-01,1276.939941,1307.530029,1263.910034,1284.01001,1284.01001,207300000,198502
2,1985-03-01,1285.339966,1309.959961,1242.819946,1266.780029,1266.780029,201050000,198503
3,1985-04-01,1264.800049,1290.300049,1245.800049,1258.060059,1258.060059,187110000,198504
4,1985-05-01,1257.180054,1320.790039,1235.530029,1315.410034,1315.410034,242250000,198505


In [31]:
# Count missing values per column of the DJIA frame.
djia.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
YearMonth    0
dtype: int64

In [32]:
# quick check the strat and end date of data
display(djia.loc[:, 'Date'].min())
djia.loc[:, 'Date'].max()

Timestamp('1985-01-01 00:00:00')

Timestamp('2019-10-01 00:00:00')

## World Official Gold Reserve Data
1. Download world official gold reserve data from https://www.gold.org/goldhub/data/monthly-central-bank-statistics in `xlsx` format
2. Select quarterly world gold reserves in tonnes and save as **`WGR-QUARTERLY.csv`**

In [36]:
# Load the quarterly world official gold reserve file.
# NOTE(review): DATE holds quarter labels such as 'Q1 2000'; the displayed
# values suggest parse_dates leaves them as plain strings -- confirm before
# using this column as a date.
wgr = pd.read_csv('WGR-QUARTERLY.csv', parse_dates=['DATE'])
wgr.head(n=5)

Unnamed: 0,DATE,VOLUME
0,Q1 2000,33444
1,Q2 2000,33267
2,Q3 2000,33310
3,Q4 2000,33212
4,Q1 2001,33065


In [37]:
# Count missing values per column of the gold reserve frame.
wgr.isna().sum()

DATE      0
VOLUME    0
dtype: int64

In [38]:
# Show the smallest and largest DATE labels in the gold reserve data.
# NOTE(review): the values shown are strings like 'Q1 2000', so min/max here
# is a string comparison rather than a chronological one -- confirm.
display(wgr.loc[:, 'DATE'].min())
wgr.loc[:, 'DATE'].max()

'Q1 2000'

'Q4 2018'

In [56]:
# merge 5 datasets except world official gold reserve dataset
#
# All frames are inner-joined on the integer YearMonth key, so only months
# present in every dataset survive.
#
# price and neer_monthly both carry a 'DATE' column, which the first merge
# suffixes to DATE_x / DATE_y. ffr and cpi also carry 'DATE'; merging them
# as-is makes pandas generate a second DATE_x / DATE_y pair, i.e. duplicate
# column labels (recent pandas versions refuse this with a MergeError).
# Their dates are redundant with YearMonth, so they are left out of the join.
final_merge = (
    price
    .merge(neer_monthly, how='inner', on='YearMonth')
    .merge(ffr.drop(columns='DATE'), how='inner', on='YearMonth')
    .merge(cpi.drop(columns='DATE'), how='inner', on='YearMonth')
    .merge(djia, how='inner', on='YearMonth')
)

In [57]:
# Discard the suffixed per-dataset date columns left over from the merge;
# the month is already captured by YearMonth.
final_drop = final_merge.drop(['DATE_y', 'DATE_x'], axis='columns')
final_drop.columns

Index(['PRICE', 'YearMonth', 'NEER', 'FEDFUNDS', 'CPIHOSNS', 'Date', 'Open',
       'High', 'Low', 'Close', 'Adj Close', 'Volume'],
      dtype='object')

In [58]:
# rename columns
# Rename by name (old -> new) rather than assigning a positional list, so a
# change in column order upstream cannot silently mislabel a series.
# The federal funds rate name previously had a leading space
# (' effective_federal_funds_rate'), which breaks lookups by the clean name.
column_names = {
    'PRICE': 'gold_price',
    'YearMonth': 'year_month',
    'NEER': 'nominal_effective_exchange_rate',
    'FEDFUNDS': 'effective_federal_funds_rate',
    'CPIHOSNS': 'cpi_index',
    'Date': 'date',
    'Open': 'djia_open',
    'High': 'djia_high',
    'Low': 'djia_low',
    'Close': 'djia_close',
    'Adj Close': 'djia_adj_close',
    'Volume': 'djia_volume',
}
final_drop = final_drop.rename(columns=column_names)

# set_index + reset_index moves 'date' to the first column.
final = final_drop.set_index('date').reset_index()
final

Unnamed: 0,date,gold_price,year_month,nominal_effective_exchange_rate,effective_federal_funds_rate,cpi_index,djia_open,djia_high,djia_low,djia_close,djia_adj_close,djia_volume
0,1985-01-01,302.8,198501,171.491304,8.35,105.300,1277.719971,1305.099976,1266.890015,1286.770020,1286.770020,44450000
1,1985-02-01,299.1,198502,176.187000,8.50,105.800,1276.939941,1307.530029,1263.910034,1284.010010,1284.010010,207300000
2,1985-03-01,303.9,198503,177.040476,8.58,106.100,1285.339966,1309.959961,1242.819946,1266.780029,1266.780029,201050000
3,1985-04-01,325.3,198504,170.949545,8.27,106.500,1264.800049,1290.300049,1245.800049,1258.060059,1258.060059,187110000
4,1985-05-01,316.4,198505,171.393913,7.97,107.300,1257.180054,1320.790039,1235.530029,1315.410034,1315.410034,242250000
5,1985-06-01,316.5,198506,169.692500,7.53,107.900,1321.239990,1341.170044,1285.390015,1335.459961,1335.459961,205340000
6,1985-07-01,317.2,198507,164.653478,7.88,108.300,1334.010010,1372.199951,1313.400024,1347.449951,1347.449951,235130000
7,1985-08-01,329.8,198508,162.478182,7.90,108.700,1350.020020,1361.900024,1303.650024,1334.010010,1334.010010,176010000
8,1985-09-01,323.4,198509,163.872857,7.92,108.900,1333.670044,1348.229980,1283.709961,1328.630005,1328.630005,196960000
9,1985-10-01,325.8,198510,156.121304,7.99,109.100,1329.640015,1384.630005,1315.189941,1374.310059,1374.310059,253060000
