In [77]:
import pandas as pd
import requests

In [78]:
# Location of the raw gold price CSV (hosted on GitHub).
GOLD_PRICES_URL = (
    "https://raw.githubusercontent.com/HerbDavis20/HerbDavis20.github.io/"
    "refs/heads/main/project_files/gold_prices.csv"
)

# Read the CSV straight from the URL and preview the first ten rows.
df = pd.read_csv(GOLD_PRICES_URL)
df.head(10)

Unnamed: 0,USD/Gold per ounce,price,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,22/03/1968,40.0,,,
1,25/03/1968,40.25,,,
2,26/03/1968,41.1,,,
3,27/03/1968,40.35,,,
4,28/03/1968,39.6,,,
5,29/03/1968,39.45,,,
6,01/04/1968,38.6,,,
7,03/04/1968,38.45,,,
8,04/04/1968,38.05,,,
9,05/04/1968,38.65,,,


I am going to rename the columns to make it clearer what they contain.

In [79]:
# Map the headers from the raw file to names that describe each column's contents:
# the column headed 'USD/Gold per ounce' holds the dates, and 'price' holds the price.
column_labels = {
    'USD/Gold per ounce': 'date',
    'price': 'Price(USD$ / Ounce)',
}
df = df.rename(columns=column_labels)
df.head(10)

Unnamed: 0,date,Price(USD$ / Ounce),Unnamed: 2,Unnamed: 3,Unnamed: 4
0,22/03/1968,40.0,,,
1,25/03/1968,40.25,,,
2,26/03/1968,41.1,,,
3,27/03/1968,40.35,,,
4,28/03/1968,39.6,,,
5,29/03/1968,39.45,,,
6,01/04/1968,38.6,,,
7,03/04/1968,38.45,,,
8,04/04/1968,38.05,,,
9,05/04/1968,38.65,,,


Next, I want to remove the unnecessary columns that are not relevant to my analysis.

In [80]:
# Keep only the two columns used in the analysis. .copy() makes df an independent
# DataFrame rather than a slice of the wider frame, so assigning to one of its
# columns afterwards (e.g. reformatting 'date') does not raise SettingWithCopyWarning.
df = df[['date', 'Price(USD$ / Ounce)']].copy()
df.head(10)

Unnamed: 0,date,Price(USD$ / Ounce)
0,22/03/1968,40.0
1,25/03/1968,40.25
2,26/03/1968,41.1
3,27/03/1968,40.35
4,28/03/1968,39.6
5,29/03/1968,39.45
6,01/04/1968,38.6
7,03/04/1968,38.45
8,04/04/1968,38.05
9,05/04/1968,38.65


I also want to format the date in a more standardised way so that it is easier to merge the different datasets later on. I want to convert the date into YYYY-MM-DD format.

In [81]:
# Parse the day/month/year strings into timestamps, then write them back as
# ISO-style YYYY-MM-DD strings.
parsed_dates = pd.to_datetime(df['date'], format='%d/%m/%Y')
df['date'] = parsed_dates.dt.strftime('%Y-%m-%d')

df.head(10)

Unnamed: 0,date,Price(USD$ / Ounce)
0,1968-03-22,40.0
1,1968-03-25,40.25
2,1968-03-26,41.1
3,1968-03-27,40.35
4,1968-03-28,39.6
5,1968-03-29,39.45
6,1968-04-01,38.6
7,1968-04-03,38.45
8,1968-04-04,38.05
9,1968-04-05,38.65


Now I want to bring in data on the VIX volatility indicator so that I can begin analysing the correlation.

In [82]:
# URL for the FRED observations endpoint (series VIXCLS), requested through a CORS proxy.
# NOTE(review): the FRED API key is embedded in this URL. It should be rotated and
# loaded from an environment variable rather than stored in the notebook.
url = "https://eco-cors-proxy.netlify.app/proxy?url=https%3A%2F%2Fapi.stlouisfed.org%2Ffred%2Fseries%2Fobservations%3Fseries_id%3DVIXCLS%26api_key%3D22ee7a76e736e32f54f5df0a7171538d%26file_type%3Djson"

# Fetch the JSON payload. The timeout stops the cell from hanging indefinitely if the
# proxy does not respond, and raise_for_status() reports an HTTP error here instead
# of as a confusing JSON or KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# The readings are stored under the "observations" key, so extract that list and
# convert it to a DataFrame.
df_observations = pd.DataFrame(data['observations'])

# Keep only the 'date' and 'value' columns
df1 = df_observations[['date', 'value']]
df1.head(10)

Unnamed: 0,date,value
0,1990-01-02,17.24
1,1990-01-03,18.19
2,1990-01-04,19.22
3,1990-01-05,20.11
4,1990-01-08,20.26
5,1990-01-09,22.2
6,1990-01-10,22.44
7,1990-01-11,20.05
8,1990-01-12,24.64
9,1990-01-15,26.34


I also want to rename the value column to make it clearer which dataset it comes from.

In [83]:
# Give the generic 'value' column a name that identifies it as the VIX reading.
vix_column_names = {'value': 'VIX_value'}
df1 = df1.rename(columns=vix_column_names)
df1.head(10)

Unnamed: 0,date,VIX_value
0,1990-01-02,17.24
1,1990-01-03,18.19
2,1990-01-04,19.22
3,1990-01-05,20.11
4,1990-01-08,20.26
5,1990-01-09,22.2
6,1990-01-10,22.44
7,1990-01-11,20.05
8,1990-01-12,24.64
9,1990-01-15,26.34


The gold price data goes back further than the VIX data, and because I am going to conduct a correlation analysis on them, the two series need to cover exactly the same dates. This means I need to filter the gold data to remove values that are not in the VIX dataset.

The earliest observation in the VIX data is 1990-01-02, so I need to filter out all gold observations dated before this.

In [84]:
# Keep gold rows dated on or after the first VIX observation. The dates are
# YYYY-MM-DD strings, so a plain string comparison orders them chronologically.
vix_start = '1990-01-02'
on_or_after_start = df['date'] >= vix_start

# Restart the index at zero to help with merging later.
filtered_df = df[on_or_after_start].reset_index(drop=True)
filtered_df.head(10)

Unnamed: 0,date,Price(USD$ / Ounce)
0,1990-01-02,399.2
1,1990-01-03,394.5
2,1990-01-04,398.8
3,1990-01-05,406.1
4,1990-01-08,401.85
5,1990-01-09,405.5
6,1990-01-10,411.2
7,1990-01-11,413.2
8,1990-01-12,415.0
9,1990-01-15,415.2


Now I need to merge the two dataframes on their date column, which is now in a consistent format in both.

In [None]:
# Use 'date' as the shared index for the merge. Each frame is re-indexed only while
# 'date' is still a column, so this cell can be re-run safely; an unconditional
# set_index('date') raises KeyError on a second run because the column has already
# been moved into the index.
if 'date' in filtered_df.columns:
    filtered_df = filtered_df.set_index('date')
if 'date' in df1.columns:
    df1 = df1.set_index('date')

# The inner join keeps only the dates present in both the gold and the VIX data.
merged_df = pd.merge(filtered_df, df1, left_index=True, right_index=True, how='inner')

After merging these dataframes and trying to calculate the 30-day rolling correlation, I ran into an error which indicated that some of the data was not in the correct format: instead of being an int or float, the VIX_value column was an object. To determine the data type of each column, I ran the following line of code.

In [88]:
# Print the dtype of every column in the merged frame to find the non-numeric one.
print(merged_df.dtypes)

Price(USD$ / Ounce)    float64
VIX_value               object
dtype: object


I could then run the following code to convert the VIX_value column to a numeric data type.

In [89]:
# Parse VIX_value as numbers; with errors='coerce', any entry that cannot be parsed
# becomes NaN instead of raising an error.
vix_numeric = pd.to_numeric(merged_df['VIX_value'], errors='coerce')
merged_df['VIX_value'] = vix_numeric

Now that the data is all in the correct format, I can perform the calculation. This calculates the PMCC value (r value), which is a method I have used to analyse correlation in the past. Using rolling(window=30) makes this a 30-day rolling correlation.

In [None]:
# 30-day rolling correlation between the gold price and the VIX value.
gold_price = merged_df['Price(USD$ / Ounce)']
vix_value = merged_df['VIX_value']
rolling_corr = gold_price.rolling(window=30).corr(vix_value)

# Showing the first 30 values confirms the calculation: the first 29 are NaN because
# a 30-day window has no complete set of observations until the 30th day.
rolling_corr.head(30)

date
1990-01-02         NaN
1990-01-03         NaN
1990-01-04         NaN
1990-01-05         NaN
1990-01-08         NaN
1990-01-09         NaN
1990-01-10         NaN
1990-01-11         NaN
1990-01-12         NaN
1990-01-15         NaN
1990-01-16         NaN
1990-01-17         NaN
1990-01-18         NaN
1990-01-19         NaN
1990-01-22         NaN
1990-01-23         NaN
1990-01-24         NaN
1990-01-25         NaN
1990-01-26         NaN
1990-01-29         NaN
1990-01-30         NaN
1990-01-31         NaN
1990-02-01         NaN
1990-02-02         NaN
1990-02-05         NaN
1990-02-06         NaN
1990-02-07         NaN
1990-02-08         NaN
1990-02-09         NaN
1990-02-12    0.787269
dtype: float64

Now that I have the data for one line of my chart, the 30 day rolling correlation between gold and VIX, I need to do the same for Bitcoin and VIX.