# Testing out yfinance

### Importing Libraries


In [26]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.express as px
import os

import plotly.io as pio
# Set "ggplot2" as Plotly's default template so the figure below does not need to specify one
pio.templates.default = "ggplot2"

### Get latest Gold data from yfinance

In [None]:
# Since XAUUSD=X can be spotty with yfinance, GLD (SPDR Gold Shares) will be used

symbol = "GLD"
t = yf.Ticker(symbol)

# Daily time series covering the last ten years
data = t.history(period="10y", interval="1d")

# Guard: with an empty frame, the .iloc[-1] lookup below would fail with an
# opaque IndexError, so stop here with a message that names the symbol instead.
if data.empty:
    raise RuntimeError(f"No price history returned for {symbol}")

print(data.tail())

# Latest close only (last row of the 'Close' column)
last_close = data["Close"].iloc[-1]
print(f"Latest close for {symbol}: {last_close}")

                                 Open        High         Low       Close  \
Date                                                                        
2026-01-05 00:00:00-05:00  406.390015  409.720001  406.149994  408.760010   
2026-01-06 00:00:00-05:00  410.670013  413.480011  410.309998  413.179993   
2026-01-07 00:00:00-05:00  408.630005  410.809998  406.649994  409.230011   
2026-01-08 00:00:00-05:00  406.970001  411.640015  406.399994  411.489990   
2026-01-09 00:00:00-05:00  413.214996  415.279999  411.799988  414.519989   

                             Volume  Dividends  Stock Splits  Capital Gains  
Date                                                                         
2026-01-05 00:00:00-05:00  13410600        0.0           0.0            0.0  
2026-01-06 00:00:00-05:00  11640900        0.0           0.0            0.0  
2026-01-07 00:00:00-05:00  10589500        0.0           0.0            0.0  
2026-01-08 00:00:00-05:00   8683900        0.0           0.0          

### Save 10 Years of Data

In [None]:
data = yf.download("GLD", period="10y")

# Keep only the 'Close' column and discard rows with missing values.
# dropna() returns a new frame, so `data` itself is left untouched.
gold_data = data[['Close']].dropna()

print(gold_data.head())

# to_csv does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs("../data", exist_ok=True)
gold_data.to_csv("../data/gold_data_10y.csv")

[*********************100%***********************]  1 of 1 completed

Price            Close
Ticker             GLD
Date                  
2016-01-11  104.739998
2016-01-12  104.209999
2016-01-13  104.720001
2016-01-14  103.019997
2016-01-15  104.080002





### Inspect Data

In [16]:
# Read the saved 10-year CSV back into a DataFrame and preview the first rows
raw_csv_path = "../data/gold_data_10y.csv"
gold_data = pd.read_csv(raw_csv_path)
print(gold_data.head())

        Price               Close
0      Ticker                 GLD
1        Date                 NaN
2  2016-01-11  104.73999786376953
3  2016-01-12  104.20999908447266
4  2016-01-13  104.72000122070312


In [17]:
# The column headed "Price" actually carries the dates and "Close" carries the
# prices, so relabel both to match their contents.
column_labels = {"Price": "Date", "Close": "Price"}
gold_data = gold_data.rename(columns=column_labels)

In [19]:
# fix data issues
# The CSV's extra header lines arrive as data rows whose Date cell reads
# "Ticker" / "Date". Remove them by value rather than by position, so that
# re-running this cell is a no-op instead of deleting two real price rows.
header_leftovers = gold_data["Date"].isin(["Ticker", "Date"])
# .copy() gives an independent frame, so later column assignments don't
# trigger chained-assignment warnings.
gold_data = gold_data.loc[~header_leftovers].copy()

In [21]:
# fix indexing and date format
# Renumber rows from 0 (the dropped header rows left a gap) and parse the
# date strings into real timestamps.
gold_data = gold_data.reset_index(drop=True)
gold_data['Date'] = pd.to_datetime(gold_data['Date'])

# The price column shared space with header text in the CSV, so it was read
# as strings; convert it to numbers so the cleaned frame is usable for
# arithmetic without a later cast.
gold_data['Price'] = pd.to_numeric(gold_data['Price'])

In [23]:
# Promote the Date column to the row index (needed for the date-range slice used when plotting)
gold_data = gold_data.set_index('Date')

In [28]:
# Show the five most recent rows of the cleaned frame
gold_data.tail(n=5)

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2026-01-05,408.760009765625
2026-01-06,413.1799926757813
2026-01-07,409.2300109863281
2026-01-08,411.489990234375
2026-01-09,414.2950134277344


In [46]:
# Restrict to the plotting window (1 Jan 2024 through 9 Jan 2026) and rescale the price.
# NOTE(review): the 10.8 multiplier is not explained anywhere in this notebook;
# presumably it scales the GLD share price towards a per-ounce gold price. Confirm.
price_multiplier = 10.8
plot_gold_data = gold_data.loc['2024-01-01':'2026-01-09'].copy()
scaled_price = plot_gold_data['Price'].astype(float) * price_multiplier
plot_gold_data['Price'] = scaled_price.round(0)

# Line chart of the rescaled, rounded price (only y is specified)
fig = px.line(
    plot_gold_data,
    y='Price',
    title='Gold (GLD) Price: Jan 2024 - Jan 2026',
    labels={'Price': 'Price (USD)', 'Date': 'Date'},
)

# Y axis: whole-number ticks every 200 over a fixed 1000-5000 window, with a light grey grid
fig.update_yaxes(
    tickformat=".0f",
    dtick=200,
    range=[1000, 5000],
    showgrid=True,
    gridwidth=1,
    gridcolor='LightGrey',
)

# X axis: light pink grid lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightPink')

fig.show()

In [47]:
# Write the cleaned, date-indexed frame to its own CSV next to the raw download
cleaned_csv_path = "../data/cleaned_gold_data_10y.csv"
gold_data.to_csv(cleaned_csv_path)