In [1]:
import yfinance as yf
from datetime import datetime
import matplotlib.pyplot as plt

import json
import pandas as pd    

# Load the JSON file
path1 = 'datasets/CNN 2020-07 to 2025-02-14.json'
df = pd.read_json(path1)

# Extract the "data" list from "fear_and_greed_historical"
data_list = df["fear_and_greed_historical"]["data"]

junkBonds = df['junk_bond_demand']['data']
junkDF = pd.DataFrame(junkBonds)
junkDF['BND_FGI'] = junkDF['rating']

# Convert it into a DataFrame
data_df = pd.DataFrame(data_list)

data_df = pd.merge(data_df, junkDF[['x','BND_FGI']], on=['x','x'])

data_df['Date'] = pd.to_datetime(data_df['x'], unit='ms')
data_df['Date'] = data_df['Date'].dt.strftime('%Y-%m-%d')
data_df['SPY_FGI'] = data_df['y']

df = data_df[['Date', 'SPY_FGI', 'rating', 'BND_FGI']]

# Convert rating to numerical values
rating_map = {
    'extreme fear': 0,
    'fear': 1,
    'neutral': 2,
    'greed': 3,
    'extreme greed': 4
}
df['SPY_FGI'] = df['rating'].map(rating_map)
df['BND_FGI'] = df['BND_FGI'].map(rating_map)

df = df[['Date', 'SPY_FGI', 'BND_FGI']]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['SPY_FGI'] = df['rating'].map(rating_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['BND_FGI'] = df['BND_FGI'].map(rating_map)


Unnamed: 0,Date,SPY_FGI,BND_FGI
0,2020-07-14,2,0
1,2020-07-15,2,0
2,2020-07-16,2,0
3,2020-07-17,2,0
4,2020-07-20,2,0
...,...,...,...
1152,2025-02-07,1,0
1153,2025-02-10,2,0
1154,2025-02-11,1,0
1155,2025-02-12,1,0


In [2]:
cryptoFGI = pd.read_csv('datasets/CryptoFEI.csv')

cryptoFGI['Crypto_FGI'] = cryptoFGI['Fng_classification'].str.lower().map(rating_map)
cryptoFGI['Date'] = pd.to_datetime(cryptoFGI['Date'], format='%d-%m-%Y').dt.strftime('%Y-%m-%d')
cryptoFGI

Unnamed: 0,Date,Fng_value,Fng_classification,Crypto_FGI
0,2025-02-17,51,Neutral,2
1,2025-02-16,54,Neutral,2
2,2025-02-15,50,Neutral,2
3,2025-02-14,48,Neutral,2
4,2025-02-13,50,Neutral,2
...,...,...,...,...
2565,2018-02-05,11,Extreme Fear,0
2566,2018-02-04,24,Extreme Fear,0
2567,2018-02-03,40,Fear,1
2568,2018-02-02,15,Extreme Fear,0


In [3]:
merged = pd.merge(df, cryptoFGI, on=['Date','Date'])
merged

Unnamed: 0,Date,SPY_FGI,BND_FGI,Fng_value,Fng_classification,Crypto_FGI
0,2020-07-14,2,0,43,Fear,1
1,2020-07-15,2,0,44,Fear,1
2,2020-07-16,2,0,43,Fear,1
3,2020-07-17,2,0,41,Fear,1
4,2020-07-20,2,0,44,Fear,1
...,...,...,...,...,...,...
1152,2025-02-07,1,0,44,Fear,1
1153,2025-02-10,2,0,43,Fear,1
1154,2025-02-11,1,0,47,Neutral,2
1155,2025-02-12,1,0,46,Fear,1


In [4]:
def gold_silver_sentiment(x):
    if x < 75:
        return 0
    elif 75 <= x < 80:
        return 1
    elif 80 <= x < 85:
        return 2
    elif 85 <= x < 90:
        return 3
    else:
        return 4

gldSilverFGI = pd.read_csv('datasets/Gold-Silver-Ratio-since-1693.csv', delimiter=';')
gldSilverFGI['Date'] = pd.to_datetime(gldSilverFGI['DateTime'])
gldSilverFGI['Date'] = gldSilverFGI['Date'].dt.strftime('%Y-%m-%d')
gldSilverFGI = gldSilverFGI[gldSilverFGI['Date'] > '2020-07-14']
# Fix Gold/Silver Ratio format (replace commas with dots)
gldSilverFGI['GLD_FGI'] = gldSilverFGI['Gold/Silver'].astype(str).str.replace(',', '.').astype(float).apply(gold_silver_sentiment)
gldSilverFGI[['Date', 'Gold/Silver']]
gldSilverFGI = gldSilverFGI.drop(columns=['DateTime'])

merged = pd.merge(merged, gldSilverFGI, on=['Date', 'Date'])
merged = merged[['Date', 'SPY_FGI', 'BND_FGI', 'Crypto_FGI', 'GLD_FGI']]
# merged[['Date', 'SPY_FGI']]
merged


Unnamed: 0,Date,SPY_FGI,BND_FGI,Crypto_FGI,GLD_FGI
0,2020-07-15,2,0,1,4
1,2020-07-16,2,0,1,4
2,2020-07-17,2,0,1,4
3,2020-07-20,2,0,1,4
4,2020-07-21,2,0,1,3
...,...,...,...,...,...
1150,2025-02-06,1,0,2,3
1151,2025-02-07,1,0,1,3
1152,2025-02-10,2,0,1,3
1153,2025-02-11,1,0,2,4


## SPY historical data

In [5]:
# S&P 500 ETF TRUST (SPY)
spy = yf.Ticker("SPY")
df_spy = pd.DataFrame(spy.history(start="2020-07-14"))
print("SPY historical data")
df_spy['Change'] = df_spy['Close'] - df_spy['Open']
df_spy['Change %'] = df_spy['Change'] / df_spy['Open'] * 100
df_spy.reset_index(inplace=True)
df_spy['Date'] = df_spy['Date'].dt.strftime('%Y-%m-%d')  # Ensure same format
df_spy[['Open', 'Close', 'Change', 'Change %']]
df_spy_ml = pd.merge(
    merged[["Date", "SPY_FGI"]],
    df_spy[["Date", "Close", "Change %"]],
    on="Date",
    how="inner"
)
df_spy_ml = df_spy_ml.rename(columns={"SPY_FGI": "FGI"})
df_spy_ml.to_csv('df_spy_ml.csv', index=False)
df_spy_ml

SPY historical data


Unnamed: 0,Date,FGI,Close,Change %
0,2020-07-15,2,300.566376,-0.173691
1,2020-07-16,2,299.576385,0.312705
2,2020-07-17,2,300.444916,-0.049709
3,2020-07-20,2,302.873016,0.899112
4,2020-07-21,2,303.517334,-0.441110
...,...,...,...,...
1147,2025-02-06,1,604.501526,0.054459
1148,2025-02-07,1,598.968201,-1.008419
1149,2025-02-10,2,603.035889,0.135746
1150,2025-02-11,1,603.494568,0.458055


## Bonds historical data

In [6]:
# Vanguard Total Bond Market Index Fund (BND)
bnd = yf.Ticker("BND")
df_bnd = pd.DataFrame(bnd.history(start="2020-07-14"))
print("BND historical data")
df_bnd['Change'] = df_bnd['Close'] - df_bnd['Open']
df_bnd['Change %'] = df_bnd['Change'] / df_bnd['Open'] * 100
df_bnd.reset_index(inplace=True)
df_bnd['Date'] = df_bnd['Date'].dt.strftime('%Y-%m-%d')  # Ensure same format
df_bnd[['Open', 'Close', 'Change', 'Change %']]
df_bnd_ml = pd.merge(
    merged[["Date", "BND_FGI"]],
    df_bnd[["Date", "Close", "Change %"]],
    on="Date",
    how="inner"
)
df_bnd_ml = df_bnd_ml.rename(columns={"BND_FGI": "FGI"})
df_bnd_ml.to_csv('df_bnd_ml.csv', index=False)
df_bnd_ml

BND historical data


Unnamed: 0,Date,FGI,Close,Change %
0,2020-07-15,0,77.262398,-0.022548
1,2020-07-16,0,77.314697,-0.067550
2,2020-07-17,0,77.393059,-0.067490
3,2020-07-20,0,77.471481,-0.067413
4,2020-07-21,0,77.515007,-0.022474
...,...,...,...,...
1147,2025-02-06,0,71.876350,0.000000
1148,2025-02-07,0,71.658417,-0.069075
1149,2025-02-10,0,71.678238,-0.124219
1150,2025-02-11,0,71.559364,-0.096805


## Gold historical data

In [7]:
# SPDR Gold Shares (GLD)
gld = yf.Ticker("GLD")
df_gld = pd.DataFrame(gld.history(start="2020-07-14"))
print("GLD historical data")
df_gld['Change'] = df_gld['Close'] - df_gld['Open']
df_gld['Change %'] = df_gld['Change'] / df_gld['Open'] * 100
df_gld.reset_index(inplace=True)
df_gld['Date'] = df_gld['Date'].dt.strftime('%Y-%m-%d')  # Ensure same format
df_gld[['Date', 'Open', 'Close', 'Change', 'Change %']]
df_gld_ml = pd.merge(
    merged[["Date", "GLD_FGI"]],
    df_gld[["Date", "Close", "Change %"]],
    on="Date",
    how="inner"
)
df_gld_ml = df_gld_ml.rename(columns={"GLD_FGI": "FGI"})
df_gld_ml.to_csv('df_gld_ml.csv', index=False)
df_gld_ml


GLD historical data


Unnamed: 0,Date,FGI,Close,Change %
0,2020-07-15,4,170.339996,0.424479
1,2020-07-16,4,168.729996,-0.630157
2,2020-07-17,4,170.119995,0.105915
3,2020-07-20,4,170.940002,0.105416
4,2020-07-21,3,173.000000,0.272417
...,...,...,...,...
1147,2025-02-06,3,263.429993,-0.087238
1148,2025-02-07,3,263.899994,-0.497703
1149,2025-02-10,3,268.369995,0.138058
1150,2025-02-11,4,267.390015,-0.115800


## HODL historical data

In [8]:
# 21Shares Crypto Basket Index ETP (HODL.PA)
hodl = yf.Ticker("HODL.PA")
df_hodl = pd.DataFrame(hodl.history(start="2022-05-05"))
print("hodl historical data")
df_hodl['Change'] = df_hodl['Close'] - df_hodl['Open']
df_hodl['Change %'] = df_hodl['Change'] / df_hodl['Open'] * 100
df_hodl.reset_index(inplace=True)
df_hodl['Date'] = df_hodl['Date'].dt.strftime('%Y-%m-%d')  # Ensure same format
# df_hodl[['Date', 'Open', 'Close', 'Change', 'Change %']]
df_hodl_ml = pd.merge(
    merged[['Date', 'Crypto_FGI']].rename(columns={'Crypto_FGI': 'FGI'}),
    df_hodl[['Date', 'Close', 'Change %']],
    on='Date'
)
df_hodl_ml.to_csv('df_hodl_ml.csv', index=False)
df_hodl_ml


hodl historical data


Unnamed: 0,Date,FGI,Close,Change %
0,2022-05-05,1,15.0100,0.000000
1,2022-05-06,0,15.0100,0.000000
2,2022-05-09,0,9.9000,-2.270489
3,2022-05-10,0,9.8700,0.000000
4,2022-05-11,0,9.8700,0.000000
...,...,...,...,...
687,2025-02-06,2,17.3925,-2.258572
688,2025-02-07,1,17.7302,1.263929
689,2025-02-10,1,17.6313,-0.429203
690,2025-02-11,2,17.6313,0.000000


In [9]:

# HODL training
from xgb import Trainer, load_data
trainer = Trainer().fit(df_hodl_ml)
# mu_hodl, conf_hodl = trainer.predict(df_hodl.tail(8))

print(df_hodl_ml.iloc[len(df_hodl_ml)-16-7:len(df_hodl_ml)-16])

mu_hodl, conf_hodl = trainer.predict(df_hodl_ml.iloc[len(df_hodl_ml)-32-8:len(df_hodl_ml)-32])
trainer.save("models/hodl")
print(mu_hodl, conf_hodl)

[INFO] Trained regressor — Test MSE: 0.000348
           Date  FGI      Close  Change %
669  2025-01-10    2  18.340500  0.771973
670  2025-01-13    3  17.916000 -1.452682
671  2025-01-14    3  18.602699 -0.271806
672  2025-01-15    3  19.548800  2.942600
673  2025-01-16    3  19.972200  1.948396
674  2025-01-17    3  20.603600  1.394174
675  2025-01-21    4  20.292700  1.944162
DEBUG — amount_pct: -0.019776634871959686 | mu_view: -0.01958361789572431 | confidence: 0.5144688331915999
-0.01958361789572431 0.5144688331915999


In [10]:
# BOND training
from xgb import Trainer, load_data
trainer = Trainer().fit(df_bnd_ml)
# mu_bnd, conf_bnd = trainer.predict(df_bnd_ml.tail(8))

df_bnd_ml.iloc[len(df_bnd_ml)-16-7:len(df_bnd_ml)-16]

mu_bnd, conf_bnd = trainer.predict(df_bnd_ml.iloc[len(df_bnd_ml)-32-8:len(df_bnd_ml)-32])
trainer.save("models/bnd")
print(mu_bnd, conf_bnd)

[INFO] Trained regressor — Test MSE: 0.000008
DEBUG — amount_pct: 0.003313235007226467 | mu_view: 0.003307758337781072 | confidence: 0.5434933701165299
0.003307758337781072 0.5434933701165299


In [11]:
# SPY training
from xgb import Trainer

trainer = Trainer().fit(df_spy_ml)
# mu_spy, conf_spy = trainer.predict(df_spy_ml.tail(8))
mu_spy, conf_spy = trainer.predict(df_spy_ml.iloc[len(df_spy_ml)-32-8:len(df_spy_ml)-32])
trainer.save("models/spy")
print(mu_spy, conf_spy)


[INFO] Trained regressor — Test MSE: 0.000086
DEBUG — amount_pct: 0.003740414511412382 | mu_view: 0.003733436555938167 | confidence: 0.28698994622136526
0.003733436555938167 0.28698994622136526


In [12]:
# GOLD training
from xgb import Trainer

trainer = Trainer().fit(df_gld_ml)
# mu_gld, conf_gld = trainer.predict(df_gld_ml.tail(8))
mu_gld, conf_gld = trainer.predict(df_gld_ml.iloc[len(df_gld_ml)-32-8:len(df_gld_ml)-32])
trainer.save("models/gld")
print(mu_gld, conf_gld)


[INFO] Trained regressor — Test MSE: 0.000039
DEBUG — amount_pct: 0.0020479809027165174 | mu_view: 0.0020458866486683137 | confidence: 0.24614541164660672
0.0020458866486683137 0.24614541164660672


In [13]:
# 2. Samla views och confidences
import numpy as np


viewdict = {
    "SPY": mu_spy,
    "BND": mu_bnd,
    "GLD": mu_gld,
    "HODL.PA": mu_hodl,
}
confidences = [conf_spy, conf_bnd, conf_gld, conf_hodl]

print("Views:", viewdict)
print("Conf:", confidences)

Views: {'SPY': np.float64(0.003733436555938167), 'BND': np.float64(0.003307758337781072), 'GLD': np.float64(0.0020458866486683137), 'HODL.PA': np.float64(-0.01958361789572431)}
Conf: [np.float64(0.28698994622136526), np.float64(0.5434933701165299), np.float64(0.24614541164660672), np.float64(0.5144688331915999)]
