In [None]:
!pip install yfinance

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8")
import seaborn
import datetime
import random
import math


In [None]:
# Equity index tickers used as the investable universe.
index_tickers = [
    # --- Global / developed-market equity indices (long history) ---
    "^GSPC",        # S&P 500 (USA large cap)
    "^DJI",         # Dow Jones Industrial Average (USA blue chip)
    "^IXIC",        # NASDAQ Composite (USA technology / growth)
    "^RUT",         # Russell 2000 (USA small caps)
    "^FTSE",        # FTSE 100 (United Kingdom)
    "^GDAXI",       # DAX (Germany)
    "^STOXX50E",    # Euro Stoxx 50 (European blue chip)
    "^N225",        # Nikkei 225 (Japan)
    "^HSI",         # Hang Seng Index (Hong Kong)
    "000001.SS",    # SSE Composite Index (mainland China)

    # --- Additional equity indices for diversification (with a good history) ---
    "^FCHI",        # CAC 40 (France) - main index of the Paris exchange
    "^AORD",        # All Ordinaries (Australia) - Australian equity index
    "^JKSE",        # Jakarta Composite Index (Indonesia) - exposure to a long-standing Asian emerging market
    "^BVSP",        # Ibovespa (Brazil) - main index of the Brazilian equity market
    "^MXX",         # IPC (Mexico) - main index of the Mexican equity market
]

# Yield tickers; one of them is later used as the risk-free reference.
bond_tickers = ["^IRX", "^TNX", "^TYX"]

# Everything requested from the data provider: equities first, then yields.
all_tickers = [*index_tickers, *bond_tickers]


In [None]:
# Download the history for every ticker and keep only the 'Close' field.
data = yf.download(
    all_tickers,
    start="1997-01-01",
    end="2024-12-31",
    auto_adjust=True,
)['Close']

In [None]:
# Print pandas' summary of the freshly downloaded frame to spot missing data
# before any cleaning is applied.
data.info()

In [None]:
# Clean the price table in one pass:
#   1. drop tickers that returned no data at all,
#   2. forward-fill gaps inside the sample, then back-fill what is still
#      missing at the start of the period,
#   3. drop any date that still contains a NaN.
# NOTE(review): back-filling the start holds a series flat at its first valid
# value until data begins, i.e. zero returns over that stretch; confirm this is
# intended before relying on the estimated means and covariances.
data = (
    data
    .dropna(axis=1, how='all')
    .ffill()
    .bfill()
    .dropna(axis=0, how='any')
)
data.info()

In [None]:
# Draw each series in its own figure to inspect the behaviour of every asset.
for ticker in data.columns:
  fig, ax = plt.subplots(figsize=(20, 10))
  data[ticker].plot(ax=ax)
  ax.set_title(ticker)
  plt.show()

In [None]:
# Split the cleaned table into equity prices and bond yields, keeping only the
# tickers that are still present as columns after cleaning.
present = data.columns
valid_equity_tickers = [ticker for ticker in index_tickers if ticker in present]
valid_bond_yield_tickers = [ticker for ticker in bond_tickers if ticker in present]

equity_prices = data[valid_equity_tickers]
bond_yields_data = data[valid_bond_yield_tickers]

equity_prices.head()

In [None]:
# Period-over-period percentage changes of the equity prices; rows containing
# NaN (such as the first one, which has no previous price) are discarded.
variazioni = equity_prices.pct_change()
rendimenti_azionario = variazioni.dropna()

# Independent working copy used by the rest of the analysis.
df_ritorni = rendimenti_azionario.copy()
df_ritorni.head()

In [None]:

# Choose the risk-free asset and derive the risk-free rate from its yield series.
risk_free_ticker = '^IRX'
if risk_free_ticker not in bond_yields_data.columns:
    # Fail here with a clear message: otherwise R_f stays undefined and a later
    # cell dies with an unrelated-looking NameError.
    raise KeyError(
        f"Risk-free ticker {risk_free_ticker!r} is not available in bond_yields_data "
        f"(columns: {list(bond_yields_data.columns)})"
    )

# Historical mean of the yield series. Per the original author's note the yield
# data are already quoted in annual percent, so R_f is in percent units
# (divide by 100 before combining it with decimal returns).
R_f = bond_yields_data[risk_free_ticker].mean()

In [None]:
# Heatmap of the pairwise correlations between the equity return series.
plt.figure(figsize=(13, 8))
correlazioni = df_ritorni.corr()
seaborn.heatmap(
    correlazioni,
    cmap="Reds",
    annot=True,
    annot_kws={"size": 12},
)

In [None]:
# How many equally weighted portfolios can be created from i of the n assets?
# With equal weights the order of the assets is irrelevant, so the count is the
# binomial coefficient C(n, i) = n! / (i! * (n - i)!), not the number of ordered
# arrangements n! / (n - i)!. math.comb also keeps the result an exact integer.
n_assets = len(df_ritorni.columns)
for i in range(2, n_assets):
  print(i, math.comb(n_assets, i))

In [None]:
# Annualised inputs (decimal units) and the table of single-asset portfolios.
nomi = df_ritorni.columns.tolist()
# Compound the mean per-period return over 252 periods.
medie_annualizzate_decimali = (df_ritorni.mean() + 1)**252 - 1
# Scale the per-period covariance matrix by 252 periods.
cov_annualizzata_decimali = df_ritorni.cov() * 252

# R_f is the mean of a yield series quoted in percent, so it is always converted
# to a decimal here. The previous "divide only if R_f > 1" heuristic treated a
# mean yield below 1% as if it were already a decimal (0.8% became 80%).
R_f_decimal_for_sharpe = R_f / 100.0

rows_tabella1 = []
for k in range(len(nomi)):
  # 100% of the portfolio in asset k (already sums to 1, no normalisation needed).
  w = np.zeros(len(nomi))
  w[k] = 1.00

  # Portfolio return (annualised, decimal).
  rend_decimal = np.dot(medie_annualizzate_decimali, w)

  # Portfolio variance (annualised, decimal^2) and volatility (annualised, decimal).
  variance_decimal = np.dot(w.T, np.dot(cov_annualizzata_decimali, w))
  vol_decimal = np.sqrt(variance_decimal)

  # Percentages are used for display in the table.
  rend_percent = rend_decimal * 100
  vol_percent = vol_decimal * 100

  # Excess return per unit of volatility; infinite when the volatility is zero.
  quasi_sharpe = (rend_decimal - R_f_decimal_for_sharpe) / vol_decimal if vol_decimal != 0 else np.inf

  # One row: return %, volatility %, Sharpe-like ratio, then the weights in %.
  row_data = [rend_percent, vol_percent, quasi_sharpe] + list(w * 100)
  rows_tabella1.append(row_data)

tabella1 = pd.DataFrame(rows_tabella1, columns=["rendimento", "volatilità", "quasi Sharpe"] + nomi)
print(tabella1)


In [None]:

# Produce randomly generated portfolios and their return / volatility / Sharpe-like ratio.
# All portfolios are evaluated at once with NumPy instead of a Python loop.
quante = 300000

# Fixed seed so that Restart & Run All reproduces the same set of portfolios
# (the value 42 is arbitrary).
rng = np.random.default_rng(42)

# Raw weights: normal draws around 1; any draw above 1 is shifted down by 1,
# then each row is rescaled so that the weights of a portfolio sum to 1.
n_assets = len(df_ritorni.columns)
pesi = rng.normal(1, 0.2, size=(quante, n_assets))
pesi = np.where(pesi > 1, pesi - 1, pesi)
pesi = pesi / pesi.sum(axis=1, keepdims=True)

# Plain arrays, in the same column order as df_ritorni.
medie = np.asarray(medie_annualizzate_decimali, dtype=float)
cov = np.asarray(cov_annualizzata_decimali, dtype=float)

# Portfolio return (annualised, decimal): w . mu for every row.
rend_decimal = pesi @ medie
# Portfolio variance (annualised, decimal^2): w' C w for every row.
variance_decimal = ((pesi @ cov) * pesi).sum(axis=1)
# Portfolio volatility (annualised, decimal).
vol_decimal = np.sqrt(variance_decimal)

# R_f is the mean of a yield series quoted in percent, so it is always converted
# to a decimal here. The previous "divide only if R_f > 1" heuristic treated a
# mean yield below 1% as if it were already a decimal.
R_f_decimal_for_sharpe = R_f / 100.0

# Excess return per unit of volatility; stays infinite where the volatility is zero.
quasi_sharpe = np.full(quante, np.inf)
np.divide(
    rend_decimal - R_f_decimal_for_sharpe,
    vol_decimal,
    out=quasi_sharpe,
    where=vol_decimal != 0,
)

# One row per portfolio: return %, volatility %, Sharpe-like ratio, weights in %.
tabella = pd.DataFrame(
    np.column_stack([rend_decimal * 100, vol_decimal * 100, quasi_sharpe, pesi * 100]),
    columns=["rendimento", "volatilità", "quasi Sharpe"] + nomi,
)
tabella.head()

In [None]:
# Scatter plot of the random portfolios (red) with the single-asset portfolios
# (green) on the same axes, plus the capital allocation line (grey).
ax = tabella.plot.scatter(x="volatilità", y="rendimento", figsize=(20, 9), fontsize=12, s=1, color="r")

# Freeze the axis limits chosen for the random cloud so the overlay keeps them.
x_limits = ax.get_xlim()
y_limits = ax.get_ylim()
tabella1.plot.scatter(
    ax=ax, x="volatilità", y="rendimento", figsize=(20, 9), fontsize=12,
    s=3, color="g", xlim=x_limits, ylim=y_limits,
)

# Label every single-asset portfolio with its row index in tabella1.
for idx, vol, rend in zip(tabella1.index, tabella1["volatilità"], tabella1["rendimento"]):
    plt.annotate(idx, xy=(vol, rend), size=10)

# Line through (0, R_f) and the random portfolio with the highest Sharpe-like
# ratio, extended with that same slope to the volatility of the random
# portfolio with the highest return.
tangency = tabella.loc[tabella["quasi Sharpe"].idxmax()]
top_return = tabella.loc[tabella["rendimento"].idxmax()]
best_ratio = tabella["quasi Sharpe"].max()

cal_x = [0, tangency['volatilità'], top_return['volatilità']]
cal_y = [R_f, tangency['rendimento'], R_f + best_ratio * top_return['volatilità']]
plt.plot(cal_x, cal_y, color='grey', linestyle='-', linewidth=2, marker='o', markersize=1, label='Linea tra Punti')

In [None]:
# Row label and full row of the random portfolio with the highest return.
idx_max_rendimento = tabella["rendimento"].idxmax()
idx_max_rendimento, tabella.loc[idx_max_rendimento]

In [None]:
# Row label and full row of the random portfolio with the lowest volatility.
idx_min_volatilita = tabella["volatilità"].idxmin()
idx_min_volatilita, tabella.loc[idx_min_volatilita]

In [None]:
# Row label and full row of the random portfolio with the highest Sharpe-like ratio.
idx_max_sharpe = tabella["quasi Sharpe"].idxmax()
idx_max_sharpe, tabella.loc[idx_max_sharpe]