In [None]:
# First Version
# The script downloads a set of indexes from Yahoo Finance,
# removes the indexes with no data and fills the missing values at the beginning of the history (if needed),
# calculates the daily and annualized returns,
# then displays a set of randomly generated portfolios in order to observe the efficient frontier and the CAL (capital allocation line).
# ^IRX has been chosen as the riskless return.
# N.B. at the moment no OOP (it will be implemented in a following version)

In [None]:
!pip install yfinance

In [None]:
#import libraries
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8")
import seaborn
import datetime
import random
import math


In [None]:
# Equity indexes to download (Yahoo Finance symbols), grouped by area.
index_tickers = [
    # --- Global stock indexes ---
    "^GSPC",      # S&P 500 (USA large cap)
    "^DJI",       # Dow Jones Industrial Average (USA blue chip)
    "^IXIC",      # NASDAQ Composite (USA)
    "^RUT",       # Russell 2000 (USA small caps)
    "^FTSE",      # FTSE 100 (UK)
    "^GDAXI",     # DAX (Germany)
    "^STOXX50E",  # Euro Stoxx 50 (Europe blue chip)
    "^N225",      # Nikkei 225 (Japan)
    "^HSI",       # Hang Seng Index (Hong Kong)
    "000001.SS",  # SSE Composite Index (China)
    # --- Other indexes ---
    "^FCHI",      # CAC 40 (France)
    "^AORD",      # All Ordinaries (Australia)
    # --- Emerging markets ---
    "^JKSE",      # Jakarta Composite Index (Indonesia)
    "^BVSP",      # Ibovespa (Brazil)
    "^MXX",       # IPC (Mexico)
]

# US Treasury yield series.
bond_tickers = [
    "^IRX",  # US 13-week Treasury bill yield
    "^TNX",  # US 10-year Treasury note yield
    "^TYX",  # US 30-year Treasury bond yield
]

# Full download list: equity indexes first, then the yield series.
all_tickers = [*index_tickers, *bond_tickers]


In [None]:
# Download every ticker from 1997 up to the requested end date and keep only the
# 'Close' columns (auto_adjust=True is passed to yfinance for the price adjustment).
download_options = dict(start="1997-01-01", end="2024-12-31", auto_adjust=True)
data = yf.download(all_tickers, **download_options)['Close']

In [None]:
# Overview of the raw download: index range, columns, non-null counts and dtypes.
data.info() 

In [None]:
# Clean the downloaded prices in one chain:
#   1. drop the tickers that came back completely empty,
#   2. forward-fill the intermediate holes, then back-fill the NaN at the start of the period,
#   3. drop any date that still contains a NaN.
data = (
    data
    .dropna(axis=1, how='all')
    .ffill()
    .bfill()
    .dropna(axis=0, how='any')
)
data.info()

In [None]:
# One figure per ticker, to look at the behaviour of each asset considered.
for ticker in data.columns:
    fig, ax = plt.subplots(figsize=(20, 10))
    data[ticker].plot(ax=ax)
    ax.set_title(ticker)
    plt.show()

In [None]:
# Split the cleaned frame into bond yields and equity prices, keeping only the
# tickers that are still present among the columns (original list order is preserved).
available_columns = set(data.columns)
valid_bond_yield_tickers = [ticker for ticker in bond_tickers if ticker in available_columns]
valid_equity_tickers = [ticker for ticker in index_tickers if ticker in available_columns]

bond_yields_data = data[valid_bond_yield_tickers]
equity_prices = data[valid_equity_tickers]

# Preview the equity prices (use bond_yields_data.tail() to inspect the yields instead).
equity_prices.head()

In [None]:
# Daily simple returns of the equity indexes; the first row (no previous price) is dropped.
daily_changes = equity_prices.pct_change()
stocks_returns = daily_changes.dropna()
# Work on an independent copy so that stocks_returns stays untouched.
df_returns = stocks_returns.copy()
df_returns.head()

In [None]:

# Choose the risk-free asset.
risk_free_ticker = '^IRX'
if risk_free_ticker not in bond_yields_data.columns:
    # Fail here, with an explicit message, instead of leaving R_f undefined and
    # hitting a NameError several cells later when the Sharpe ratios are computed.
    raise KeyError(
        f"Risk-free ticker {risk_free_ticker!r} is not available in bond_yields_data: "
        "R_f cannot be computed."
    )
# Average ^IRX yield over the whole period (the series is already an annual percentage,
# so R_f is expressed in percent, e.g. 2.0 means 2%).
R_f = bond_yields_data[risk_free_ticker].mean()

In [None]:
# Correlation matrix of the daily returns, drawn as an annotated heatmap.
returns_correlation = df_returns.corr()
fig, heatmap_ax = plt.subplots(figsize=(13, 8))
seaborn.heatmap(
    returns_correlation,
    cmap="Reds",
    annot=True,
    annot_kws={"size": 12},
    ax=heatmap_ax,
)

In [None]:
# How many equally weighted portfolios can be created?
# With equal weights the order in which the assets are picked is irrelevant, so the
# portfolios of i assets out of n are counted with the binomial coefficient C(n, i)
# (the number of ordered arrangements n!/(n-i)! would count every portfolio i! times).
# math.comb also keeps the counts as exact integers.
n_assets = len(df_returns.columns)
for i in range(2, n_assets):
    print(i, math.comb(n_assets, i))

In [None]:
# Produce the annualized means / covariances and the single-asset portfolios.
names = df_returns.columns.tolist()
# Compound the average daily return over 252 trading days; the covariance is scaled by 252.
annualized_decimal_means = (df_returns.mean() + 1)**252 - 1
annualized_decimal_covariance = df_returns.cov() * 252

# R_f is the average ^IRX yield and is expressed in percent, so it is always converted
# to a decimal. (A "divide by 100 only if R_f > 1" rule would read an average yield
# at or below 1% as a decimal, e.g. 0.5% would become a 50% risk-free rate.)
R_f_decimal_for_sharpe = R_f / 100.0

n_assets = len(names)
rows_table1 = []
for k in range(n_assets):
    # Portfolio fully invested (100%) in asset k.
    w = np.zeros(n_assets)
    w[k] = 1.00

    rend_decimal = np.dot(annualized_decimal_means, w)
    variance_decimal = np.dot(w.T, np.dot(annualized_decimal_covariance, w))
    vol_decimal = np.sqrt(variance_decimal)

    # A zero-volatility asset has no finite Sharpe ratio: report +inf as before.
    sharpe = (rend_decimal - R_f_decimal_for_sharpe) / vol_decimal if vol_decimal != 0 else np.inf

    # Return, volatility and weights are stored as percentages for display.
    rows_table1.append([rend_decimal * 100, vol_decimal * 100, sharpe] + list(w * 100))

table1 = pd.DataFrame(rows_table1, columns=["return", "volatility", "Sharpe"] + names)
# Last expression of the cell: rich display of the table.
table1


In [None]:

# Produce randomly generated portfolios.
quante = 30000  # number of random portfolios
SEED = 42       # fixed seed: Restart & Run All reproduces the same cloud of portfolios
rng = np.random.default_rng(SEED)
n_assets = len(df_returns.columns)

# Draw all raw weights at once: one row per portfolio, one column per asset.
weights = rng.normal(1, 0.2, size=(quante, n_assets))
# Same transformation as the per-portfolio version: draws above 1 are reduced by 1 ...
above_one = weights > 1
weights[above_one] = weights[above_one] - 1
# ... and each portfolio is normalized so that its weights sum to 1.
weights = weights / weights.sum(axis=1, keepdims=True)

mean_vector = annualized_decimal_means.to_numpy()
cov_matrix = annualized_decimal_covariance.to_numpy()

# Portfolio return (annualized, decimal): w . mu for every row.
rend_decimal = weights @ mean_vector
# Portfolio variance (annualized, decimal^2): w' . Sigma . w for every row.
variance_decimal = ((weights @ cov_matrix) * weights).sum(axis=1)
# Volatility (annualized, decimal).
vol_decimal = np.sqrt(variance_decimal)

# R_f is the average ^IRX yield and is expressed in percent, so it is always converted
# to a decimal. (A "divide by 100 only if R_f > 1" rule would read an average yield
# at or below 1% as a decimal, e.g. 0.5% would become a 50% risk-free rate.)
R_f_decimal_for_sharpe = R_f / 100.0

# Sharpe ratio; portfolios with zero volatility keep +inf, as before.
sharpe = np.full(quante, np.inf)
nonzero_vol = vol_decimal != 0
sharpe[nonzero_vol] = (rend_decimal[nonzero_vol] - R_f_decimal_for_sharpe) / vol_decimal[nonzero_vol]

# Return, volatility and weights are stored as percentages for display.
table = pd.DataFrame(
    np.column_stack([rend_decimal * 100, vol_decimal * 100, sharpe, weights * 100]),
    columns=["return", "volatility", "Sharpe"] + names,
)
table.head()

In [None]:
# Scatter plot of the random portfolios (red) and of the single-asset portfolios (green),
# to see the efficient frontier and the CAL (capital allocation line).
ax = table.plot.scatter(x="volatility", y="return", figsize=(20, 9), fontsize=12, s=1, color="r")

# Keep the axis limits found for the random cloud and reuse them for the overlay.
cloud_axes = plt.gca()
x_limits = cloud_axes.get_xlim()
y_limits = cloud_axes.get_ylim()
table1.plot.scatter(
    ax=ax, x="volatility", y="return", figsize=(20, 9), fontsize=12, s=3, color="g",
    xlim=x_limits, ylim=y_limits,
)

# Label every single-asset portfolio with its row label in table1.
for label, vol_value, ret_value in zip(table1.index, table1["volatility"], table1["return"]):
    plt.annotate(label, xy=(vol_value, ret_value), size=10)

# Line through three points: (0, R_f), the max-Sharpe portfolio, and the point at the
# volatility of the max-return portfolio reached with the maximum Sharpe slope.
best_sharpe_row = table.loc[table["Sharpe"].idxmax()]
top_return_row = table.loc[table["return"].idxmax()]
line_x = [0, best_sharpe_row["volatility"], top_return_row["volatility"]]
line_y = [
    R_f,
    best_sharpe_row["return"],
    R_f + table["Sharpe"].max() * top_return_row["volatility"],
]
plt.plot(line_x, line_y, color='grey', linestyle='-', linewidth=2, marker='o', markersize=1, label='Linea tra Punti')

In [None]:
# Random portfolio with the highest return: its row label and its full row.
max_return_label = table["return"].idxmax()
(max_return_label, table.loc[max_return_label])

In [None]:
# Random portfolio with the lowest volatility: its row label and its full row.
min_volatility_label = table["volatility"].idxmin()
(min_volatility_label, table.loc[min_volatility_label])

In [None]:
# Random portfolio with the highest Sharpe ratio: its row label and its full row.
max_sharpe_label = table["Sharpe"].idxmax()
(max_sharpe_label, table.loc[max_sharpe_label])