# EDA and Hypothesis Testing on Portfolio Optimization


1. S&P500 vs Portfolio(Non-Optimized)
2. S&P500 vs Portfolio(Optimized)
3. Portfolio(Non-Optimized) vs Portfolio(Optimized)
4. Benchmarking the Portfolio(Non-Optimized) with S&P500

In [4]:
import pandas as pd
import yfinance as yf
from services.BlackLittermanOptimization import BlackLittermanOptimization
from utils.StockDataManager import StockDataManager
import warnings
import plotly.graph_objects as go

warnings.filterwarnings('ignore')

In [18]:
import random

def generate_random_tickers():
    """Return a random selection of 5 to 10 symbols known to StockDataManager.

    The sample size is drawn with ``random.randint(5, 10)`` (both bounds
    inclusive) and the symbols are chosen without replacement via
    ``random.sample``.
    """
    available_symbols = StockDataManager().get_symbols()
    sample_size = random.randint(5, 10)
    return random.sample(available_symbols, sample_size)

# Notebook-wide settings used by the cells below.
period = '5y'       # look-back window passed to the data loaders
interval = '1d'     # sampling interval passed to the data loaders
capital = 100_000   # amount handed to the Black-Litterman optimizer

# Random basket of tickers analysed in the single-run cells.
tickers = generate_random_tickers()
print(tickers)

In [19]:
# Standalone StockDataManager instance. The functions in the cells shown here
# each build their own manager and do not reference `stdm`.
stdm = StockDataManager()


## S&P500 vs Portfolio(Non-Optimized)

In [20]:
def get_cumulative_returns(tickers, period, interval='1d'):
    """Build cumulative-return curves for a price-averaged basket and the S&P 500.

    The basket value on each date is the plain mean of the tickers'
    ``adj_close`` prices, so no explicit weights are involved.

    Args:
        tickers: Symbols to load through ``StockDataManager.history``.
        period: Look-back window forwarded to both data sources.
        interval: Sampling interval forwarded to both data sources.

    Returns:
        Tuple ``(portfolio_cumulative_returns, sp500_cumulative_returns)``.
        Both are expressed as fractions relative to their first value and
        share the basket's index.
    """
    manager = StockDataManager()

    # One single-column 'adj_close' frame per ticker, in the order given.
    price_frames = []
    for symbol in tickers:
        history = manager.history(ticker=symbol, period=period, interval=interval)
        price_frames.append(history[['adj_close']])

    # concat labels the columns (ticker, 'adj_close'); drop the inner level
    # so each column is named after its ticker only.
    prices = pd.concat(price_frames, axis=1, keys=list(tickers))
    prices.columns = prices.columns.droplevel(1)

    # Fill gaps forwards, then backwards, and drop whatever is still missing.
    prices = prices.ffill().bfill().dropna()
    basket = prices.mean(axis=1)

    # Benchmark prices, put on the basket's dates and gap-filled the same way.
    benchmark = yf.download('^GSPC', period=period, interval=interval)['Adj Close']
    benchmark = benchmark.reindex(basket.index).ffill().bfill()

    # Growth relative to the first observation of each series.
    portfolio_cumulative_returns = basket / basket.iloc[0] - 1
    sp500_cumulative_returns = benchmark / benchmark.iloc[0] - 1
    return portfolio_cumulative_returns, sp500_cumulative_returns

In [21]:
def get_equal_weight_cumulative_returns(tickers, period, interval='1d'):
    """Build cumulative-return curves for an equal-weight portfolio and the S&P 500.

    Every ticker receives the weight ``1 / len(tickers)``. Daily portfolio
    returns are the weighted sum of the tickers' daily percentage changes and
    are compounded into a cumulative curve.

    Args:
        tickers: Symbols to load through ``StockDataManager.history``.
        period: Look-back window forwarded to both data sources.
        interval: Sampling interval forwarded to both data sources.

    Returns:
        Tuple ``(portfolio_cumulative_returns, sp500_cumulative_returns)`` of
        fractional cumulative returns. Both indexes are converted to strings.
    """
    stockDataManager = StockDataManager()

    # Fetch historical data for the portfolio: one 'adj_close' column per ticker.
    portfolio_data_list = [
        stockDataManager.history(ticker=symbol, period=period, interval=interval)[['adj_close']]
        for symbol in tickers
    ]
    portfolio_data = pd.concat(portfolio_data_list, axis=1, keys=list(tickers))
    portfolio_data.columns = portfolio_data.columns.droplevel(1)
    portfolio_data = portfolio_data.ffill().bfill()
    portfolio_data = portfolio_data.dropna()

    # Calculate daily returns (the first row has no predecessor and is dropped).
    daily_returns = portfolio_data.pct_change().dropna()

    # Fetch S&P 500 prices and align them with the portfolio *price* dates.
    # Aligning with the return dates instead would make pct_change() below
    # discard one more row, so the benchmark curve would start a day later
    # than the portfolio curve and be measured from a different base date.
    sp500_data = yf.download('^GSPC', period=period, interval=interval)['Adj Close']
    sp500_data = sp500_data.reindex(portfolio_data.index).ffill().bfill()
    sp500_daily_returns = sp500_data.pct_change().dropna()

    # Equal weights: every ticker gets the same share of the portfolio.
    weights = [1 / len(tickers)] * len(tickers)
    print(weights)

    # Calculate daily portfolio returns using the equal weights.
    portfolio_daily_returns = daily_returns.dot(weights)

    # Compound the daily returns into cumulative returns.
    portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod() - 1
    sp500_cumulative_returns = (1 + sp500_daily_returns).cumprod() - 1

    # Index labels are returned as strings.
    portfolio_cumulative_returns.index = portfolio_cumulative_returns.index.astype(str)
    sp500_cumulative_returns.index = sp500_cumulative_returns.index.astype(str)

    return portfolio_cumulative_returns, sp500_cumulative_returns

In [22]:
def plot_cumulative_returns(legend_1, portfolio_cumulative_returns, legend_2, sp500_cumulative_returns):
    """Plot two cumulative-return series as lines on a single Plotly figure.

    Args:
        legend_1: Legend label for the first series.
        portfolio_cumulative_returns: First series; its index is used as x.
        legend_2: Legend label for the second series.
        sp500_cumulative_returns: Second series; its index is used as x.
            Despite the name, any series can be passed here.

    The figure title is ``"<legend_1> Vs. <legend_2>"`` and the figure is
    shown immediately; nothing is returned.
    """
    title = f'{legend_1} Vs. {legend_2}'
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=portfolio_cumulative_returns.index,
        y=portfolio_cumulative_returns,
        mode='lines',
        name=legend_1,
    ))
    fig.add_trace(go.Scatter(
        x=sp500_cumulative_returns.index,
        y=sp500_cumulative_returns,
        mode='lines',
        name=legend_2,
    ))
    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Cumulative Returns (%)',
        # The return helpers in this notebook produce fractions (0.25 means
        # 25 %). Formatting the ticks as percentages makes the axis agree
        # with its "(%)" title.
        yaxis_tickformat='.0%',
        height=600,  # Set the height of the plot
        width=800,
    )
    fig.show()

In [23]:
# Back-test the price-averaged ("non-optimized") basket against the S&P 500 for
# the sampled tickers, then plot both curves. `interval` is not passed, so the
# function's default ('1d') applies.
non_optimized_portfolio_cumulative_returns, sp500_cumulative_returns = get_cumulative_returns(tickers, period)
plot_cumulative_returns("Non-optimized", non_optimized_portfolio_cumulative_returns, "S&P 500",sp500_cumulative_returns)

## S&P500 vs Portfolio(Optimized)

In [24]:
def get_optimized_cumulative_returns(tickers, period, capital, interval='1d'):
    """Build cumulative-return curves for a Black-Litterman portfolio and the S&P 500.

    Weights are taken from ``BlackLittermanOptimization.calculate_portfolio_value``
    and applied to the tickers' daily percentage changes. The weighted daily
    returns are then compounded into a cumulative curve.

    Args:
        tickers: Symbols to load through ``StockDataManager.history``.
        period: Look-back window forwarded to both data sources.
        capital: Amount passed to the optimizer.
        interval: Sampling interval forwarded to both data sources.

    Returns:
        Tuple ``(portfolio_cumulative_returns, sp500_cumulative_returns)`` of
        fractional cumulative returns.
    """
    stockDataManager = StockDataManager()

    # Fetch historical data for the portfolio: one 'adj_close' column per ticker.
    portfolio_data_list = [
        stockDataManager.history(ticker=symbol, period=period, interval=interval)[['adj_close']]
        for symbol in tickers
    ]
    portfolio_data = pd.concat(portfolio_data_list, axis=1, keys=list(tickers))
    portfolio_data.columns = portfolio_data.columns.droplevel(1)
    portfolio_data = portfolio_data.ffill().bfill()
    portfolio_data = portfolio_data.dropna()

    # Calculate daily returns (the first row has no predecessor and is dropped).
    daily_returns = portfolio_data.pct_change().dropna()

    # Fetch S&P 500 prices and align them with the portfolio *price* dates.
    # Aligning with the return dates instead would make pct_change() below
    # discard one more row, so the benchmark curve would start a day later
    # than the portfolio curve and be measured from a different base date.
    sp500_data = yf.download('^GSPC', period=period, interval=interval)['Adj Close']
    sp500_data = sp500_data.reindex(portfolio_data.index).ffill().bfill()
    sp500_daily_returns = sp500_data.pct_change().dropna()

    # Get optimized weights from the Black-Litterman model.
    # NOTE(review): assumes the optimizer returns one entry per ticker, in the
    # same order as `tickers`, because the weights are matched to the return
    # columns by position. Confirm against BlackLittermanOptimization.
    bl = BlackLittermanOptimization()
    output = bl.calculate_portfolio_value(capital, tickers)
    weights = [obj['weight'] for obj in output]
    print(weights)

    # Calculate daily portfolio returns using the optimized weights.
    portfolio_daily_returns = daily_returns.dot(weights)

    # Compound the daily returns into cumulative returns.
    portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod() - 1
    sp500_cumulative_returns = (1 + sp500_daily_returns).cumprod() - 1

    return portfolio_cumulative_returns, sp500_cumulative_returns


In [25]:
# Back-test the Black-Litterman weighted portfolio against the S&P 500 and plot
# both curves. Note that `sp500_cumulative_returns` is rebound here, replacing
# the series returned by the earlier get_cumulative_returns call.
optimized_portfolio_cumulative_returns, sp500_cumulative_returns = get_optimized_cumulative_returns(tickers, period, capital, interval)
plot_cumulative_returns("Optimized", optimized_portfolio_cumulative_returns, "S&P 500",sp500_cumulative_returns)

## Portfolio(Non-Optimized) vs Portfolio(Optimized)

In [26]:
# Compare the two portfolio curves directly. The second series slot of
# plot_cumulative_returns is named for the S&P 500 but carries the optimized
# portfolio here.
plot_cumulative_returns("Non-Optimized", non_optimized_portfolio_cumulative_returns, "Optimized",optimized_portfolio_cumulative_returns)

In [27]:
from datetime import datetime


def calculate_avg_yearly_return(cumulative_returns):
    """Annualise the final value of a cumulative-return series, in percent.

    The time span is the number of whole days between the earliest and latest
    index labels, divided by 365. The last value of the series is treated as
    the total fractional return over that span.

    Args:
        cumulative_returns: pandas Series of fractional cumulative returns.
            Index labels may be timestamps or date strings such as
            ``"2021-01-01 00:00:00"``.

    Returns:
        The compound annual growth rate as a percentage (10.0 means 10 %).

    Raises:
        ValueError: If the series is empty or spans less than one day.
    """
    if cumulative_returns.empty:
        raise ValueError("cumulative_returns must contain at least one observation")

    # to_datetime accepts Timestamp labels as well as date strings, so the
    # function works for both index types produced in this notebook.
    dates = pd.to_datetime(cumulative_returns.index)
    no_years = (dates.max() - dates.min()).days / 365
    if no_years <= 0:
        raise ValueError("cumulative_returns must span at least one day to annualise")

    # Positional access: the index is date-labelled, so [-1] is not reliable.
    total_return = cumulative_returns.iloc[-1]
    annualized_port_return = (1 + total_return) ** (1 / no_years) - 1
    return annualized_port_return * 100

In [28]:
# Report the annualised return (in percent) of each curve from the single run.
for _label, _curve in (
    ("Non-optimized weight avg return: ", non_optimized_portfolio_cumulative_returns),
    ("Optimized weight avg return: ", optimized_portfolio_cumulative_returns),
    ("SP500 avg return: ", sp500_cumulative_returns),
):
    print(_label, calculate_avg_yearly_return(_curve))

In [29]:
# NOTE: no correlation is computed here. This call re-runs the optimized
# back-test (which prints the weights) and, as the last expression of the
# notebook cell, shows the returned (portfolio, S&P 500) pair of series.
get_optimized_cumulative_returns(tickers, period, capital)

## Repeat the Test N Times

In [30]:
def test_hypothesis(n_trials):
    """Run the portfolio comparison on ``n_trials`` random ticker baskets.

    Each trial samples a fresh basket, computes the non-optimized and
    optimized cumulative-return curves (the S&P 500 curve is taken from the
    optimized run) and annualises all three. The notebook-level ``period``
    and ``capital`` settings are used.

    Args:
        n_trials: Number of random baskets to evaluate.

    Returns:
        Dict keyed by trial number (starting at 0). Each value holds the three
        annualised returns and the comma-separated ticker list.
    """
    results = {}
    for trial in range(n_trials):
        basket = generate_random_tickers()
        print(basket)

        baseline_curve, _ = get_cumulative_returns(basket, period)
        optimized_curve, benchmark_curve = get_optimized_cumulative_returns(basket, period, capital)

        benchmark_avg = calculate_avg_yearly_return(benchmark_curve)
        baseline_avg = calculate_avg_yearly_return(baseline_curve)
        optimized_avg = calculate_avg_yearly_return(optimized_curve)

        results[trial] = dict(
            non_opt_po_avg_returns=baseline_avg,
            opt_po_avg_return=optimized_avg,
            sp500_avg_return=benchmark_avg,
            tickers=', '.join(basket),
        )
    return results
      
# Run 10 random-basket trials and tabulate them: one row per trial number, one
# column per key of the per-trial result dict.
data = test_hypothesis(10)
df_test = pd.DataFrame.from_dict(data, orient='index')

In [31]:
# Display the per-trial results table.
df_test

## Hypothesis Testing

Comparing the performance of the Black-Litterman model with the performance of market benchmarks such as the S&P 500 and the NASDAQ 100.

**Hypotheses:**

H0 (Null Hypothesis): There is no significant difference between the returns of the S&P 500 and the optimized portfolio.

H1 (Alternative Hypothesis): There is a significant difference between the returns of the S&P 500 and the optimized portfolio.


In [32]:
# Two-sample t-test comparing the optimized portfolio's cumulative-return
# series with the S&P 500's.
from scipy.stats import ttest_ind

# NOTE(review): both inputs are cumulative-return levels from the same dates,
# so their observations are serially dependent, whereas an independent-samples
# t-test assumes independent observations. Consider testing per-period returns
# instead, and confirm before relying on the p-value.
t_test, p_value = ttest_ind(
    optimized_portfolio_cumulative_returns,
    sp500_cumulative_returns,
)

# Each label is printed on its own line, followed by the value on the next.
print("T-Statistic: ", t_test, sep="\n")
print("P-Value: ", p_value, sep="\n")

The results suggest that we should reject the null hypothesis, because the p-value is well below 0.05, the threshold typically used for rejecting the null hypothesis. This means that the optimized portfolio's returns are significantly different from the S&P 500's returns.

# --------------------------- END OF THE NOTEBOOK --------------------------- #