# Return Prediction for View and Uncertainty Matrix

In [44]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
# import xgboost as xgb
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import yfinance as yf

# Function to preprocess data
def preprocess_data(df):
    df['Date'] = pd.to_datetime(df.index)
    df.set_index('Date', inplace=True)
    df['Future_Return'] = df['Adj Close'].shift(-100) / df['Adj Close'] - 1
    df = df.dropna(subset=['Future_Return'])
    df['SMA_5'] = df['Adj Close'].rolling(window=5).mean()
    df['SMA_200'] = df['Adj Close'].rolling(window=200).mean()
    df = df.dropna()
    features = ['Open', 'High', 'Low', 'Adj Close', 'Volume', 'SMA_5']
    X = df[features]
    y = df['Future_Return']
    # scaler = StandardScaler()
    # X_scaled = scaler.fit_transform(X)
    return X, y

# Function to train models and perform bootstrapping
def train_and_predict(X_train, y_train, X_test, n_iterations=100):
    models = {
        'Linear Regression': LinearRegression(),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        # 'XGBoost': xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, alpha=10, n_estimators=100)
    }
    predictions = {}

    for model_name, model in models.items():
        model.fit(X_train, y_train)
        preds = np.zeros((n_iterations, X_test.shape[0]))
        for i in range(n_iterations):
            preds[i] = model.predict(X_test)
        predictions[model_name] = preds.mean(axis=0)

    return predictions

# Load data for three companies
tickers = ['BN', 'ENPH', 'FSLR']  # Example tickers for Apple, Google, and Microsoft

# Download historical data for each stock
companies_data = []
for ticker in tickers:
    companies_data.append(yf.download(ticker, start="2021-12-31", end="2024-03-31"))

# Preprocess data and split into training and testing sets
data_splits = []
for df in companies_data:
    X, y = preprocess_data(df)
    train_size = int(len(X) - 100)
    data_splits.append((X[:train_size], X[train_size:], y[:train_size], y[train_size:]))

# Initialize matrices to store results
n_companies = len(companies_data)
n_models = 3
returns_matrix = np.zeros((n_companies, n_models))
confidence_intervals_matrix = np.zeros((n_companies, n_models, 2))  # lower and upper bounds
uncertainty_matrix = np.zeros((n_companies, n_models))

# Train models and calculate predictions
predicted_returns = []
for i, (X_train, X_test, y_train, y_test) in enumerate(data_splits):
    predictions = train_and_predict(X_train, y_train, X_test)
    uncertainties = train_and_predict(X_train, y_train, X_test)
    predicted_returns.append(predictions)

    # Store predictions
    returns_matrix[i, 0] = predictions['Linear Regression'].mean()
    returns_matrix[i, 1] = predictions['Random Forest'].mean()
    # returns_matrix[i, 2] = predictions['XGBoost'].mean()

    # Store confidence intervals (95% CI)
    confidence_intervals_matrix[i, 0, 0] = predictions['Linear Regression'].mean() - 1.96 * uncertainties['Linear Regression'].mean()
    confidence_intervals_matrix[i, 0, 1] = predictions['Linear Regression'].mean() + 1.96 * uncertainties['Linear Regression'].mean()
    confidence_intervals_matrix[i, 1, 0] = predictions['Random Forest'].mean() - 1.96 * uncertainties['Random Forest'].mean()
    confidence_intervals_matrix[i, 1, 1] = predictions['Random Forest'].mean() + 1.96 * uncertainties['Random Forest'].mean()
    # confidence_intervals_matrix[i, 2, 0] = predictions['XGBoost'].mean() - 1.96 * uncertainties['XGBoost'].mean()
    # confidence_intervals_matrix[i, 2, 1] = predictions['XGBoost'].mean() + 1.96 * uncertainties['XGBoost'].mean()

    # Calculate uncertainty as the width of the confidence interval
    uncertainty_matrix[i, 0] = confidence_intervals_matrix[i, 0, 1] - confidence_intervals_matrix[i, 0, 0]
    uncertainty_matrix[i, 1] = confidence_intervals_matrix[i, 1, 1] - confidence_intervals_matrix[i, 1, 0]
    # uncertainty_matrix[i, 2] = confidence_intervals_matrix[i, 2, 1] - confidence_intervals_matrix[i, 2, 0]

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['SMA_5'] = df['Adj Close'].rolling(window=5).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['SMA_200'] = df['Adj Close'].rolling(window=200).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the do

In [45]:
import yfinance as yf
import pandas as pd

# Define the stock tickers
tickers = ['BN', 'ENPH', 'FSLR']  # Example tickers for Apple, Google, and Microsoft

# Download historical data for each stock
data = {}
for ticker in tickers:
    data[ticker] = yf.download(ticker, start="2021-12-31", end="2024-03-31")[['Open', 'Adj Close']]

# Combine the data into a single DataFrame
df_combined = pd.DataFrame()
for ticker in tickers:
    df_temp = data[ticker].copy()
    df_temp.columns = [f'{ticker}_Open', f'{ticker}_Close']
    df_combined = pd.merge(df_combined, df_temp, left_index=True, right_index=True, how='outer')

# Add a sample risk-free rate (e.g., 1% per year, constant)
df_combined['RiskFreeRate'] = 0.02

# Save the combined DataFrame to a CSV file
df_combined.to_csv('combined_stock_data.csv')

# Display the combined DataFrame
print(df_combined.head())

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

              BN_Open   BN_Close   ENPH_Open  ENPH_Close  FSLR_Open  \
Date                                                                  
2021-12-31  48.932903  47.675674  186.899994  182.940002  87.500000   
2022-01-03  48.811642  47.336151  185.229996  184.449997  88.010002   
2022-01-04  48.504444  47.786213  187.589996  178.279999  91.199997   
2022-01-05  49.094585  45.622726  174.559998  157.199997  87.300003   
2022-01-06  46.685532  45.441124  156.009995  151.490005  83.010002   

            FSLR_Close  RiskFreeRate  
Date                                  
2021-12-31   87.160004          0.02  
2022-01-03   88.580002          0.02  
2022-01-04   87.279999          0.02  
2022-01-05   83.510002          0.02  
2022-01-06   83.970001          0.02  





In [46]:
assets = ['BN_Close', 'ENPH_Close', 'FSLR_Close']
for asset in assets:
    df_combined[f'{asset}_Return'] = df_combined[asset].pct_change()
# Calculate daily return for the risk-free rate
df_combined['RiskFreeRate_Return'] = df_combined['RiskFreeRate'].pct_change()

# Drop NaN values
df_combined.dropna(inplace=True)

# Select return columns
return_columns = [f'{asset}_Return' for asset in assets]
returns = df_combined[return_columns]

print(df_combined.head())

              BN_Open   BN_Close   ENPH_Open  ENPH_Close  FSLR_Open  \
Date                                                                  
2022-01-03  48.811642  47.336151  185.229996  184.449997  88.010002   
2022-01-04  48.504444  47.786213  187.589996  178.279999  91.199997   
2022-01-05  49.094585  45.622726  174.559998  157.199997  87.300003   
2022-01-06  46.685532  45.441124  156.009995  151.490005  83.010002   
2022-01-07  46.564266  45.299000  152.110001  145.130005  83.440002   

            FSLR_Close  RiskFreeRate  BN_Close_Return  ENPH_Close_Return  \
Date                                                                       
2022-01-03   88.580002          0.02        -0.007122           0.008254   
2022-01-04   87.279999          0.02         0.009508          -0.033451   
2022-01-05   83.510002          0.02        -0.045274          -0.118241   
2022-01-06   83.970001          0.02        -0.003981          -0.036323   
2022-01-07   84.680000          0.02        -0

In [47]:
def calculate_sharpe_ratio_portfolio(returns, weights, risk_free_rate = 0.02, periods_per_year=252):
    """
    Calculate the Sharpe Ratio for a portfolio of assets.

    Parameters:
    returns (pd.DataFrame): DataFrame of daily returns of the assets.
    weights (np.array): Array of portfolio weights.
    risk_free_rate (float): Risk-free rate, expressed as an annualized rate.
    periods_per_year (int): Number of periods per year (252 for daily returns).

    Returns:
    tuple: The portfolio's volatility and Sharpe Ratio.
    """
    # Calculate the mean and covariance of daily returns
    mean_returns = returns.mean()
    cov_matrix = returns.cov()

    # Calculate the portfolio return and volatility
    portfolio_return = np.dot(weights, mean_returns) * periods_per_year
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights))) * np.sqrt(periods_per_year)

    # Calculate the annualized risk-free rate per period
    risk_free_rate_per_period = (1 + risk_free_rate) ** (1 / periods_per_year) - 1

    # Calculate the excess return
    excess_return = portfolio_return - risk_free_rate

    # Calculate the Sharpe Ratio
    sharpe_ratio = excess_return / portfolio_volatility

    return portfolio_volatility, sharpe_ratio

In [48]:
# Display results
print("Returns Matrix:")
print(returns_matrix)

linear_regression_returns = returns_matrix[:, 0].tolist()
print("Linear Regression Returns:", linear_regression_returns)

random_forest_returns = returns_matrix[:, 1].tolist()
print("Random Forest Returns:", linear_regression_returns)

xg_boost_returns = returns_matrix[:, 2].tolist()
print("XG Boost Returns:", linear_regression_returns)

Returns Matrix:
[[-0.01127247  0.00394015  0.        ]
 [-0.25380682 -0.28522631  0.        ]
 [ 0.07926764 -0.00691058  0.        ]]
Linear Regression Returns: [-0.011272468610035294, -0.25380681804573857, 0.07926763765321836]
Random Forest Returns: [-0.011272468610035294, -0.25380681804573857, 0.07926763765321836]
XG Boost Returns: [-0.011272468610035294, -0.25380681804573857, 0.07926763765321836]


In [49]:
# Prepare results for output
results = {'Linear Regression': [], 'Random Forest': [], 'XGBoost': []}
uncertainty_results = {'Linear Regression': [], 'Random Forest': [], 'XGBoost': []}

for model_index, model_name in enumerate(results.keys()):
    for company_index in range(n_companies):
        results[model_name].append(returns_matrix[company_index, model_index])
        uncertainty_results[model_name].append(uncertainty_matrix[company_index, model_index])
for model_name in results:
    results[model_name] = [round(num, 2) for num in results[model_name]]
#this result is the viewDict
print(results)

for model_name in uncertainty_results:
    uncertainty_results[model_name] = [round(num, 1) for num in uncertainty_results[model_name]]
print("uncertainty")
print(uncertainty_results)


# Convert results to DataFrame and display
results_df = pd.DataFrame(results, index=['BN', 'ENPH', 'FSLR'])
print(results_df)

uncertainty_df = pd.DataFrame(uncertainty_results, index=['BN', 'ENPH', 'FSLR'])
print(uncertainty_df)


{'Linear Regression': [-0.01, -0.25, 0.08], 'Random Forest': [0.0, -0.29, -0.01], 'XGBoost': [0.0, 0.0, 0.0]}
uncertainty
{'Linear Regression': [-0.0, -1.0, 0.3], 'Random Forest': [0.0, -1.1, -0.0], 'XGBoost': [0.0, 0.0, 0.0]}
      Linear Regression  Random Forest  XGBoost
BN                -0.01           0.00      0.0
ENPH              -0.25          -0.29      0.0
FSLR               0.08          -0.01      0.0
      Linear Regression  Random Forest  XGBoost
BN                 -0.0            0.0      0.0
ENPH               -1.0           -1.1      0.0
FSLR                0.3           -0.0      0.0


In [50]:
#ViewDict for Black-LItterman
# for viewDict dictionary
# exmaple
'''viewdict = {
    "AMZN": 0.10,
    "BAC": 0.30,
    "COST": 0.05,
    "DIS": 0.05,
    "DPZ": 0.20,
    "KO": -0.05,  # I think Coca-Cola will go down 5%
    "MCD": 0.15,
    "MSFT": 0.10,
    "NAT": 0.50,  # but low confidence, which will be reflected later
    "SBUX": 0.10
}
'''
# Using Linear Regression model

linear_regression_returns_viewDict = results_df['Linear Regression'].to_dict()
print(linear_regression_returns_viewDict)

rf_returns_viewDict = results_df['Random Forest'].to_dict()
print(rf_returns_viewDict)

xg_returns_viewDict = results_df['XGBoost'].to_dict()
print(xg_returns_viewDict)

{'BN': -0.01, 'ENPH': -0.25, 'FSLR': 0.08}
{'BN': 0.0, 'ENPH': -0.29, 'FSLR': -0.01}
{'BN': 0.0, 'ENPH': 0.0, 'FSLR': 0.0}


In [51]:
print(results_df['Linear Regression'].tolist())

[-0.01, -0.25, 0.08]


In [52]:
lr_weights = np.array(results_df['Random Forest'].tolist())
portfolio_volatility, sharpe_ratio = calculate_sharpe_ratio_portfolio(returns,lr_weights, 0.02)
print(f"Portfolio Volatility: {portfolio_volatility:.4f}")
print(f"Sharpe Ratio for the portfolio: {sharpe_ratio:.4f}")

Portfolio Volatility: 0.1969
Sharpe Ratio for the portfolio: -0.1797


In [53]:
#uncertainty matrix
'''confidences = [
    0.6,
    0.4,
    0.2,
    0.5,
    0.7, # confident in dominos
    0.7, # confident KO will do poorly
    0.7,
    0.5,
    0.1,
    0.4
]
'''

linear_regression_confidences = uncertainty_df['Linear Regression'].to_list()
print(linear_regression_confidences)

rf_confidences = uncertainty_df['Random Forest'].to_list()
print(rf_confidences)

xgboost_confidences = uncertainty_df['XGBoost'].to_list()
print(xgboost_confidences)

[-0.0, -1.0, 0.3]
[0.0, -1.1, -0.0]
[0.0, 0.0, 0.0]


# Portfolio Optimization


In [54]:
!pip install pandas numpy matplotlib yfinance PyPortfolioOpt



In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from pypfopt import EfficientFrontier, risk_models, black_litterman, expected_returns, BlackLittermanModel, HRPOpt, CLA

In [56]:
tickers = ["BN", "ENPH", "FSLR"]
ohlc = yf.download(tickers, start='2022-01-01',end='2024-01-01')
prices = ohlc["Adj Close"]
prices.head()

[*********************100%%**********************]  3 of 3 completed


Ticker,BN,ENPH,FSLR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-03,47.336151,184.449997,88.580002
2022-01-04,47.786217,178.279999,87.279999
2022-01-05,45.622723,157.199997,83.510002
2022-01-06,45.441128,151.490005,83.970001
2022-01-07,45.299004,145.130005,84.68


In [57]:
market_prices = yf.download("SPY", start='2022-01-01',end='2024-01-01')["Adj Close"]
market_prices.tail()

[*********************100%%**********************]  1 of 1 completed


Date
2023-12-22    470.664490
2023-12-26    472.651886
2023-12-27    473.506500
2023-12-28    473.685333
2023-12-29    472.314026
Name: Adj Close, dtype: float64

In [58]:
S = risk_models.CovarianceShrinkage(prices).ledoit_wolf()
delta = black_litterman.market_implied_risk_aversion(market_prices)
delta

0.3203796762805008

In [59]:
daily_returns = prices.pct_change().dropna()
mu = expected_returns.mean_historical_return(prices)
daily_returns.head()

Ticker,BN,ENPH,FSLR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-04,0.009508,-0.033451,-0.014676
2022-01-05,-0.045274,-0.118241,-0.043194
2022-01-06,-0.00398,-0.036323,0.005508
2022-01-07,-0.003128,-0.041983,0.008455
2022-01-10,-0.012027,0.013781,-0.004487


In [60]:
def deviation_risk_parity(w, cov_matrix):
    diff = w * np.dot(cov_matrix, w) - (w * np.dot(cov_matrix, w)).reshape(-1, 1)
    return (diff**2).sum().sum()

In [61]:
ef = EfficientFrontier(mu, S)
weights = ef.nonconvex_objective(deviation_risk_parity, ef.cov_matrix)
ef.portfolio_performance(verbose=True)

Expected annual return: 4.4%
Annual volatility: 37.4%
Sharpe Ratio: 0.06


(0.04399302280471225, 0.37397449514591635, 0.06415684255513392)

**Black-Litterman**

In [66]:
mcaps = {}
for t in tickers:
    stock = yf.Ticker(t)
    shares_outstanding = stock.info.get('sharesOutstanding')
    marketCap=stock.history(start="2023-12-29", end="2024-03-31")['Close'].values[0] * shares_outstanding
    mcaps[t] = marketCap
mcaps

{'BN': 61927684736.36719,
 'ENPH': 17894662997.34497,
 'FSLR': 18442057029.327393}

In [67]:
prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)

In [90]:
#linear_regression_returns_viewDict
#rf_returns_viewDict
#xg_returns_viewDict
bl = BlackLittermanModel(S, pi=prior, absolute_views=linear_regression_returns_viewDict, view_confidences=linear_regression_confidences)
rets = bl.bl_returns()
ef = EfficientFrontier(rets, S)
ef.max_sharpe()
print(ef.clean_weights())
predicted_features=ef.portfolio_performance(verbose=True)
predicted_features

OrderedDict([('BN', 0.0), ('ENPH', 0.0), ('FSLR', 1.0)])
Expected annual return: 2.3%
Annual volatility: 52.9%
Sharpe Ratio: 0.01


(0.02341547604146384, 0.5294945581195306, 0.006450445975485954)

### Validation

In [77]:
# use wights from the model to calculate the actual annual returns
weights = ef.clean_weights()
weights = np.array(list(weights.values()))
actual_returns = np.dot(weights, returns[-100:].mean().to_list()) * 252

(array([0., 0., 1.]), 0.44518034198599654)

In [91]:
# use wights from the model to calculate the actual annual volatility and sharpe ratio
actual_volatility, actual_sharpe_ratio = calculate_sharpe_ratio_portfolio(returns[-100:], weights, 0.02)
print(f"Portfolio Return: {actual_returns:.4f}")
print(f"Portfolio Volatility: {actual_volatility:.4f}")
print(f"Sharpe Ratio for the portfolio: {actual_sharpe_ratio:.2f}")

Portfolio Return: 0.4452
Portfolio Volatility: 0.4674
Sharpe Ratio for the portfolio: 0.91


In [94]:
# compare the actual features with the predictied features
distance = np.array([actual_returns, actual_volatility, actual_sharpe_ratio]) - predicted_features
distance

array([ 0.42176487, -0.06205371,  0.90314131])

**Hierarchical risk parity**

In [65]:
hrp = HRPOpt(daily_returns)
weights = hrp.optimize()
hrp.portfolio_performance(verbose=True)
print(weights)

import pypfopt.plotting as plotting
plotting.plot_dendrogram(hrp)  # to plot dendrogram

Expected annual return: 7.2%
Annual volatility: 33.6%
Sharpe Ratio: 0.16
OrderedDict([('BN', 0.7063330923023521), ('ENPH', 0.11260492442694024), ('FSLR', 0.1810619832707077)])


  w[first_cluster] *= alpha  # weight 1


OSError: 'seaborn-deep' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

In [None]:
cla = CLA(mu, S)
print(cla.max_sharpe())
cla.portfolio_performance(verbose=True)
plotting.plot_efficient_frontier(cla)  # to plot

# Summary

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'svg'

import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt

import universal as up
from universal import tools, algos
from universal.algos import *

sns.set_context("notebook")
plt.rcParams["figure.figsize"] = (16, 8)

In [None]:
# Prepare data for visualizations
'''
model_names = ['Linear Regression', 'Random Forest', 'XGBoost']
company_names = ['Company 1', 'Company 2', 'Company 3']

# Box Plots

for i in range(n_companies):
    plt.figure(figsize=(14, 7))
    data = [predicted_returns[i][model_name] for model_name in model_names]
    plt.boxplot(data, labels=model_names)
    plt.title(f'Box Plot of Predicted Returns for {company_names[i]}')
    plt.ylabel('Expected Return')
    plt.show()

# Line Plots for Actual vs. Predicted Returns
for i, (X_train, X_test, y_train, y_test) in enumerate(data_splits):
    predictions = predicted_returns[i]

    plt.figure(figsize=(14, 7))
    plt.plot(y_test.index, y_test, label='Actual Returns', color='blue')
    plt.plot(y_test.index, predictions['Linear Regression'], label='Predicted Returns - Linear Regression', color='red')
    plt.plot(y_test.index, predictions['Random Forest'], label='Predicted Returns - Random Forest', color='green')
    plt.plot(y_test.index, predictions['XGBoost'], label='Predicted Returns - XGBoost', color='orange')
    plt.title(f'Actual vs Predicted Returns for {company_names[i]}')
    plt.xlabel('Date')
    plt.ylabel('Returns')
    plt.legend()
    plt.show()

# Scatter Plots for Actual vs. Predicted Returns
for i, (X_train, X_test, y_train, y_test) in enumerate(data_splits):
    predictions = predicted_returns[i]

    plt.figure(figsize=(14, 7))
    plt.scatter(y_test, predictions['Linear Regression'], label='Linear Regression', color='red')
    plt.scatter(y_test, predictions['Random Forest'], label='Random Forest', color='green')
    plt.scatter(y_test, predictions['XGBoost'], label='XGBoost', color='orange')
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
    plt.title(f'Scatter Plot of Actual vs Predicted Returns for {company_names[i]}')
    plt.xlabel('Actual Returns')
    plt.ylabel('Predicted Returns')
    plt.legend()
    plt.show()
'''

In [None]:

print("\nConfidence Intervals Matrix (95% CI):")
print(confidence_intervals_matrix)

# Prepare data for visualizations
model_names = ['Linear Regression', 'Random Forest', 'XGBoost']
company_names = ['Company 1', 'Company 2', 'Company 3']

# Plot predicted returns with confidence intervals
for i in range(n_companies):
    plt.figure(figsize=(14, 7))
    for j in range(n_models):
        plt.errorbar(j, returns_matrix[i, j],
                     yerr=[[returns_matrix[i, j] - confidence_intervals_matrix[i, j, 0]], [confidence_intervals_matrix[i, j, 1] - returns_matrix[i, j]]],
                     fmt='o', capsize=5, label=f'{model_names[j]} Mean Return')
    plt.title(f'Returns and Confidence Intervals for {company_names[i]}')
    plt.xlabel('Model')
    plt.ylabel('Expected Return')
    plt.xticks(range(n_models), model_names)
    plt.legend()
    plt.show()

In [None]:
import seaborn as sns

for i in range(n_companies):
    plt.figure(figsize=(14, 7))
    for model_name in model_names:
        sns.kdeplot(predicted_returns[i][model_name], label=model_name)
    plt.title(f'Density Plot of Predicted Returns for {company_names[i]}')
    plt.xlabel('Expected Return')
    plt.ylabel('Density')
    plt.legend()
    plt.show()

In [None]:
model_names = ['Linear Regression', 'Random Forest', 'XGBoost']
company_names = ['Company 1', 'Company 2', 'Company 3']

for i in range(n_companies):
    plt.figure(figsize=(14, 7))
    for j in range(n_models):
        plt.bar(j, returns_matrix[i, j], label=f'{model_names[j]} Mean Return')
    plt.title(f'Predicted Returns for {company_names[i]}')
    plt.xlabel('Model')
    plt.ylabel('Expected Return')
    plt.xticks(range(n_models), model_names)
    plt.legend()
    plt.show()

In [None]:
for i, (X_train, X_test, y_train, y_test) in enumerate(data_splits):
    predictions = train_and_predict(X_train, y_train, X_test)

    plt.figure(figsize=(14, 7))
    plt.plot(y_test.index, y_test, label='Actual Returns', color='blue')
    plt.plot(y_test.index, predictions['LinearRegression'], label='Predicted Returns - Linear Regression', color='red')
    plt.plot(y_test.index, predictions['RandomForest'], label='Predicted Returns - Random Forest', color='green')
    plt.plot(y_test.index, predictions['XGBoost'], label='Predicted Returns - XGBoost', color='orange')
    plt.title(f'Actual vs Predicted Returns for {company_names[i]}')
    plt.xlabel('Date')
    plt.ylabel('Returns')
    plt.legend()
    plt.show()