In [None]:
import datetime
import json
import os
import warnings
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import seaborn as sns
import statsmodels.api as sm
from arch import arch_model
from IPython.display import FileLink
from scipy import stats
from scipy.optimize import minimize
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from yahooquery import Ticker
# Suppresses every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
# Read the screen export (skipping the first two spreadsheet rows), derive a
# bare ticker symbol from the 'Ticker' column, and discard the first data row.
file_path = 'Nov 14 Data Update.xlsx'
df_cleaned = (
    pd.read_excel(file_path, sheet_name='Sheet1', skiprows=2)
    .assign(ticker_symbol=lambda frame: frame['Ticker'].str.replace(' US Equity', ''))
    .drop(index=0)
    .reset_index(drop=True)
)
# The last two rows are dropped as well.
df_cleaned = df_cleaned.drop(df_cleaned.index[-2:])


In [None]:
# Get the 10-year Treasury yield as a proxy for the risk-free rate
start_date = datetime.datetime(2023, 1, 1)
end_date = datetime.datetime.now()

df = pdr.DataReader('DGS10', 'fred', start_date, end_date)
# The newest row of the series can be a missing observation; taking it blindly
# would make `risk_free_rate` NaN and poison every CAPM figure downstream, so
# use the most recent value that is actually present.
# DGS10 is quoted in percent, hence the division by 100.
risk_free_rate = df['DGS10'].dropna().iloc[-1] / 100

In [None]:
# Plain Python list of the symbols derived from the 'Ticker' column.
list_of_symbols = df_cleaned['ticker_symbol'].tolist()
def iterate_revenue_and_net(list_of_syms):
    """Request 'TotalRevenue' and 'NetIncome' for each symbol, one call per symbol.

    Parameters
    ----------
    list_of_syms : iterable of str
        Symbols passed one at a time to ``Ticker``.

    Returns
    -------
    list
        Whatever ``get_financial_data(..., trailing=False)`` returned for each
        symbol, in input order.  Entries are not validated here.
    """
    return [
        Ticker(sym).get_financial_data(['TotalRevenue', 'NetIncome'], trailing=False)
        for sym in list_of_syms
    ]


# One entry per symbol, in the order of `list_of_symbols`.
test = iterate_revenue_and_net(list_of_symbols)

In [None]:
# Keep only the per-symbol results that came back as DataFrames
test = list(filter(lambda result: isinstance(result, pd.DataFrame), test))

# Stack the surviving frames into one table
combined_df = pd.concat(test)

In [None]:
# Profit margin = net income / total revenue, then its period-over-period
# percentage change computed separately within each symbol.
profit_margin = combined_df['NetIncome'].div(combined_df['TotalRevenue'])
combined_df['Profit Margin'] = profit_margin
margin_by_symbol = combined_df.groupby('symbol')['Profit Margin']
combined_df['Profit Margin Growth'] = margin_by_symbol.pct_change()



In [None]:
# Discard every row that has a missing value in any column.
combined_df = combined_df.dropna(axis='index', how='any')

In [None]:
def analyze_ticker_and_update_df(ticker_symbol, df):
    """Compute price and profit-margin indicators for one ticker and store them in ``df``.

    Two years of history are requested through ``Ticker(ticker_symbol).history``.
    From ``adjclose`` the function derives daily log returns, a MACD histogram
    (12/26/9 EMAs), Bollinger bands (20-period EMA +/- 2 rolling standard
    deviations) and a 14-period RSI.  The latest readings are combined with the
    row's ``'Rec Consensus:D-1'`` value into three Buy/Sell/Hold labels.  The
    row's four profit-margin columns yield a compounded growth figure, a
    last-step momentum figure and a MACD-style indicator.

    Parameters
    ----------
    ticker_symbol : str
        Symbol to analyse; must match a value in ``df['ticker_symbol']``.
    df : pandas.DataFrame
        Needs the columns 'ticker_symbol', 'Rec Consensus:D-1', 'PM:2021C',
        'PM:2022C', 'PM:2023C' and 'PM LF'.  It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.  It is returned untouched when no usable price
        history is available.

    Raises
    ------
    IndexError
        If ``ticker_symbol`` matches no row of ``df``.
    """
    # Fetch stock data using yahooquery
    stock = Ticker(ticker_symbol)
    history = stock.history(period='2y')

    # Anything other than a non-empty DataFrame is treated as "no data" instead
    # of failing on a missing `.empty` attribute.
    if not isinstance(history, pd.DataFrame) or history.empty:
        print(f"No data found for {ticker_symbol}")
        return df

    # Rows belonging to this ticker; built once and reused for every read/write.
    row_mask = df['ticker_symbol'] == ticker_symbol
    prices = history['adjclose']

    # Daily log returns and their annualised mean (252 trading days per year)
    log_returns = np.log(prices / prices.shift(1))
    expected_annual_return = log_returns.mean() * 252

    # Price MACD: 12- vs 26-period EMA, 9-period signal line, histogram = MACD - signal
    short_ema = prices.ewm(span=12, adjust=False).mean()
    long_ema = prices.ewm(span=26, adjust=False).mean()
    macd = short_ema - long_ema
    signal = macd.ewm(span=9, adjust=False).mean()
    macd_diff = macd - signal

    # Bollinger bands around a 20-period EMA
    ema_20 = prices.ewm(span=20, adjust=False).mean()
    std_20 = prices.rolling(window=20).std()
    upper_band = ema_20 + (std_20 * 2)
    lower_band = ema_20 - (std_20 * 2)

    # Relative Strength Index over 14 periods (simple rolling means)
    delta = prices.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))

    # Latest values of each indicator
    latest_adjclose = prices.iloc[-1]
    latest_upper_band = upper_band.iloc[-1]
    latest_lower_band = lower_band.iloc[-1]
    latest_rsi = rsi.iloc[-1]
    latest_log_return = log_returns.iloc[-1]
    latest_momentum = macd_diff.iloc[-1]

    # Combined signal: band breach, RSI extreme and analyst consensus must agree
    rec_consensus = df.loc[row_mask, 'Rec Consensus:D-1'].values[0]
    if latest_adjclose < latest_lower_band and latest_rsi < 30 and rec_consensus >= 4:
        buy_sell_signal = 'Buy'
    elif latest_adjclose > latest_upper_band and latest_rsi > 70 and rec_consensus <= 2:
        buy_sell_signal = 'Sell'
    else:
        buy_sell_signal = 'Hold'

    # RSI-only signal
    if latest_rsi < 30:
        analysis_signal = 'Buy'
    elif latest_rsi > 70:
        analysis_signal = 'Sell'
    else:
        analysis_signal = 'Hold'

    # Consensus-only signal
    if rec_consensus >= 4.00:
        analyst_signal = 'Buy'
    elif rec_consensus <= 2.00:
        analyst_signal = 'Sell'
    else:
        analyst_signal = 'Hold'

    profit_margins = df.loc[row_mask, ['PM:2021C', 'PM:2022C', 'PM:2023C', 'PM LF']].values[0]
    # Relative change between the two most recent margin readings
    momentum = (profit_margins[-1] - profit_margins[-2]) / profit_margins[-2]

    # MACD-style indicator on the profit margins themselves.  The earlier
    # version reused the price EMAs/MACD/signal here, so the PM_* columns simply
    # repeated the price MACD.
    profit_margin_series = pd.Series(
        profit_margins, index=['2021', '2022', '2023', 'LF'], dtype=float
    )
    pm_short_ema = profit_margin_series.ewm(span=12, adjust=False).mean()
    pm_long_ema = profit_margin_series.ewm(span=26, adjust=False).mean()
    pm_macd = pm_short_ema - pm_long_ema
    pm_signal = pm_macd.ewm(span=9, adjust=False).mean()
    pm_macd_diff = pm_macd - pm_signal

    # Step-by-step growth rates, compounded into one total growth figure
    growth_rates = [
        (profit_margins[i] - profit_margins[i - 1]) / profit_margins[i - 1]
        for i in range(1, len(profit_margins))
    ]
    total_growth_rate = 1
    for growth_rate in growth_rates:
        total_growth_rate *= (1 + growth_rate)
    total_growth_rate -= 1  # back from a growth factor to a growth rate

    # Write everything into the ticker's row(s); growth figures are stored in percent
    df.loc[row_mask, 'Total Profit Margin Growth'] = total_growth_rate * 100
    df.loc[row_mask, 'Profit Margin Momentum'] = momentum * 100
    df.loc[row_mask, 'Log Return'] = latest_log_return
    df.loc[row_mask, 'Momentum'] = latest_momentum
    df.loc[row_mask, 'Expected Annual Return'] = expected_annual_return
    df.loc[row_mask, 'Buy Sell Signal'] = buy_sell_signal
    df.loc[row_mask, 'Analysis Signal'] = analysis_signal
    df.loc[row_mask, 'Recommendation Signal'] = analyst_signal
    df.loc[row_mask, 'PM_MACD'] = pm_macd.iloc[-1]
    df.loc[row_mask, 'PM_Signal'] = pm_signal.iloc[-1]
    df.loc[row_mask, 'PM_MACD_Diff'] = pm_macd_diff.iloc[-1]

    return df

# Result columns filled in by `analyze_ticker_and_update_df`, in output order.
# Numeric results start as NaN.  The three label columns start as None so they
# are object-typed from the beginning: they later receive 'Buy'/'Sell'/'Hold'
# strings, and writing strings into a float column is an incompatible-dtype
# assignment in pandas.  None still counts as missing for `dropna()`.
result_column_defaults = {
    'Log Return': np.nan,
    'Momentum': np.nan,
    'Expected Annual Return': np.nan,
    'Buy Sell Signal': None,
    'Analysis Signal': None,
    'Recommendation Signal': None,
    'Total Profit Margin Growth': np.nan,
    'Profit Margin Momentum': np.nan,
    'PM_MACD': np.nan,
    'PM_Signal': np.nan,
    'PM_MACD_Diff': np.nan,
}
for column_name, initial_value in result_column_defaults.items():
    df_cleaned[column_name] = initial_value

# Analyse every ticker; the symbol list is materialised first because the
# frame is updated while the loop runs.
for ticker in df_cleaned['ticker_symbol'].tolist():
    df_cleaned = analyze_ticker_and_update_df(ticker, df_cleaned)

# Display the updated df_cleaned DataFrame
df_cleaned


In [None]:
# Average 'Profit Margin Growth' per symbol, as a two-column frame
# ('symbol', 'Mean Profit Margin Growth').
mean_profit_margin_growth = (
    combined_df.groupby('symbol')['Profit Margin Growth']
    .mean()
    .rename('Mean Profit Margin Growth')
    .reset_index()
)
# Left-join onto df_cleaned by ticker and discard the join key afterwards.
df_cleaned = df_cleaned.merge(
    mean_profit_margin_growth,
    how='left',
    left_on='ticker_symbol',
    right_on='symbol',
).drop(columns=['symbol'])
# Show the merged frame
df_cleaned


In [None]:
# Invert the price multiples so that a larger value means "cheaper".
for inverse_column, multiple_column in (
    ('earnings_yield', 'P/E:D-1'),
    ('book_to_market', 'P/B:D-1'),
    ('CF/P', 'P/CF:D-1'),
):
    df_cleaned[inverse_column] = 1 / df_cleaned[multiple_column]

# Columns carried into the scoring step; rows missing any of them are dropped.
scoring_columns = [
    'Ticker', 'Short Name', 'earnings_yield', 'book_to_market', 'CF/P',
    'Expected Annual Return', 'Momentum', 'ticker_symbol', 'Log Return',
    'Mean Profit Margin Growth', 'GICS Sector', 'GICS Ind Name', 'ROE:Y',
    'Recommendation Signal', 'PM_MACD_Diff',
]
df_filtered = df_cleaned[scoring_columns].dropna()
df_filtered



In [None]:
# Factor weights for the long-side score.
weights = {
    'ROE:Y': 0.15,
    'Momentum': 0.25,
    'Profit Margin MACD': 0.15,
    'earnings_yield': 0.20,
    'CF/P': 0.10,
    'book_to_market': 0.15
}

# (z-score column to create, raw column it is computed from, key into `weights`)
long_factor_specs = [
    ('Z_ROE', 'ROE:Y', 'ROE:Y'),
    ('Z_Momentum', 'Momentum', 'Momentum'),
    ('Z_Profit Margin MACD', 'PM_MACD_Diff', 'Profit Margin MACD'),
    ('Z_earnings_yield', 'earnings_yield', 'earnings_yield'),
    ('Z_CF/P', 'CF/P', 'CF/P'),
    ('Z_book_to_market', 'book_to_market', 'book_to_market'),
]

# Standardise each factor within its GICS sector.
for z_column, raw_column, weight_key in long_factor_specs:
    df_filtered[z_column] = df_filtered.groupby('GICS Sector')[raw_column].transform(
        lambda x: stats.zscore(x, nan_policy='omit')
    )

# Weighted sum of the sector z-scores, accumulated in the listed factor order.
weighted_score = None
for z_column, raw_column, weight_key in long_factor_specs:
    weighted_term = df_filtered[z_column] * weights[weight_key]
    weighted_score = weighted_term if weighted_score is None else weighted_score + weighted_term
df_filtered['Weighted_Score'] = weighted_score

# Highest score first
df_sorted = df_filtered.sort_values(by='Weighted_Score', ascending=False)
# Despite its name, `top_60_long` holds the 100 best-scoring rows at this point.
top_60_long = df_sorted.head(100)
top_60_long.to_excel("top_100_long_companies_november_14_UPDATE.xlsx", index=False)




In [None]:
# Factor weights for the short-side score (momentum-heavy).
weights = {
    'ROE:Y': 0.05,
    'Momentum': 0.50,
    'Profit Margin MACD': 0.05,
    'earnings_yield': 0.10,
    'CF/P': 0.15,
    'book_to_market': 0.15
}

# (z-score column to create, raw column it is computed from, key into `weights`)
# NOTE(review): 'Z_Profit Margin MACD' is derived from 'Mean Profit Margin Growth'
# here, whereas the long-side scoring cell derives it from 'PM_MACD_Diff' -
# confirm that the difference is intended.
short_factor_specs = [
    ('Z_ROE', 'ROE:Y', 'ROE:Y'),
    ('Z_Momentum', 'Momentum', 'Momentum'),
    ('Z_Profit Margin MACD', 'Mean Profit Margin Growth', 'Profit Margin MACD'),
    ('Z_earnings_yield', 'earnings_yield', 'earnings_yield'),
    ('Z_CF/P', 'CF/P', 'CF/P'),
    ('Z_book_to_market', 'book_to_market', 'book_to_market'),
]

# Standardise each factor within its GICS sector (overwrites existing Z_* columns).
for z_column, raw_column, weight_key in short_factor_specs:
    df_filtered[z_column] = df_filtered.groupby('GICS Sector')[raw_column].transform(
        lambda x: stats.zscore(x, nan_policy='omit')
    )

# Weighted sum of the sector z-scores, accumulated in the listed factor order.
weighted_score = None
for z_column, raw_column, weight_key in short_factor_specs:
    weighted_term = df_filtered[z_column] * weights[weight_key]
    weighted_score = weighted_term if weighted_score is None else weighted_score + weighted_term
df_filtered['Weighted_Score'] = weighted_score

# Highest score first; rows with any missing value are removed after sorting.
df_sorted = df_filtered.sort_values(by='Weighted_Score', ascending=False).dropna(axis=0)

# Despite its name, `bottom_60_short` holds the 100 worst-scoring rows at this point.
bottom_60_short = df_sorted.tail(100)
bottom_60_short.to_excel("bottom_100_short_companies_november_14_UPDATED.xlsx", index=False)



In [None]:
# `top_40_long` / `bottom_40_short` are not created by any earlier cell, so this
# cell used to stop with a NameError.  They are derived here from the ranked
# selections built above: both are sorted best-first, so the top 40 are the
# first rows of the long list and the bottom 40 are the last rows of the short list.
top_40_long = top_60_long.head(40)
bottom_40_short = bottom_60_short.tail(40)

# NOTE(review): these are absolute paths under a root-level /Downloads folder -
# confirm that directory exists and is writable in the target environment.
top_40_path = "/Downloads/top_40_companies_latest.xlsx"
bottom_40_path = "/Downloads/bottom_40_companies_latest.xlsx"

top_40_long.to_excel(top_40_path, index=False)
bottom_40_short.to_excel(bottom_40_path, index=False)
# Display download links
display(FileLink(top_40_path))
display(FileLink(bottom_40_path))

In [None]:
current_directory = os.getcwd()
print("Current working directory:", current_directory)

# `top_40_long` / `bottom_40_short` are not created by any earlier cell, which
# made this cell fail with a NameError.  Both ranked selections are sorted
# best-first, so slice the 40 best longs and the 40 worst shorts from them.
top_40_long = top_60_long.head(40)
bottom_40_short = bottom_60_short.tail(40)

# Save files directly to the current directory
top_40_long.to_excel("top_40_companies_long_buy_latest.xlsx", index=False)
bottom_40_short.to_excel("bottom_40_companies_short_sell_latest.xlsx", index=False)

print("Files saved to:", current_directory)

In [None]:
# Rebinds `df_cleaned` to the contents of a CSV file; the frame built by the
# cells above is no longer reachable under this name after this cell runs.
updated_file_path = 'Financial Data Historical.csv'
df_cleaned = pd.read_csv(updated_file_path)



In [None]:
# Rank by whatever 'Weighted_Score' df_filtered currently holds (best first),
# then slice 60 rows off each end.
df_sorted = df_filtered.sort_values('Weighted_Score', ascending=False)
top_60_long, bottom_60_short = df_sorted.iloc[:60], df_sorted.iloc[-60:]

In [None]:
# Display the long-side selection.
top_60_long

In [None]:
# Symbols of both selections as plain lists.
top_60_long_symbols, bottom_60_short_symbols = (
    selection['ticker_symbol'].to_list() for selection in (top_60_long, bottom_60_short)
)

In [None]:
# Display the short-side symbol list.
bottom_60_short_symbols

In [None]:
def _log_returns_within_symbol(frame):
    """Daily log return of 'adjclose', lagging every symbol against its own previous row only.

    A plain ``shift(1)`` on a frame that stacks several symbols pairs the first
    row of each symbol with the last price of the symbol stored above it, which
    yields one spurious return per symbol.
    """
    prices = frame['adjclose']
    return np.log(prices / prices.groupby(level='symbol').shift(1))


top_60_long_data = Ticker(top_60_long_symbols).history(start='2022-11-06', end='2024-11-05').dropna()
bottom_60_short_data = Ticker(bottom_60_short_symbols).history(start='2022-11-06', end='2024-11-05').dropna()
market_data = Ticker('^GSPC').history(start='2022-11-06', end='2024-11-05').dropna()

# The market frame holds a single symbol, so a plain shift is sufficient there.
market_data['log_return'] = np.log(market_data['adjclose'] / market_data['adjclose'].shift(1))
top_60_long_data['log_return'] = _log_returns_within_symbol(top_60_long_data)
bottom_60_short_data['log_return'] = _log_returns_within_symbol(bottom_60_short_data)

# Market-wide statistics; each is a scalar broadcast down its column.
market_data['std_dev_log_return'] = market_data['log_return'].std()
market_data['variance'] = market_data['log_return'].var()
market_data['expected_annual_return'] = market_data['log_return'].mean() * 252
market_data['expected_annual_volatility'] = market_data['std_dev_log_return'].mean() * sqrt(252)
market_variance = market_data['log_return'].var()
market_return = market_data['log_return'].mean()  # mean daily log return

# Remove the leading row(s) that have no return.
market_data = market_data.dropna()
top_60_long_data = top_60_long_data.dropna()
bottom_60_short_data = bottom_60_short_data.dropna()

# Per-symbol standard deviation of the daily log returns.
bottom_60_short_data['std_dev_log_return'] = bottom_60_short_data.groupby(level='symbol')['log_return'].transform('std')
top_60_long_data['std_dev_log_return'] = top_60_long_data.groupby(level='symbol')['log_return'].transform('std')
market_data['std_dev_log_return'] = market_data['log_return'].std()

# Annual risk-free rate spread over 252 trading days.
r_f = risk_free_rate / 252


In [None]:
def calculate_cov_with_market(stock_returns):
    """Covariance between ``stock_returns`` and the module-level ``aligned_market_returns``.

    Both series are first restricted to the index labels they share (inner join).
    """
    aligned_pair = stock_returns.align(aligned_market_returns, join='inner')
    return aligned_pair[0].cov(aligned_pair[1])


def calculate_cov_with_market_long(stock_returns):
    """Covariance between ``stock_returns`` and ``aligned_market_returns_long``.

    This is the long-portfolio counterpart of ``calculate_cov_with_market``: it
    pairs the stock with the market series that was reindexed to the long
    dataset's dates.  It previously read ``aligned_market_returns``, the series
    built for the short dataset.  Both series are restricted to the index labels
    they share (inner join) before the covariance is taken.
    """
    stock_returns_aligned, market_returns_aligned = stock_returns.align(
        aligned_market_returns_long, join='inner'
    )
    return stock_returns_aligned.cov(market_returns_aligned)

def garch_volatility_forecast(returns, periods=1):
    """Fit a GARCH(1,1) model with normal errors and return a forecast volatility.

    Parameters
    ----------
    returns : array-like
        Return series handed to ``arch_model``.
    periods : int, default 1
        Forecast horizon passed to ``forecast``.

    Returns
    -------
    float
        Square root of the value in the last row, first column of the forecast
        variance table.
    """
    fitted = arch_model(returns, vol='Garch', p=1, q=1, dist='normal').fit(disp="off")
    variance_table = fitted.forecast(horizon=periods).variance
    return variance_table.iloc[-1, 0] ** 0.5

# Move the index levels into ordinary columns ...
short_dataset_indexed = bottom_60_short_data.reset_index()
long_dataset_indexed = top_60_long_data.reset_index()
market_dataset_indexed = market_data.reset_index()

# ... and index every frame by 'date' alone.
short_dataset = short_dataset_indexed.set_index('date')
long_dataset = long_dataset_indexed.set_index('date')
market_dataset = market_dataset_indexed.set_index('date')

# Market log returns laid out on each dataset's dates.
market_log_returns = market_dataset['log_return']
aligned_market_returns = market_log_returns.reindex(short_dataset.index.get_level_values('date')).dropna()
aligned_market_returns_long = market_log_returns.reindex(long_dataset.index.get_level_values('date')).dropna()

# The same five derived columns are added to the short and the long dataset.
for dataset, covariance_fn in (
    (short_dataset, calculate_cov_with_market),
    (long_dataset, calculate_cov_with_market_long),
):
    # Per-symbol covariance with the market, beta, and the CAPM expected return
    dataset['cov_with_market'] = dataset.groupby('symbol')['log_return'].transform(covariance_fn)
    dataset['stock_beta'] = (dataset['cov_with_market'] / market_variance)
    dataset['expected_return_capm'] = r_f + dataset['stock_beta'] * (market_return - r_f)

    # Per-symbol GARCH(1,1) volatility forecast
    dataset['garch_volatility'] = dataset.groupby('symbol')['log_return'].transform(garch_volatility_forecast)

    # CAPM-like expected return with the risk premium scaled by the forecast volatility
    dataset['expected_return_garch'] = (
        r_f + dataset['stock_beta'] * (market_return - r_f) * dataset['garch_volatility']
    )



In [None]:
# Display the short-side dataset with its derived columns.
short_dataset

In [None]:
# Predictors and target for the return models.  The target column is kept out
# of the predictors: with 'log_return' on both sides every model can simply
# copy its input, and the reported MSEs say nothing about predictive power.
# NOTE(review): both remaining predictors are computed per symbol over the whole
# sample, so they are constant within a symbol - confirm this feature set.
MODEL_FEATURES = ['std_dev_log_return', 'stock_beta']
MODEL_TARGET = 'log_return'


def _fit_and_report(model, X_train, y_train, X_test, y_test, label):
    """Fit ``model``, print its test-set MSE after ``label`` and return (predictions, mse)."""
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    print(f"{label}: {mse}")
    return predictions, mse


def _predict_for_dataset(model, dataset, usable_rows, X):
    """Return one value per row of ``dataset``: a prediction where usable, NaN elsewhere.

    Filling a full-length array keeps the assignment valid even when some rows
    had to be excluded for missing values.
    """
    predictions = np.full(len(dataset), np.nan)
    predictions[usable_rows] = model.predict(X)
    return predictions


# ---- Long portfolio ----
long_features = long_dataset[MODEL_FEATURES]
# Positional mask of rows where every predictor and the target are present
long_usable_rows = long_dataset[MODEL_FEATURES + [MODEL_TARGET]].notna().all(axis=1).to_numpy()
X_long = long_features[long_usable_rows]
y_long = long_dataset.loc[long_usable_rows, MODEL_TARGET]

X_train_long, X_test_long, y_train_long, y_test_long = train_test_split(
    X_long, y_long, test_size=0.2, random_state=42
)

# 1. Random Forest
rf_model_long = RandomForestRegressor(n_estimators=100, random_state=42)
y_pred_rf_long, mse_rf_long = _fit_and_report(
    rf_model_long, X_train_long, y_train_long, X_test_long, y_test_long,
    "Random Forest MSE for Long Portfolio",
)

# 2. Gradient Boosting
gb_model_long = GradientBoostingRegressor(n_estimators=100, random_state=42)
y_pred_gb_long, mse_gb_long = _fit_and_report(
    gb_model_long, X_train_long, y_train_long, X_test_long, y_test_long,
    "Gradient Boosting MSE for Long Portfolio",
)

# 3. Support Vector Machine
svm_model_long = SVR(kernel='rbf', C=1.0, epsilon=0.1)
y_pred_svm_long, mse_svm_long = _fit_and_report(
    svm_model_long, X_train_long, y_train_long, X_test_long, y_test_long,
    "SVM MSE for Long Portfolio",
)

# Store predicted returns for the Long Portfolio
long_dataset['predicted_return_rf'] = _predict_for_dataset(rf_model_long, long_dataset, long_usable_rows, X_long)
long_dataset['predicted_return_gb'] = _predict_for_dataset(gb_model_long, long_dataset, long_usable_rows, X_long)
long_dataset['predicted_return_svm'] = _predict_for_dataset(svm_model_long, long_dataset, long_usable_rows, X_long)

# ---- Short portfolio ----
short_features = short_dataset[MODEL_FEATURES]
short_usable_rows = short_dataset[MODEL_FEATURES + [MODEL_TARGET]].notna().all(axis=1).to_numpy()
X_short = short_features[short_usable_rows]
y_short = short_dataset.loc[short_usable_rows, MODEL_TARGET]

X_train_short, X_test_short, y_train_short, y_test_short = train_test_split(
    X_short, y_short, test_size=0.2, random_state=42
)

# 1. Random Forest
rf_model_short = RandomForestRegressor(n_estimators=100, random_state=42)
y_pred_rf_short, mse_rf_short = _fit_and_report(
    rf_model_short, X_train_short, y_train_short, X_test_short, y_test_short,
    "Random Forest MSE for Short Portfolio",
)

# 2. Gradient Boosting
gb_model_short = GradientBoostingRegressor(n_estimators=100, random_state=42)
y_pred_gb_short, mse_gb_short = _fit_and_report(
    gb_model_short, X_train_short, y_train_short, X_test_short, y_test_short,
    "Gradient Boosting MSE for Short Portfolio",
)

# 3. Support Vector Machine
svm_model_short = SVR(kernel='rbf', C=1.0, epsilon=0.1)
y_pred_svm_short, mse_svm_short = _fit_and_report(
    svm_model_short, X_train_short, y_train_short, X_test_short, y_test_short,
    "SVM MSE for Short Portfolio",
)

# Store predicted returns for the Short Portfolio
short_dataset['predicted_return_rf'] = _predict_for_dataset(rf_model_short, short_dataset, short_usable_rows, X_short)
short_dataset['predicted_return_gb'] = _predict_for_dataset(gb_model_short, short_dataset, short_usable_rows, X_short)
short_dataset['predicted_return_svm'] = _predict_for_dataset(svm_model_short, short_dataset, short_usable_rows, X_short)

# Both datasets now carry the predicted returns of the three models
short_dataset

In [None]:
# `df_single_index_short` and `test_variance` are not defined by any earlier
# cell, so this line raised a NameError on a fresh run.
# NOTE(review): they are bound here to `short_dataset` and `market_variance`,
# the objects used by the identical beta formula earlier in the notebook -
# confirm that this is what the two names were meant to refer to.
df_single_index_short = short_dataset
test_variance = market_variance
df_single_index_short['stock_beta'] = (df_single_index_short['cov_with_market'] / test_variance)

In [None]:
# One column of log returns per symbol, keeping the existing index as rows.
# NOTE(review): this used to read `df_single_index_short`, a name no earlier
# cell defines (NameError on a fresh run); `short_dataset` is assumed to be the
# frame that was meant, since it is indexed by date and carries the 'symbol'
# and 'log_return' columns this pivot needs - confirm.
returns_pivot = short_dataset.pivot(columns='symbol', values='log_return')

# Keep only the rows where every symbol has a return
returns_pivot = returns_pivot.dropna()

# Symbol-by-symbol covariance matrix of the log returns
cov_matrix = returns_pivot.cov()

cov_matrix

In [None]:
# Number of symbols in the short list; displayed as the cell output.
weight_count = len(bottom_60_short_symbols)
weight_count

In [None]:
# One zero-initialised float weight per short-list symbol.
short_weights = np.zeros(weight_count, dtype=float)

In [None]:
# Empty float array used as a placeholder.
expected_returns = np.array([], dtype=float)

In [None]:
portfolio_1_tickers = ['AAPL', 'NFLX']
portfolio_2_tickers = ['AAPL', 'WBD', '^GSPC', '^IXIC', 'NFLX', 'DIS']

# Price history for both portfolios over the same window
price_query = dict(period='1d', start='2019-03-25', end='2024-09-30')
portfolio_1_data = Ticker(portfolio_1_tickers).history(**price_query).dropna()
portfolio_2_data = Ticker(portfolio_2_tickers).history(**price_query).dropna()

portfolio_1_asset_count, portfolio_2_asset_count = len(portfolio_1_tickers), len(portfolio_2_tickers)

# Random weights, normalised to sum to 1 (no seed is set in this cell)
portfolio_1_weights = np.random.random(portfolio_1_asset_count)
portfolio_2_weights = np.random.random(portfolio_2_asset_count)
portfolio_1_weights /= portfolio_1_weights.sum()
portfolio_2_weights /= portfolio_2_weights.sum()

# Adjusted closes reshaped to one column per symbol, one row per date
portfolio_1_closing_prices = portfolio_1_data['adjclose'].to_frame()
portfolio_2_closing_prices = portfolio_2_data['adjclose'].to_frame()
portfolio_1_dataframe = portfolio_1_closing_prices.pivot_table(index='date', columns='symbol', values='adjclose')
portfolio_2_dataframe = portfolio_2_closing_prices.pivot_table(index='date', columns='symbol', values='adjclose')

# Daily log returns
portfolio_1_log_returns = np.log(portfolio_1_dataframe.div(portfolio_1_dataframe.shift(1)))
portfolio_2_log_returns = np.log(portfolio_2_dataframe.div(portfolio_2_dataframe.shift(1)))

# Annualised mean of the '^GSPC' column.  This rebinds the module-level
# `market_return` used by other cells.
market_return = portfolio_2_log_returns['^GSPC'].mean() * 252

# Rows with any missing return are removed only after the market figure is taken
portfolio_2_log_returns = portfolio_2_log_returns.dropna()
portfolio_1_log_returns = portfolio_1_log_returns.dropna()


# --- 1. CAPM Expected Return Calculation ---
def calculate_capm_return(asset_returns, market_returns):
    """CAPM expected return for every column of ``asset_returns``.

    Beta is cov(asset, market) / var(market), with both statistics using the
    sample (ddof=1) estimator; mixing ``np.cov`` with the default ``np.var``
    inflates beta by n / (n - 1).  When ``market_returns`` is a pandas Series
    the asset and market observations are paired by index label rather than by
    position.  A Series indexed by a MultiIndex containing a 'date' level is
    reduced to that level first.

    Reads the module-level ``risk_free_rate`` and ``market_return``.

    Parameters
    ----------
    asset_returns : pandas.DataFrame
        One column of returns per asset.
    market_returns : pandas.Series or array-like
        Market returns; non-Series input is paired positionally.

    Returns
    -------
    list of float
        ``risk_free_rate + beta * (market_return - risk_free_rate)`` per column.

    Raises
    ------
    ValueError
        If an asset shares fewer than two observations with the market.
    """
    market_series = None
    if isinstance(market_returns, pd.Series):
        market_series = market_returns
        index = market_series.index
        if isinstance(index, pd.MultiIndex) and 'date' in index.names:
            market_series = market_series.droplevel(
                [name for name in index.names if name != 'date']
            )

    betas = []
    for asset in asset_returns.columns:
        if market_series is not None:
            # Keep only observations present (and non-missing) in both series
            paired = pd.concat(
                [asset_returns[asset], market_series],
                axis=1, join='inner', keys=['asset', 'market'],
            ).dropna()
            asset_values = paired['asset'].to_numpy()
            market_values = paired['market'].to_numpy()
        else:
            asset_values = np.asarray(asset_returns[asset])
            market_values = np.asarray(market_returns)
        if len(market_values) < 2:
            raise ValueError(f"Not enough overlapping observations to estimate beta for {asset}")
        cov_with_market = np.cov(asset_values, market_values)[0, 1]
        beta = cov_with_market / np.var(market_values, ddof=1)
        betas.append(beta)
    # Calculate expected returns for each asset using CAPM
    capm_expected_returns = [risk_free_rate + beta * (market_return - risk_free_rate) for beta in betas]
    return capm_expected_returns

# Both portfolios use the '^GSPC' column of portfolio 2 as the market series
sp500_log_returns = portfolio_2_log_returns['^GSPC']
portfolio_1_capm_returns = calculate_capm_return(portfolio_1_log_returns, sp500_log_returns)
portfolio_2_capm_returns = calculate_capm_return(portfolio_2_log_returns, sp500_log_returns)


# --- 2. GARCH Expected Return Calculation ---
def calculate_garch_return(asset_returns):
    """Return one GARCH(1,1) one-step-ahead figure per column of ``asset_returns``.

    Each column (missing values removed) is fitted separately.  The value
    collected is the first entry of the last row of the forecast *variance*
    table; no square root is taken and no return forecast is involved, despite
    the function's name.

    Returns
    -------
    list
        One forecast variance per column, in column order.
    """
    def _one_step_variance(series):
        fitted = arch_model(series.dropna(), vol='Garch', p=1, q=1).fit(disp="off")
        return fitted.forecast(horizon=1).variance.values[-1, :][0]

    return [_one_step_variance(asset_returns[asset]) for asset in asset_returns.columns]

# Per-asset GARCH figures for portfolio 1, then portfolio 2
portfolio_1_garch_returns, portfolio_2_garch_returns = (
    calculate_garch_return(log_returns)
    for log_returns in (portfolio_1_log_returns, portfolio_2_log_returns)
)


# --- 3. Gradient Boosting Expected Return Calculation ---
# Prepare data for gradient boosting model
def prepare_data(asset_returns):
    """Build lag-1 supervised pairs: row ``i - 1`` is the input for row ``i``.

    Implemented with two array slices instead of a Python loop over ``.iloc``.
    Inputs with fewer than two rows yield empty arrays that keep the column
    dimension.

    Parameters
    ----------
    asset_returns : pandas.DataFrame or pandas.Series
        Observations in time order.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``, where ``X`` is every row but the last and ``y`` every row but
        the first.  Both are copies, so changing them leaves the input untouched.
    """
    values = np.asarray(asset_returns)
    return values[:-1].copy(), values[1:].copy()

X1, y1 = prepare_data(portfolio_1_log_returns)
X2, y2 = prepare_data(portfolio_2_log_returns)

# Split data for training and testing
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.2, random_state=42)
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

# The targets have one column per asset, and GradientBoostingRegressor accepts
# only a 1-D target, so fitting it directly on y1/y2 raises a ValueError.
# MultiOutputRegressor fits one independent booster per target column.
gb_model_1 = MultiOutputRegressor(GradientBoostingRegressor())
gb_model_1.fit(X1_train, y1_train)

gb_model_2 = MultiOutputRegressor(GradientBoostingRegressor())
gb_model_2.fit(X2_train, y2_train)

# Gradient Boosting Expected Return Calculation
def calculate_gradient_boosting_returns(asset_returns):
    """Predict the next return of each asset from its own most recent return.

    For every column a separate ``GradientBoostingRegressor`` is trained on
    lag-1 pairs from ``prepare_data`` (80% of the pairs, split with
    ``random_state=42``) and then asked for a prediction given the column's
    last observed return.

    Parameters
    ----------
    asset_returns : pandas.DataFrame
        One column of returns per asset.

    Returns
    -------
    list
        One predicted value per column, in column order.
    """
    gb_expected_returns = []
    for asset in asset_returns.columns:
        # Single-column frame so X has shape (n, 1)
        X, y = prepare_data(asset_returns[[asset]])
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        gb_model = GradientBoostingRegressor()
        gb_model.fit(X_train, y_train.ravel())  # the regressor expects a 1-D target

        # predict() requires a 2-D (n_samples, n_features) input; the bare
        # one-element list passed before was 1-D and was rejected.
        latest_return = asset_returns[asset].iloc[-1]
        gb_return = gb_model.predict([[latest_return]])[0]
        gb_expected_returns.append(gb_return)

    return gb_expected_returns

# Apply Gradient Boosting to each portfolio
portfolio_1_gb_returns = calculate_gradient_boosting_returns(portfolio_1_log_returns)
portfolio_2_gb_returns = calculate_gradient_boosting_returns(portfolio_2_log_returns)

# Per-asset estimate = CAPM + GARCH + gradient-boosting figures, added element
# by element.  The earlier expression joined the Python lists with `+` (which
# concatenates them) and referred to `portfolio_1_gb_return`, a name that does
# not exist, so the dot product could never be evaluated.
# NOTE(review): the three figures are on different scales (the CAPM term uses
# annualised inputs, the other two are per-period) - confirm that a plain sum
# is the intended combination.
portfolio_1_asset_estimates = (
    np.array(portfolio_1_capm_returns)
    + np.array(portfolio_1_garch_returns)
    + np.array(portfolio_1_gb_returns)
)
portfolio_2_asset_estimates = (
    np.array(portfolio_2_capm_returns)
    + np.array(portfolio_2_garch_returns)
    + np.array(portfolio_2_gb_returns)
)

# Weighted average of the per-asset estimates
expected_return_portfolio_1 = np.dot(portfolio_1_weights, portfolio_1_asset_estimates)
expected_return_portfolio_2 = np.dot(portfolio_2_weights, portfolio_2_asset_estimates)

print(f"Expected Return (Portfolio 1): {expected_return_portfolio_1:.2f}")
print(f"Expected Return (Portfolio 2): {expected_return_portfolio_2:.2f}")

In [None]:
# Initializing tickers and fetching data
# Portfolio 1 is downloaded for the long-list symbols, so its ticker list is
# taken from there too.  It used to be the two-name list ['AAPL', 'NFLX'],
# which produced two weights for a return table with one column per long symbol.
portfolio_1_tickers = list(top_60_long_symbols)
portfolio_2_tickers = ['AAPL', 'WBD', '^GSPC', '^IXIC', 'NFLX', 'DIS']
portfolio_1_data = Ticker(portfolio_1_tickers).history(period='1d', start='2019-03-25', end='2024-09-30').dropna()
portfolio_2_data = Ticker(portfolio_2_tickers).history(period='1d', start='2019-03-25', end='2024-09-30').dropna()

# Formatting the data: one column per symbol, one row per date
portfolio_1_closing_prices = pd.DataFrame(portfolio_1_data['adjclose'])
portfolio_2_closing_prices = pd.DataFrame(portfolio_2_data['adjclose'])
portfolio_1_dataframe = portfolio_1_closing_prices.pivot_table(index='date', columns='symbol', values='adjclose')
portfolio_2_dataframe = portfolio_2_closing_prices.pivot_table(index='date', columns='symbol', values='adjclose')

# Log returns
portfolio_1_log_returns = np.log(portfolio_1_dataframe / portfolio_1_dataframe.shift(1)).dropna()
portfolio_2_log_returns = np.log(portfolio_2_dataframe / portfolio_2_dataframe.shift(1)).dropna()

# Setting up portfolio weights.  The counts come from the return tables
# themselves, so there is exactly one weight per column even if a requested
# symbol produced no data.
portfolio_1_asset_count = portfolio_1_log_returns.shape[1]
portfolio_2_asset_count = portfolio_2_log_returns.shape[1]
portfolio_1_weights = np.random.random(portfolio_1_asset_count)
portfolio_2_weights = np.random.random(portfolio_2_asset_count)
portfolio_1_weights /= np.sum(portfolio_1_weights)
portfolio_2_weights /= np.sum(portfolio_2_weights)

# Annualised mean daily log return of the market frame (rebinds `market_return`)
market_return = market_data['log_return'].mean() * 252

# --- 1. CAPM Expected Return Calculation ---
def calculate_capm_return(asset_returns, market_returns, risk_free_rate=0.02):
    """CAPM expected return for every column of ``asset_returns``.

    Beta is cov(asset, market) / var(market), with both statistics using the
    sample (ddof=1) estimator; mixing ``np.cov`` with the default ``np.var``
    inflates beta by n / (n - 1).  When ``market_returns`` is a pandas Series
    the asset and market observations are paired by index label, so series of
    different lengths or date ranges can be combined.  A Series indexed by a
    MultiIndex containing a 'date' level is reduced to that level first.

    Reads the module-level ``market_return``.

    Parameters
    ----------
    asset_returns : pandas.DataFrame
        One column of returns per asset.
    market_returns : pandas.Series or array-like
        Market returns; non-Series input is paired positionally.
    risk_free_rate : float, default 0.02
        Risk-free rate used in the CAPM formula.

    Returns
    -------
    list of float
        ``risk_free_rate + beta * (market_return - risk_free_rate)`` per column.

    Raises
    ------
    ValueError
        If an asset shares fewer than two observations with the market.
    """
    market_series = None
    if isinstance(market_returns, pd.Series):
        market_series = market_returns
        index = market_series.index
        if isinstance(index, pd.MultiIndex) and 'date' in index.names:
            market_series = market_series.droplevel(
                [name for name in index.names if name != 'date']
            )

    betas = []
    for asset in asset_returns.columns:
        if market_series is not None:
            # Keep only observations present (and non-missing) in both series
            paired = pd.concat(
                [asset_returns[asset], market_series],
                axis=1, join='inner', keys=['asset', 'market'],
            ).dropna()
            asset_values = paired['asset'].to_numpy()
            market_values = paired['market'].to_numpy()
        else:
            asset_values = np.asarray(asset_returns[asset])
            market_values = np.asarray(market_returns)
        if len(market_values) < 2:
            raise ValueError(f"Not enough overlapping observations to estimate beta for {asset}")
        cov_with_market = np.cov(asset_values, market_values)[0, 1]
        beta = cov_with_market / np.var(market_values, ddof=1)
        betas.append(beta)
    capm_expected_returns = [risk_free_rate + beta * (market_return - risk_free_rate) for beta in betas]
    return capm_expected_returns

# Apply CAPM to each portfolio
# The market frame covers a different date range than the portfolio return
# tables, and a covariance needs observations that line up, so both sides are
# cut down to their common dates first.
market_log_returns_by_date = market_data.reset_index().set_index('date')['log_return']


def _capm_on_common_dates(asset_log_returns):
    """Run ``calculate_capm_return`` on the dates shared with the market series."""
    common_dates = asset_log_returns.index.intersection(market_log_returns_by_date.index)
    # NOTE(review): the notebook's own `risk_free_rate` is passed on instead of
    # relying on the function's hard-coded 0.02 default - confirm this is wanted.
    return calculate_capm_return(
        asset_log_returns.loc[common_dates],
        market_log_returns_by_date.loc[common_dates],
        risk_free_rate=risk_free_rate,
    )


portfolio_1_capm_returns = _capm_on_common_dates(portfolio_1_log_returns)
portfolio_2_capm_returns = _capm_on_common_dates(portfolio_2_log_returns)

# --- 2. GARCH Expected Return Calculation ---
def calculate_garch_return(asset_returns):
    """Return one GARCH(1,1) one-step-ahead figure per column of ``asset_returns``.

    Every column is fitted on its own after missing values are removed.  The
    number kept is the first entry of the last row of the forecast *variance*
    table, i.e. a variance rather than a return or a standard deviation.

    Returns
    -------
    list
        One forecast variance per column, in column order.
    """
    forecast_variances = []
    for column_name in asset_returns.columns:
        cleaned = asset_returns[column_name].dropna()
        fitted = arch_model(cleaned, vol='Garch', p=1, q=1).fit(disp="off")
        variance_values = fitted.forecast(horizon=1).variance.values
        forecast_variances.append(variance_values[-1, :][0])
    return forecast_variances

# Per-asset GARCH figures for portfolio 1, then portfolio 2
portfolio_1_garch_returns, portfolio_2_garch_returns = (
    calculate_garch_return(log_returns)
    for log_returns in (portfolio_1_log_returns, portfolio_2_log_returns)
)


# --- Combine the CAPM and GARCH figures into one expected return per portfolio ---
# (no gradient-boosting term is added in this cell)
portfolio_1_asset_estimates = np.add(portfolio_1_capm_returns, portfolio_1_garch_returns)
portfolio_2_asset_estimates = np.add(portfolio_2_capm_returns, portfolio_2_garch_returns)

# Weighted sum of the per-asset estimates
expected_return_portfolio_1 = np.dot(portfolio_1_weights, portfolio_1_asset_estimates)
expected_return_portfolio_2 = np.dot(portfolio_2_weights, portfolio_2_asset_estimates)

print(f"Expected Return (Portfolio 1): {expected_return_portfolio_1:.2f}")
print(f"Expected Return (Portfolio 2): {expected_return_portfolio_2:.2f}")