In [None]:
#installs necessary libraries
!pip install openpyxl
!pip install pandas
!pip install yfinance
!pip install matplotlib
!pip install scipy
!pip install scikit-learn
!pip install PyPortfolioOpt
!pip install cvxpy

sector_allocation = {
    ''
}


Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.5-py3-none-any.whl.metadata (23 kB)
Downloading pyportfolioopt-1.5.5-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.5


In [None]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import pathlib
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sci_opt

from pprint import pprint
from sklearn.preprocessing import StandardScaler

# Set some display options for Pandas.
%config InlineBackend.figure_format ='retina'
pd.set_option('display.max_colwidth', None)
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_columns', None)



## Fetching S&P 500 Companies:
sp500_companies = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
sp500_companies['Symbol'] = sp500_companies['Symbol'].replace({'BRK.B': 'BRK-B', 'BF.B': 'BF-B'})
sp500_companies
# Dropping NaN values
sp500_companies.dropna(inplace=True)
## Getting Unique Sectors and Dates:
unique_sectors = sp500_companies['GICS Sector'].unique()
end_date = datetime.today().strftime('%Y-%m-%d')
start_date = (datetime.today() - timedelta(days=90)).strftime('%Y-%m-%d')
unique_sectors_df = pd.DataFrame(unique_sectors)
print(unique_sectors_df)
number_of_sectors = len(unique_sectors)
len(unique_sectors)

## Fetching Historical Prices and Calculating Averages:
# For every sector, download the adjusted closing prices of its member
# companies and take the plain mean across companies for each trading day.
# The three lists below are filled in lock-step and become the columns of a
# long-format table (one row per date and sector).
all_avg_prices = []
all_dates = []
all_sectors = []
for sector in unique_sectors:
    companies = sp500_companies.loc[sp500_companies['GICS Sector'] == sector, 'Symbol'].tolist()

    # auto_adjust=False is passed explicitly so the result always contains a
    # separate 'Adj Close' column; on yfinance releases where auto_adjust
    # defaults to True that column is absent and the lookup raises KeyError.
    prices = yf.download(companies, start=start_date, end=end_date, auto_adjust=False)['Adj Close']

    # Mean across the sector's companies for each date (NaNs are skipped).
    avg_prices = prices.mean(axis=1)

    # Append the data
    all_avg_prices.extend(avg_prices.tolist())
    all_dates.extend([date.date() for date in prices.index])
    all_sectors.extend([sector] * len(avg_prices))

# Create a DataFrame
sp500_sectors_avg_prices = pd.DataFrame({
    'Date': all_dates,
    'Avg_Close': all_avg_prices,
    'Sector': all_sectors
})


# Flip the long-format table so the rows appended last are listed first,
# then preview the first 100 rows of that reversed view.
sp500_sectors_avg_prices = sp500_sectors_avg_prices.iloc[::-1]
print(sp500_sectors_avg_prices.head(100))

# Reshape from long to wide: one row per date, one column per sector,
# each cell holding that sector's average close.
sp500_sectors_avg_prices = sp500_sectors_avg_prices.pivot(index='Date', columns='Sector', values='Avg_Close')
print(sp500_sectors_avg_prices.head())

# Keep only dates that have a value for every sector, confirm that no nulls
# remain, and save the wide table to Excel.
sp500_sectors_avg_prices = sp500_sectors_avg_prices.dropna()
print(sp500_sectors_avg_prices.isna().sum())
sp500_sectors_avg_prices.to_excel('sp500_sectors_avg_prices.xlsx', index=True)

# Calculate the daily log returns, log(1 + r), where r is the day-over-day
# percentage change of each sector's average price. Rows are reversed so the
# most recent date comes first.
log_return = np.log(1 + sp500_sectors_avg_prices.pct_change().iloc[::-1])

# Drop only the rows that could not be computed (the earliest date has no
# previous price, so its change is NaN). Days with losses must stay in the
# sample: keeping only days on which every sector rose would inflate the mean
# return, understate the covariance, and make the "percentage of negative
# returns" reported later always 0.
log_return = log_return.dropna()


# Generate one random weight per sector. A seeded generator is used so the
# weights, and every metric derived from them, are identical on each
# Restart & Run All.
weights_rng = np.random.default_rng(42)
random_weights = weights_rng.random(number_of_sectors)

# Generate the rebalance weights: rescale the random draws so they sum to 1.
rebalance_weights = random_weights / np.sum(random_weights)

print('Log Returns:')
print(log_return.head())

# Per-sector count of average prices that are exactly zero.
print((sp500_sectors_avg_prices == 0).sum())
log_return.to_excel('log_return.xlsx', index=True)

# Calculate the percentage of negative log returns: the mean over sectors of
# each sector's share of negative observations.
percentage_negative_returns = (log_return < 0).mean().mean() * 100

print(f"Percentage of negative log returns: {percentage_negative_returns:.2f}%")


# Risk-free rate, expressed per year (1%). It is only compared against
# annualized figures below; subtracting it from a *daily* mean return before
# multiplying by 252 would remove 2.52 instead of 0.01.
risk_free_rate = .01

# Calculate the Expected Returns: the weighted mean daily log return,
# annualized by multiplying by 252 trading days.
exp_ret = np.sum((log_return.mean() * rebalance_weights) * 252)

# Calculate the Expected Volatility: sqrt(w' * (252 * Cov) * w). The
# covariance is annualized by 252, so the volatility scales by sqrt(252).
exp_vol = np.sqrt(
    np.dot(
        rebalance_weights.T,
        np.dot(
            log_return.cov() * 252,
            rebalance_weights
        )
    )
)

# Calculate the Sharpe Ratio: annual return in excess of the risk-free rate
# per unit of annual volatility.
sharpe_ratio = (exp_ret - risk_free_rate) / exp_vol

# Put the weights into a data frame to see them better.
weights_df = pd.DataFrame(data={
    'random_weights': random_weights,
    'rebalance_weights': rebalance_weights
})
print('')
print('='*80)
print('PORTFOLIO WEIGHTS:')
print('-'*80)
print(weights_df)
print('-'*80)

# Do the same with the other metrics.
metrics_df = pd.DataFrame(data={
    'Expected Portfolio Returns': exp_ret,
    'Expected Portfolio Volatility': exp_vol,
    'Portfolio Sharpe Ratio': sharpe_ratio
}, index=[0])

print('')
print('='*80)
print('PORTFOLIO METRICS:')
print('-'*80)
print(metrics_df)
print('-'*80)

# Initialize the components, to run a Monte Carlo Simulation.

# Number of random portfolios to simulate.
num_of_portfolios = 20000

# One row of weights per simulated portfolio, one column per sector.
all_weights = np.zeros((num_of_portfolios, number_of_sectors))

# Annualized return, annualized volatility and Sharpe ratio of each portfolio.
ret_arr = np.zeros(num_of_portfolios)
vol_arr = np.zeros(num_of_portfolios)
sharpe_arr = np.zeros(num_of_portfolios)

# The mean vector and covariance matrix do not depend on the weights, so they
# are computed once here instead of on every iteration.
mean_daily_returns = log_return.mean().to_numpy()
annual_covariance = log_return.cov().to_numpy() * 252

# Seeded generator so the simulation is reproducible across kernel restarts.
simulation_rng = np.random.default_rng(42)

# Start the simulations.
for ind in range(num_of_portfolios):

    # First, draw random weights and rescale them to sum to 1.
    weights = simulation_rng.random(number_of_sectors)
    weights = weights / np.sum(weights)

    # Add the weights to the weights array.
    all_weights[ind, :] = weights

    # Annualized expected log return of the portfolio.
    ret_arr[ind] = np.sum(mean_daily_returns * weights) * 252

    # Annualized volatility: sqrt(w' * (252 * Cov) * w).
    vol_arr[ind] = np.sqrt(weights @ annual_covariance @ weights)

    # Sharpe Ratio: annual return in excess of the (annual) risk-free rate,
    # per unit of annual volatility.
    sharpe_arr[ind] = (ret_arr[ind] - risk_free_rate) / vol_arr[ind]

# Let's create our "Master Data Frame", with the returns, the volatility, the
# Sharpe Ratio and the weights of every simulated portfolio.
# Each column is passed by name, so the three metric columns keep their float
# dtype directly and no object-dtype transpose or dtype re-inference is needed.
# `list(all_weights)` turns the 2-D array into one weight vector per row, so
# the 'Portfolio Weights' column holds one array per simulated portfolio.
simulations_data = {
    'Returns': ret_arr,
    'Volatility': vol_arr,
    'Sharpe Ratio': sharpe_arr,
    'Portfolio Weights': list(all_weights),
}

simulations_df = pd.DataFrame(data=simulations_data)

# Horizontal rules reused by every banner in this report.
heavy_rule = '=' * 80
light_rule = '-' * 80

# Report header.
print('', heavy_rule, 'SIMULATIONS RESULT:', light_rule, sep='\n')

# First five simulated weight vectors.
print(heavy_rule, 'PORTFOLIO WEIGHTS:', light_rule, sep='\n')
print(simulations_df['Portfolio Weights'].head())
print(light_rule)

# First five rows of the portfolio metrics.
print(heavy_rule, 'PORTFOLIO METRICS:', light_rule, sep='\n')
print(simulations_df[['Returns', 'Volatility', 'Sharpe Ratio']].head())
print(light_rule)

# Row of the simulated portfolio with the highest Sharpe Ratio.
best_sharpe_label = simulations_df['Sharpe Ratio'].idxmax()
max_sharpe_ratio = simulations_df.loc[best_sharpe_label]

# Row of the simulated portfolio with the lowest volatility.
lowest_vol_label = simulations_df['Volatility'].idxmin()
min_volatility = simulations_df.loc[lowest_vol_label]

print('', heavy_rule, 'MAX SHARPE RATIO:', light_rule, sep='\n')
print(max_sharpe_ratio)
print(light_rule)

print('', heavy_rule, 'MIN VOLATILITY:', light_rule, sep='\n')
print(min_volatility)
print(light_rule)
#### Machine learning time
# We can further improve these portfolios by generating future data via machine learning.
simulations_df

                         0
0              Industrials
1              Health Care
2   Information Technology
3                Utilities
4               Financials
5                Materials
6   Consumer Discretionary
7              Real Estate
8   Communication Services
9         Consumer Staples
10                  Energy


[*********************100%%**********************]  78 of 78 completed
[*********************100%%**********************]  63 of 63 completed
[*********************100%%**********************]  67 of 67 completed
[*********************100%%**********************]  31 of 31 completed
[*********************100%%**********************]  71 of 71 completed
[*********************100%%**********************]  28 of 28 completed
[*********************100%%**********************]  52 of 52 completed
[*********************100%%**********************]  31 of 31 completed
[*********************100%%**********************]  22 of 22 completed
[*********************100%%**********************]  38 of 38 completed
[*********************100%%**********************]  22 of 22 completed


           Date   Avg_Close            Sector
670  2024-08-14   86.743182            Energy
669  2024-08-13   86.108182            Energy
668  2024-08-12   87.193637            Energy
667  2024-08-09   86.519546            Energy
666  2024-08-08   86.371929            Energy
..          ...         ...               ...
575  2024-06-26  105.796634  Consumer Staples
574  2024-06-25  106.101590  Consumer Staples
573  2024-06-24  106.839086  Consumer Staples
572  2024-06-21  105.896841  Consumer Staples
571  2024-06-20  105.874435  Consumer Staples

[100 rows x 3 columns]
Sector      Communication Services  Consumer Discretionary  Consumer Staples     Energy  Financials  Health Care  Industrials  Information Technology   Materials  Real Estate  Utilities
Date                                                                                                                                                                                     
2024-05-17              124.862209              438.

Unnamed: 0,Returns,Volatility,Sharpe Ratio,Portfolio Weights
0,0.373497,0.060623,6.161002,"[0.15545560467975686, 0.08099045400387961, 0.1605808420025511, 0.156976556693859, 0.04384222946534695, 0.011126161296315462, 0.052636241278154174, 0.015218285564109585, 0.18189106668584362, 0.006236246066150228, 0.1350463122640333]"
1,0.496263,0.077892,6.371198,"[0.08761653760546698, 0.0619854600052567, 0.09785168739308488, 0.12715126036946425, 0.04794858961043173, 0.11763274761765505, 0.09863173588542967, 0.1217898778511918, 0.09424953534343593, 0.05039747042142074, 0.09474509789716226]"
2,0.692741,0.083989,8.248026,"[0.03459597456355578, 0.13897869503765117, 0.03603103435106137, 0.0771538294955693, 0.15667359957900995, 0.09893571323706359, 0.11493916880664874, 0.1582350819245519, 0.03582002709664156, 0.07631440137118455, 0.07232247453706206]"
3,0.592203,0.072276,8.193642,"[0.10200559224153104, 0.074387016595734, 0.0897391275987172, 0.04179093777999673, 0.015520142923009907, 0.1431919865892544, 0.10330644390374276, 0.10119052611598528, 0.1468678967054398, 0.04861506043394574, 0.13338526911264328]"
4,0.418154,0.059925,6.977925,"[0.008704382094659074, 0.0807786293472782, 0.05524057790602898, 0.09363744627100999, 0.15303861314747644, 0.08209197218322228, 0.09152317897796063, 0.011883810634950252, 0.12270689906338259, 0.1463954457896083, 0.15399904458442334]"
...,...,...,...,...
19995,0.681732,0.069165,9.856608,"[0.10164703165654955, 0.12373135201551956, 0.028452492281154717, 0.034031460221243724, 0.1505898624461091, 0.0244273614378773, 0.04746753380847296, 0.1291301190435483, 0.1113656204790433, 0.10806388662617077, 0.14109327998431065]"
19996,0.443014,0.069805,6.346443,"[0.036392441446180715, 0.1271659580550981, 0.12815601890321843, 0.042164223905350066, 0.15018942434357974, 0.13058600943122645, 0.04245592493934659, 0.13673906876734673, 0.003636317555458105, 0.048278352614626416, 0.15423626003856858]"
19997,0.484699,0.061257,7.912588,"[0.11267329459941501, 0.04934643668724532, 0.08131548326440335, 0.030920317237235564, 0.14970234925546722, 0.07220438358340782, 0.10664247844928736, 0.01823911578359049, 0.10196609006859826, 0.13196705801132527, 0.14502299306002422]"
19998,0.567378,0.072030,7.876942,"[0.048850133796597826, 0.11462653240951437, 0.10930586503684789, 0.11378299417641466, 0.005716704575261983, 0.06407550718666682, 0.09990692549717109, 0.13071056969365147, 0.1495787872825033, 0.025837277402817847, 0.1376087029425527]"


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Make sure every entry of the weights column is an ndarray: lists are
# converted, any other value is passed through unchanged.
simulations_df["Portfolio Weights"] = simulations_df["Portfolio Weights"].apply(
    lambda entry: np.array(entry) if isinstance(entry, list) else entry
)

# Stack the per-portfolio weight vectors into one 2-D array
# (one row per portfolio, one column per weight).
flattened_weights = np.vstack(simulations_df["Portfolio Weights"].to_numpy())

# Features: volatility and Sharpe ratio. Targets: the portfolio weights.
X = simulations_df[["Volatility", "Sharpe Ratio"]].values
y = flattened_weights

# Hold out 20% of the simulated portfolios for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit one random forest per weight column and collect its test predictions.
rf_models = []
y_preds = []
for i in range(y.shape[1]):
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train[:, i])
    y_pred = rf_model.predict(X_test)

    rf_models.append(rf_model)
    y_preds.append(y_pred)

# Arrange the per-column predictions side by side: (test samples, weights).
y_preds = np.column_stack(y_preds)

print("Predicted Portfolio Weights (first 5 samples):")
print(y_preds[:5])

print("\nActual Portfolio Weights (first 5 samples):")
print(y_test[:5])

# Mean squared error of each weight column, in column order.
mse_list = [
    mean_squared_error(actual_column, predicted_column)
    for actual_column, predicted_column in zip(y_test.T, y_preds.T)
]

print("\nMean Squared Errors for each portfolio weight:")
print(mse_list)

Predicted Portfolio Weights (first 5 samples):
[[0.08355741 0.09656045 0.10182403 0.05605305 0.09787575 0.06418933
  0.08618862 0.08586038 0.09318793 0.08861494 0.12140461]
 [0.1022224  0.10214948 0.07154781 0.13503212 0.08954877 0.06009024
  0.08122161 0.15342468 0.0610693  0.10634761 0.08369239]
 [0.09496825 0.02695341 0.11058355 0.12481344 0.11556987 0.12495479
  0.08491269 0.0882909  0.080263   0.06740335 0.10265392]
 [0.0316887  0.05632549 0.15455671 0.1059399  0.11556558 0.07875279
  0.03800732 0.01264792 0.10352041 0.07283431 0.21911664]
 [0.09159742 0.04785268 0.11106643 0.15270333 0.11766265 0.10594109
  0.05570427 0.09822824 0.03888325 0.11268873 0.09587799]]

Actual Portfolio Weights (first 5 samples):
[[0.04808128 0.11262189 0.04517067 0.15640812 0.00445794 0.03142159
  0.07309652 0.16016828 0.15106738 0.02311568 0.19439065]
 [0.04259952 0.08050434 0.14025763 0.06548998 0.09139792 0.10930448
  0.14347865 0.13553609 0.05069301 0.10042648 0.0403119 ]
 [0.13232583 0.00261077 0