In [8]:
# Install necessary packages

!pip install yfinance tensorflow keras-tuner PyPortfolioOpt scikit-learn matplotlib seaborn




In [9]:
# 1. Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from tensorflow import keras
from keras import layers
from keras_tuner import HyperModel, RandomSearch
from pypfopt import EfficientFrontier, risk_models, expected_returns
import json
import re

In [10]:
global cleaned_weights

In [11]:
# with open('symbols.json', 'r') as f:
#    tickers = json.load(f)

# pk_filenames.json = ["data/ibm.pk1", "data/aapl.pk1"]

with open('pk_filesnames.json','r') as f:
    filenames = json.load(f)

# for filename in filenames:
#     df = pd.read_pickle(filename)

In [12]:
# 2. Data Collection
# with open('symbols.json', 'r') as f:
#    tickers = json.load(f)

# def fetch_data(ticker, start, end):
#     data = yf.download(ticker, start=start, end=end)
#     return data['Adj Close']

# tickers = ['AAPL', 'MSFT', 'GOOGL', 'TSLA', 'NVDA', 'TLT', 'COST', 'WMT', 'BA', 'DIS', 'JPM', 'AMD']
all_expected_returns = {}
all_data = pd.DataFrame()

In [16]:
# Loop through each ticker individually
# for ticker in tickers:
for filename in filenames:
    df = pd.read_pickle(filename)
    # data = fetch_data(ticker, '2020-01-01', '2023-01-01')
    data = df['Adj Close']
    ticker = filename.split('/')[1].split('.')[0]

    all_data[ticker] = data  # Store data for covariance calculation
    # 3. Data Preprocessing
    returns = data.pct_change().dropna()

    # Reshape data for LSTM in a compatible format
    X = returns.values.reshape(-1, 1, 1)

    # 4. LSTM Modeling
    class LSTMHyperModel(HyperModel):
        def build(self, hp):
            model = keras.Sequential()
            model.add(layers.Input(shape=(1, 1)))
            model.add(layers.LSTM(units=hp.Int('units', min_value=32, max_value=128, step=32), activation='relu'))
            model.add(layers.Dense(1))
            model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3])), loss='mse')
            return model

    # Hyperparameter tuning
    tuner = RandomSearch(
        LSTMHyperModel(),
        objective='val_loss',
        max_trials=5,
        executions_per_trial=3,
        directory='lstm_tuning',
        project_name=f'portfolio_optimization_{ticker}'
    )

    # Train the model only for the current ticker
    tuner.search(X, returns.values, epochs=10, validation_split=0.2)
    best_model = tuner.get_best_models(num_models=1)[0]

    # Predict future returns
    predicted_returns = best_model.predict(X)

    # Aggregate predictions to get expected daily returns for each asset
    expected_daily_return = np.mean(predicted_returns)

    # Annualize the expected daily return
    annualized_return = (1 + expected_daily_return) ** 252 - 1
    all_expected_returns[ticker] = annualized_return

    # Print annualized expected returns for each ticker
    print(f"Annualized Expected Returns for {ticker}: {annualized_return}")

Reloading Tuner from lstm_tuning\portfolio_optimization_ibm\tuner0.json
[1m 1/40[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 123ms/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Annualized Expected Returns for ibm: 0.17755370909701695
Reloading Tuner from lstm_tuning\portfolio_optimization_aapl\tuner0.json
[1m 1/40[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 124ms/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Annualized Expected Returns for aapl: 0.4091733450699866


In [14]:
# Convert the dictionary to a Pandas Series
expected_returns_series = pd.Series(all_expected_returns)

In [15]:
# Portfolio Optimization
# Calculate the covariance matrix using all tickers' data
cov_matrix = risk_models.risk_matrix(all_data, method='ledoit_wolf' )
# https://pyportfolioopt.readthedocs.io/en/latest/RiskModels.html
ef = EfficientFrontier(expected_returns=expected_returns_series, cov_matrix=cov_matrix)
weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()

# Print the optimized portfolio weights
print("Optimized Portfolio Weights:", cleaned_weights)

Optimized Portfolio Weights: OrderedDict([('ibm', 0.12534), ('aapl', 0.87466)])
