In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns, plotting
import os


In [3]:

# Loading predictions & historical returns

# Directory holding the processed CSVs. The default is the original author's
# local folder; set the PORTFOLIO_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    "PORTFOLIO_DATA_DIR",
    r"C:\Users\ansul\OneDrive\Desktop\data science project\portfolio_optimization_ML\data\processed",
)

# Model output: the predicted probability of an "up" move, with a parsed Date column.
preds = pd.read_csv(
    os.path.join(DATA_DIR, "predicted_probabilities.csv"),
    parse_dates=["Date"],
)

# Daily returns, one column per ticker, indexed by Date.
returns = pd.read_csv(
    os.path.join(DATA_DIR, "daily_returns.csv"),
    parse_dates=["Date"],
    index_col="Date",
)

print("Predictions sample:")
print(preds.head())

print("Returns sample:")
print(returns.head())


Predictions sample:
        Date  Pred_Prob_Up  True_Label
0 2024-02-29      0.524484           0
1 2024-02-29      0.525042           1
2 2024-02-29      0.525421           1
3 2024-02-29      0.525758           1
4 2024-02-29      0.525758           0
Returns sample:
                AAPL      MSFT      AMZN      TSLA       JPM
Date                                                        
2018-01-03 -0.000174  0.004654  0.012775 -0.010233  0.001019
2018-01-04  0.004645  0.008801  0.004476 -0.008290  0.014326
2018-01-05  0.011385  0.012398  0.016163  0.006230 -0.006420
2018-01-08 -0.003715  0.001020  0.014425  0.062638  0.001477
2018-01-09 -0.000115 -0.000679  0.004676 -0.008085  0.005069


In [5]:
# Converting predicted probability → expected return

# PyPortfolioOpt annualizes historical returns and covariances with a default
# frequency of 252 trading days; the ML expected returns are put on the same
# annual scale so they are comparable with that covariance matrix.
TRADING_DAYS_PER_YEAR = 252

# The per-ticker aggregation below needs a "Ticker" column. Fail early with an
# actionable message instead of a bare KeyError from groupby.
if "Ticker" not in preds.columns:
    raise KeyError(
        "'Ticker' column not found in predictions (columns: "
        f"{list(preds.columns)}). Re-export predicted_probabilities.csv with "
        "one row per (Date, Ticker) so predictions can be mapped to assets."
    )

# Scale probability into expected daily return using historical avg return
# (a single average across all tickers and dates).
avg_daily_return = returns.mean().mean()
preds["Expected_Return"] = preds["Pred_Prob_Up"] * avg_daily_return

# Take the most recent predicted return per ticker. Sorting by Date first makes
# "last" mean "latest date" regardless of the row order in the CSV; the stable
# sort keeps the original order among rows that share a date.
latest_daily_return = (
    preds.sort_values("Date", kind="stable")
    .groupby("Ticker")["Expected_Return"]
    .last()
)

# Simple (non-compounded) annualization of the daily figure.
mu_ml = latest_daily_return * TRADING_DAYS_PER_YEAR

print("\nPredicted Expected Returns (latest):")
print(mu_ml)

KeyError: 'Ticker'

In [None]:

# -----------------------------
# 📌 Step 3: Estimate covariance matrix (historical)
# -----------------------------
# `returns` already contains daily returns, not prices. PyPortfolioOpt assumes
# prices unless returns_data=True is passed, and would otherwise take
# percentage changes of the returns themselves.
mu_hist = expected_returns.mean_historical_return(returns, returns_data=True)  # classical expected returns
S = risk_models.sample_cov(returns, returns_data=True)

# -----------------------------
# 📌 Step 4: Classical Portfolio (baseline)
# -----------------------------
ef_classical = EfficientFrontier(mu_hist, S)
weights_classical = ef_classical.max_sharpe()
cleaned_weights_classical = ef_classical.clean_weights()
performance_classical = ef_classical.portfolio_performance(verbose=True)

# -----------------------------
# 📌 Step 5: ML-driven Portfolio
# -----------------------------
# EfficientFrontier matches expected returns to the covariance matrix by
# position, not by label, so put the ML returns in the covariance's ticker
# order and refuse to continue if any asset has no prediction.
mu_ml_aligned = pd.Series(mu_ml).reindex(S.index)
tickers_without_prediction = mu_ml_aligned.index[mu_ml_aligned.isna()].tolist()
if tickers_without_prediction:
    raise ValueError(
        f"No ML expected return available for: {tickers_without_prediction}"
    )

ef_ml = EfficientFrontier(mu_ml_aligned, S)
weights_ml = ef_ml.max_sharpe()
cleaned_weights_ml = ef_ml.clean_weights()
performance_ml = ef_ml.portfolio_performance(verbose=True)

# -----------------------------
# 📌 Step 6: Compare Results
# -----------------------------
print("\n🔹 Classical Portfolio Weights:")
print(cleaned_weights_classical)

print("\n🔹 ML-Driven Portfolio Weights:")
print(cleaned_weights_ml)

print("\n📊 Performance Comparison")
print("Classical:", performance_classical)
print("ML-Driven:", performance_ml)

# -----------------------------
# 📌 Step 7: Visualization - Allocation
# -----------------------------
def _plot_allocation(axis, weights, title):
    """Draw a pie chart of the non-zero portfolio weights on `axis`."""
    # Zero-weight assets would only add empty wedges with overlapping labels.
    held = {ticker: w for ticker, w in weights.items() if w > 0}
    # Pass lists: dict views are not converted to numeric arrays by matplotlib.
    axis.pie(list(held.values()),
             labels=list(held.keys()),
             autopct='%1.1f%%', startangle=90)
    axis.set_title(title)


fig, ax = plt.subplots(1, 2, figsize=(14,6))

# Pie chart: Classical Portfolio
_plot_allocation(ax[0], cleaned_weights_classical, "Classical Portfolio Allocation")

# Pie chart: ML-Driven Portfolio
_plot_allocation(ax[1], cleaned_weights_ml, "ML-Driven Portfolio Allocation")

plt.show()

# -----------------------------
# 📌 Step 8: Efficient Frontier Plot
# -----------------------------
fig_ef, ax_ef = plt.subplots(figsize=(10,7))

# The frontier plot re-optimizes internally, which PyPortfolioOpt does not
# allow on an instance that has already been solved (ef_classical ran
# max_sharpe above), so a fresh instance is used for plotting.
ef_for_plot = EfficientFrontier(mu_hist, S)
plotting.plot_efficient_frontier(ef_for_plot, ax=ax_ef, show_assets=True)

# Mark classical max Sharpe
ret_c, vol_c, sharpe_c = performance_classical
ax_ef.scatter(vol_c, ret_c, marker="*", color="red", s=200, label="Classical Max Sharpe")

# Mark ML max Sharpe
# NOTE(review): this point's return is measured with the ML expected returns,
# while the frontier itself is drawn from historical expected returns.
ret_ml, vol_ml, sharpe_ml = performance_ml
ax_ef.scatter(vol_ml, ret_ml, marker="*", color="blue", s=200, label="ML Max Sharpe")

ax_ef.set_title("Efficient Frontier: Classical vs ML-Driven Portfolio")
ax_ef.legend()
plt.show()

# -----------------------------
# 📌 Step 9: Portfolio Backtest (Cumulative Returns)
# -----------------------------
# Compute daily portfolio returns for both strategies.
# NOTE(review): the weights are fitted on the same `returns` history they are
# applied to here, so this is an in-sample comparison, not an out-of-sample test.
w_classical = pd.Series(cleaned_weights_classical)
w_ml = pd.Series(cleaned_weights_ml)

# DataFrame * Series aligns the weights to the ticker columns by label.
port_ret_classical = (returns * w_classical).sum(axis=1)
port_ret_ml = (returns * w_ml).sum(axis=1)

cum_ret_classical = (1 + port_ret_classical).cumprod()
cum_ret_ml = (1 + port_ret_ml).cumprod()

plt.figure(figsize=(12,6))
plt.plot(cum_ret_classical, label="Classical Portfolio")
plt.plot(cum_ret_ml, label="ML-Driven Portfolio")
plt.title("Cumulative Returns Comparison")
plt.xlabel("Date")
plt.ylabel("Cumulative Growth")
plt.legend()
plt.show()
