In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pypfopt import EfficientFrontier, risk_models, expected_returns, plotting
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Step 1: Load historical price data
# The first CSV column becomes a parsed date index; the remaining columns are
# presumably one price series per asset -- TODO confirm against the file.
data = pd.read_csv("stock_prices.csv", index_col=0, parse_dates=True)
returns = data.pct_change().dropna()

# Step 2: Use Machine Learning to Predict Expected Returns
# Feature engineering: the previous row's returns (all assets) are the
# features used to predict the current row's returns (all assets).
X = returns.shift(1).dropna()  # Lagged returns as features
y = returns.iloc[1:]  # Current returns as target (same rows as X)

# Chronological train-test split.
# shuffle=False keeps the last 20% of rows as the hold-out set. A shuffled
# split would train on rows that come after the rows being predicted, leaking
# future information into the model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train a Random Forest model (one multi-output regressor for all assets)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict expected returns for the held-out period, keeping the row labels and
# asset column names so downstream code can index by ticker.
predicted_returns = model.predict(X_test)
predicted_returns = pd.DataFrame(predicted_returns, index=X_test.index, columns=y.columns)

# Step 3: Calculate covariance matrix
# One annualization factor is shared by the covariance and the expected
# returns so both optimizer inputs are expressed on the same time scale.
# NOTE(review): 252 assumes daily price data -- confirm against the CSV.
TRADING_DAYS_PER_YEAR = 252
S = risk_models.sample_cov(data, frequency=TRADING_DAYS_PER_YEAR)

# Step 4: Portfolio Optimization with Constraints
# The model predicts per-period returns; scale their mean by the same factor
# used for the covariance. Feeding a per-period mean alongside an annualized
# covariance would distort the risk/return trade-off.
mu = predicted_returns.mean() * TRADING_DAYS_PER_YEAR

MAX_WEIGHT = 0.2  # No more than 20% in any single stock
MSFT_TICKER = "MSFT"
MSFT_MIN_WEIGHT = 0.1  # At least 10% in MSFT

# Resolve MSFT's position from the column labels instead of assuming it is
# the second asset, so a reordered CSV cannot silently constrain another stock.
tickers = list(mu.index)
if MSFT_TICKER not in tickers:
    raise ValueError(
        f"Ticker {MSFT_TICKER!r} not found in price data columns: {tickers}"
    )
msft_position = tickers.index(MSFT_TICKER)


def _constrained_frontier():
    """Return a fresh, unsolved EfficientFrontier with the weight constraints applied."""
    frontier = EfficientFrontier(mu, S)
    frontier.add_constraint(lambda w: w <= MAX_WEIGHT)
    frontier.add_constraint(lambda w: w[msft_position] >= MSFT_MIN_WEIGHT)
    return frontier


# Optimize for maximal Sharpe ratio
ef = _constrained_frontier()
weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()

# Step 5: Visualize the Efficient Frontier
# Plot from a separate, unsolved instance: the plotting helper runs its own
# optimizations, and an instance that has already been solved cannot be
# re-optimized with a different objective.
fig, ax = plt.subplots()
plotting.plot_efficient_frontier(_constrained_frontier(), ax=ax, show_assets=True)
ax.set_title("Efficient Frontier")
ax.set_xlabel("Risk (Standard Deviation)")
ax.set_ylabel("Return")
plt.show()

# Step 6: Print Portfolio Performance
print("Optimized Weights:")
print(cleaned_weights)
ef.portfolio_performance(verbose=True)