# Combinations
Data: from 2015-01-01 to 2025-03-25 (current day) 

**Baseline**: S&P500 **Annual Return**: 11.04% 

**Try**: window_size = 280, rebalance_period = 1, max_selected_stocks = 100 **Results**: 17.86%

**Try**: window_size = 260, rebalance_period = 1, max_selected_stocks = 100 **Results**: 18.36%

**Try**: window_size = 240, rebalance_period = 1, max_selected_stocks = 100 **Results**: 18.69%

**Try**: window_size = 220, rebalance_period = 1, max_selected_stocks = 100 **Results**: 18.72%

**Try**: window_size = 200, rebalance_period = 1, max_selected_stocks = 100 **Results**: 20.64%

**Try**: window_size = 190, rebalance_period = 1, max_selected_stocks = 100 **Results**: 20.60%

**Try**: window_size = 180, rebalance_period = 1, max_selected_stocks = 100 **Results**: 20.91%

**Try**: window_size = 179, rebalance_period = 1, max_selected_stocks = 100 **Results**: 21.17%

**Try**: window_size = 178, rebalance_period = 1, max_selected_stocks = 100 **Results**: 20.60%

**Try**: window_size = 177, rebalance_period = 1, max_selected_stocks = 100 **Results**: 21.55%

**Try**: window_size = 176, rebalance_period = 1, max_selected_stocks = 100 **Results**: 21.53%

**Try**: window_size = 175, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.34%

**Try**: window_size = 174, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.71%

**Try**: window_size = 173, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.50%

**Try**: window_size = 172, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.52%

**Try**: window_size = 171, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.65%

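****
**Note**: the rebalance_period > 1 results in this group were produced with the holding-period return computed as the *mean* of the daily returns in the period (not compounded), so the reported annual return falls roughly as 1 / rebalance_period and is not comparable with the rebalance_period = 1 results.
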
**Try**: window_size = 170, rebalance_period = 1, max_selected_stocks = 100 **Results**: 23.11%

**Try**: window_size = 170, rebalance_period = 2, max_selected_stocks = 100 **Results**: 11.65%

**Try**: window_size = 170, rebalance_period = 3, max_selected_stocks = 100 **Results**: 7.49%

**Try**: window_size = 170, rebalance_period = 4, max_selected_stocks = 100 **Results**: 5.45%

**Try**: window_size = 170, rebalance_period = 5, max_selected_stocks = 100 **Results**: 4.34% 

**Try**: window_size = 170, rebalance_period = 6, max_selected_stocks = 100 **Results**: 3.71%

**Try**: window_size = 170, rebalance_period = 7, max_selected_stocks = 100 **Results**: 2.96%
****

**Try**: window_size = 165, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.52%

**Try**: window_size = 160, rebalance_period = 1, max_selected_stocks = 100 **Results**: 22.96%

**Try**: window_size = 150, rebalance_period = 1, max_selected_stocks = 100 **Results**: 20.39%

**Try**: window_size = 140, rebalance_period = 1, max_selected_stocks = 100 **Results**: 17.69%

**Try**: window_size = 120, rebalance_period = 1, max_selected_stocks = 100 **Results**: 17.46%  

**Try**: window_size = 100, rebalance_period = 1, max_selected_stocks = 100 **Results**: 16.23%

**Try**: window_size = 80, rebalance_period = 1, max_selected_stocks = 100 **Results**: 12.33%

**Try**: window_size = 60, rebalance_period = 1, max_selected_stocks = 100 **Results**: 12.68%

**Try**: window_size = 40, rebalance_period = 1, max_selected_stocks = 100 **Results**: 10.92%

**Try**: window_size = 20, rebalance_period = 1, max_selected_stocks = 100 **Results**: 10.66%

In [None]:
import os
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt

# --- Backtest configuration -------------------------------------------------

# Folder scanned for the per-ticker CSV price files
directory = "/home/jesse/Projects/CWP_RL/02_MVO_Multi_Assets/SP500_15_25"

# Universe filter: a ticker is kept only if its file has at least this many rows
min_data_length = 2500

# Strategy parameters
window_size = 220          # look-back length (rows of the returns table) used for each optimization
rebalance_period = 1       # step (rows of the returns table) between two rebalances
max_selected_stocks = 100  # upper bound on the number of stocks held after each rebalance

# Capital and rates
initial_capital = 10000    # starting portfolio value
risk_free_rate = 0         # default risk-free rate of the Sharpe-ratio objective

print("âœ… Initialization complete!")


In [None]:
def load_data(directory, min_rows=None):
    """Load the 'Close' price history of every CSV file in *directory*.

    Each ``<ticker>.csv`` file must contain a 'Date' column and a 'Close'
    column. Rows are indexed by the parsed 'Date' and sorted chronologically.

    Args:
        directory: Folder to scan for ``.csv`` files.
        min_rows: Minimum number of rows a file needs in order to be kept.
            When omitted, the module-level ``min_data_length`` is used.

    Returns:
        dict mapping ticker (file name without the ``.csv`` suffix) to a
        single-column DataFrame holding the 'Close' prices.
    """
    if min_rows is None:
        min_rows = min_data_length

    suffix = ".csv"
    stock_data = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the ticker
    # (and therefore the returns-column) order reproducible between runs.
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith(suffix):
            continue

        df = pd.read_csv(os.path.join(directory, file_name))
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index('Date').sort_index()

        # Only keep stocks with enough history (>= min_rows rows)
        if df.shape[0] >= min_rows:
            # Strip only the trailing extension, and store only the Close prices
            stock_data[file_name[:-len(suffix)]] = df[['Close']]

    return stock_data

# Read every qualifying ticker from the configured directory
stock_data = load_data(directory)

# Show the head of the first five loaded tickers as a sanity check
for ticker in list(stock_data)[:5]:
    preview = stock_data[ticker].head()
    print(f"ðŸ“ˆ Sample data for {ticker}:\n", preview)

print(f"\nâœ… Successfully loaded data for {len(stock_data)} stocks!")


In [None]:
# One Series of daily 'Close' percentage changes per ticker; the first row of
# each ticker has no previous price and is dropped.
returns_list = [
    frame.pct_change().dropna()['Close']
    for frame in stock_data.values()
]

# Put the per-ticker Series side by side (one column per ticker) and label
# the columns with the tickers, in the same order as stock_data.
returns = pd.concat(returns_list, axis=1)
returns.columns = list(stock_data)

# Quick look at the result
print("ðŸ“Š Example of returns data (first 5 rows):\n", returns.head())

print("\nâœ… Daily returns calculation completed!")


In [None]:
def annualized_returns(returns, trading_days=252):
    """Scale the mean per-period return to a yearly figure.

    Args:
        returns: Object exposing ``.mean()`` (e.g. a DataFrame of daily returns).
        trading_days: Number of periods per year used for the scaling.

    Returns:
        ``returns.mean()`` multiplied by ``trading_days``.
    """
    average_return = returns.mean()
    return average_return * trading_days

def annualized_covariance(returns, trading_days=252):
    """Scale the per-period covariance matrix to a yearly figure.

    Args:
        returns: Object exposing ``.cov()`` (e.g. a DataFrame of daily returns).
        trading_days: Number of periods per year used for the scaling.

    Returns:
        ``returns.cov()`` multiplied by ``trading_days``.
    """
    period_covariance = returns.cov()
    return period_covariance * trading_days

# Objective function: negative Sharpe Ratio (minimizing it maximizes the Sharpe Ratio)
def objective(weights, mean_returns, cov_matrix, risk_free_rate = risk_free_rate):
    """Return the negative Sharpe ratio of a portfolio.

    Args:
        weights: 1-D array of portfolio weights.
        mean_returns: Expected return per asset, aligned with ``weights``.
        cov_matrix: Covariance matrix of the assets, aligned with ``weights``.
        risk_free_rate: Rate subtracted from the portfolio return. The default
            is the value of the module-level ``risk_free_rate`` at the moment
            this function is defined.

    Returns:
        ``-(portfolio_return - risk_free_rate) / portfolio_volatility``; the
        sign is flipped because the optimizer minimizes its objective.
    """
    portfolio_return = np.sum(weights * mean_returns)

    # Portfolio variance: w' * Sigma * w
    portfolio_variance = np.dot(weights.T, np.dot(cov_matrix, weights))

    # Degenerate variance (NaN, negative, or exactly zero) would make the
    # square root or the division below invalid, so fall back to a small
    # positive volatility instead. Zero is included to avoid dividing by 0.
    if np.isnan(portfolio_variance) or portfolio_variance <= 0:
        portfolio_volatility = 1e-6
    else:
        portfolio_volatility = np.sqrt(portfolio_variance)

    # Calculate the Sharpe Ratio
    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_volatility

    return -sharpe_ratio

# Progress message: the annualization helpers and the Sharpe objective are now defined
print("âœ… MVO related functions have been defined!")


In [None]:
# Walk-forward backtest: at every rebalance step, pick the stocks with the best
# trailing Sharpe ratio, optimize their weights for maximum Sharpe ratio, then
# apply the realized return of the following holding period to the portfolio.
portfolio_value = initial_capital
portfolio_values = []
weights_history = []

rebalance_dates = []  # Store the dates of each rebalance

for i in range(window_size, len(returns), rebalance_period):
    # Record the rebalance date: row i-1 is the last row the optimizer sees.
    # The portfolio value stored for this date already includes the return of
    # the holding period that starts at row i.
    rebalance_dates.append(returns.index[i - 1])

    # Trailing window of returns used for this optimization
    window_returns = returns.iloc[i - window_size:i]

    # Annualized mean return and standard deviation per stock
    mean_returns = annualized_returns(window_returns)
    std_returns = window_returns.std() * np.sqrt(252)

    # Rank by trailing Sharpe ratio and keep at most max_selected_stocks stocks
    sharpe_ratios = mean_returns / std_returns
    top_stocks = sharpe_ratios.nlargest(max_selected_stocks).index

    # Perform MVO only for the selected stocks
    selected_returns = window_returns[top_stocks]
    mean_returns = mean_returns[top_stocks]
    cov_matrix = annualized_covariance(selected_returns)

    # Small ridge on the diagonal to keep the covariance matrix non-singular
    cov_matrix += np.eye(len(mean_returns)) * 1e-6

    # Start the optimizer from equal weights
    num_assets = len(mean_returns)
    initial_weights = np.ones(num_assets) / num_assets

    # Fully invested: weights sum to 1
    constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})

    # Long-only: each weight stays within [0, 1]
    bounds = tuple((0, 1) for _ in range(num_assets))

    # Maximize the Sharpe ratio (objective returns its negative)
    optimized_result = minimize(objective, initial_weights, args=(mean_returns, cov_matrix),
                                method='SLSQP', bounds=bounds, constraints=constraints)

    # Store the optimal weights, labelled with the rebalance date
    optimal_weights = optimized_result.x
    weights_history.append(pd.Series(optimal_weights, index=top_stocks, name=returns.index[i - 1]))

    # Realized return of the holding period that follows the rebalance.
    # Daily returns are compounded per stock: averaging them (as was done
    # before) credits only about one day of return per holding period whenever
    # rebalance_period > 1. For rebalance_period == 1 both forms coincide.
    holding_returns = returns.iloc[i:i + rebalance_period][top_stocks]
    recent_returns = (1 + holding_returns).prod() - 1
    portfolio_return = np.sum(optimal_weights * recent_returns)
    portfolio_value *= (1 + portfolio_return)
    portfolio_values.append(portfolio_value)


# Link portfolio values with dates
portfolio_results = pd.DataFrame({'Date': rebalance_dates, 'Portfolio Value': portfolio_values})

# Record portfolio weights (one row per rebalance date, one column per stock ever held)
weights_df = pd.DataFrame(weights_history)

print("âœ… MVO Calculation Complete, Dates and Weights Recorded!")

# Print Results
print(portfolio_results.head())
print(weights_df.head())


In [None]:
# Rebuild the weights table (one row per rebalance) and give the portfolio
# values the same index so both files line up row by row.
weights_df = pd.DataFrame(weights_history)
portfolio_values_df = pd.DataFrame(
    portfolio_values,
    index=weights_df.index,
    columns=['Portfolio Value'],
)

# Write both tables to CSV files in the current working directory
for frame, csv_name in (
    (weights_df, "optimal_weights_history.csv"),
    (portfolio_values_df, "portfolio_value_history.csv"),
):
    frame.to_csv(csv_name)

print("âœ… Results have been saved!")


In [None]:
# Line chart of the backtested portfolio value over the rebalance dates
fig, ax = plt.subplots(figsize=(15, 9))
ax.plot(
    portfolio_values_df.index,
    portfolio_values_df['Portfolio Value'],
    label='Portfolio Value',
    color='b',
)
ax.set_xlabel('Date')
ax.set_ylabel('Portfolio Value ($)')
ax.set_title('Portfolio Value Over Time')
ax.legend()
ax.grid(True)
plt.show()

print("âœ… Graph Finished")


In [None]:
import matplotlib.dates as mdates
import yfinance as yf

# Fetch the S&P 500 index (^GSPC) daily history used as the benchmark
data = yf.download('^GSPC', start='2015-06-24', end='2025-03-25')

# Keep the closing prices and rescale them so the first value equals 10,000
close_prices = data['Close']
first_close = close_prices.iloc[0]
normalized_close = 10000 * (close_prices / first_close)

# Draw the normalized benchmark
fig, ax = plt.subplots(figsize=(18, 12))
ax.plot(close_prices.index, normalized_close, label='S&P 500 Daily Closing Price', color='r')

# Axis labels and title
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Normalized Price', fontsize=12)
ax.set_title('S&P 500 Daily Closing Price (Normalized)', fontsize=14)

# X-axis: one major tick every 2 years, labelled with the year only
ax.xaxis.set_major_locator(mdates.YearLocator(2))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Grid, legend, and rotated tick labels to prevent overlap
ax.grid(True)
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()

# Show the plot
plt.show()
