In [2]:
import os
import pandas as pd

# Set the folder path and filtering criteria
directory = "SP500_10_20"
min_data_length = 2500  # Minimum number of rows required
top_stocks_count = 300  # Select top 300 large-cap stocks based on trading volume

# Store valid stock data
stock_data = {}
valid_tickers = []

for ticker in os.listdir(directory):
    if ticker.endswith(".csv"):
        file_path = os.path.join(directory, ticker)
        df = pd.read_csv(file_path)
        
        # Ensure sufficient data and presence of 'Close' and 'Volume' columns
        if df.shape[0] >= min_data_length and 'Close' in df.columns and 'Volume' in df.columns:
            df['Date'] = pd.to_datetime(df['Date'])
            df.set_index('Date', inplace=True)
            df.sort_index(inplace=True)  # Ensure chronological order
            
            avg_volume = df['Volume'].mean()
            valid_tickers.append({'ticker': ticker.replace(".csv", ""), 'avg_volume': avg_volume})
            stock_data[ticker.replace(".csv", "")] = df['Close']  # Store only the Close data

# Sort by average volume and select the top 300 large-cap stocks
valid_tickers = sorted(valid_tickers, key=lambda x: x['avg_volume'], reverse=True)
top_300_stocks = [ticker['ticker'] for ticker in valid_tickers[:top_stocks_count]]

# Generate return data
returns = pd.DataFrame({ticker: stock_data[ticker].pct_change().dropna() for ticker in top_300_stocks})

# Calculate annualized returns
mean_returns = returns.mean() * 252

# Output the list of top 300 stocks
print(top_300_stocks)


['NVDA', 'AAPL', 'BAC', 'TSLA', 'AMZN', 'GOOGL', 'GOOG', 'F', 'AMD', 'CMG', 'T', 'MSFT', 'INTC', 'CSCO', 'PFE', 'MU', 'AVGO', 'WMT', 'HPQ', 'C', 'CMCSA', 'LRCX', 'WFC', 'CSX', 'FCX', 'JPM', 'NFLX', 'ORCL', 'EBAY', 'RF', 'VZ', 'XOM', 'KO', 'MS', 'GM', 'AMAT', 'DAL', 'MRK', 'HAL', 'QCOM', 'KEY', 'GE', 'GILD', 'BSX', 'V', 'MGM', 'HBAN', 'SBUX', 'BMY', 'FTNT', 'GLW', 'KR', 'SCHW', 'DIS', 'PG', 'MO', 'EXC', 'SLB', 'MDLZ', 'AIG', 'WMB', 'ABT', 'NKE', 'JNJ', 'CCL', 'MNST', 'FITB', 'COP', 'NEE', 'USB', 'TJX', 'HST', 'UAL', 'OXY', 'MET', 'CVX', 'NEM', 'LOW', 'LUV', 'GEN', 'TSCO', 'CVS', 'BA', 'PHM', 'VLO', 'RTX', 'LVS', 'ON', 'TXN', 'CRM', 'CTRA', 'PARA', 'HD', 'WBA', 'PCG', 'BK', 'DD', 'JNPR', 'APH', 'VTRS', 'CPRT', 'AES', 'DECK', 'DVN', 'EW', 'CAT', 'TGT', 'WDC', 'MA', 'MDT', 'STX', 'LLY', 'BBY', 'DHI', 'PM', 'PEP', 'IPG', 'MCD', 'AXP', 'SO', 'PPL', 'BAX', 'JCI', 'CF', 'BKR', 'APA', 'MCHP', 'IBM', 'TFC', 'FAST', 'AFL', 'MOS', 'UNH', 'WY', 'CAG', 'BX', 'NI', 'MAS', 'CTSH', 'EA', 'UNP', 'CNP', 

In [3]:
# Calculate the annualized covariance matrix (using 252 trading days for annualization)
cov_matrix = returns.cov() * 252

# Output the covariance matrix
print(cov_matrix)


          NVDA      AAPL       BAC      TSLA      AMZN     GOOGL      GOOG  \
NVDA  0.179438  0.056306  0.058765  0.076655  0.055745  0.052066  0.052011   
AAPL  0.056306  0.081002  0.040689  0.050112  0.039745  0.038218  0.038370   
BAC   0.058765  0.040689  0.126671  0.050855  0.035253  0.040070  0.040307   
TSLA  0.076655  0.050112  0.050855  0.306508  0.056784  0.046166  0.046092   
AMZN  0.055745  0.039745  0.035253  0.056784  0.100178  0.046672  0.046858   
...        ...       ...       ...       ...       ...       ...       ...   
LMT   0.030686  0.022579  0.032933  0.020286  0.019298  0.021819  0.021702   
CHD   0.022684  0.016602  0.016397  0.016541  0.015773  0.015127  0.014955   
ACGL  0.032271  0.023893  0.048104  0.025380  0.018641  0.024105  0.023840   
INCY  0.049592  0.033992  0.046742  0.055121  0.043944  0.036400  0.036430   
DVA   0.033737  0.023638  0.037113  0.032389  0.022188  0.024867  0.024887   

             F       AMD       CMG  ...      BIIB       RSG    

In [4]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Define objective function: maximize Sharpe ratio
def objective(weights, mean_returns, cov_matrix, risk_free_rate=0.0):
    portfolio_return = np.sum(weights * mean_returns)
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_volatility
    return -sharpe_ratio  # scipy's minimize only minimizes, so we take the negative

# Calculate number of assets
num_assets = len(mean_returns)

# Initial weights (evenly distributed)
initial_weights = np.ones(num_assets) / num_assets

# Constraint: sum of weights must equal 1
constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})

# Bounds for weights (between 0 and 1)
bounds = tuple((0, 1) for asset in range(num_assets))

# Run optimization
optimized_result = minimize(objective, initial_weights, args=(mean_returns, cov_matrix),
                            method='SLSQP', bounds=bounds, constraints=constraints)

# Get optimal weights
optimal_weights = optimized_result.x

# Convert to DataFrame
optimal_portfolio = pd.DataFrame({'Stock': mean_returns.index, 'Weight': optimal_weights})

# Select the top 60 stocks by weight
top_60 = optimal_portfolio.nlargest(60, 'Weight')

# Re-normalize weights
top_60['Weight'] = top_60['Weight'] / top_60['Weight'].sum()

# Sort by weight descending
top_60.sort_values(by='Weight', ascending=False, inplace=True)

# Display final portfolio
print(top_60)

# Save to CSV
top_60.to_csv("optimal_portfolio_top60.csv", index=False)
print("Optimal portfolio (top 60 stocks) has been saved to 'optimal_portfolio_top60.csv'")


    Stock        Weight
68    NEE  1.713741e-01
248   KDP  1.114839e-01
3    TSLA  8.973947e-02
190    DG  8.633484e-02
100  CPRT  7.096846e-02
200  MTCH  5.702417e-02
296   CHD  5.600794e-02
111   LLY  4.946399e-02
26   NFLX  4.090804e-02
165  DXCM  4.086301e-02
231  COST  3.783517e-02
262  TTWO  3.242329e-02
4    AMZN  3.145109e-02
281   STZ  2.610639e-02
278  ODFL  2.334512e-02
65   MNST  1.858633e-02
295   LMT  1.830349e-02
1    AAPL  1.319318e-02
0    NVDA  1.281173e-02
147   DHR  9.338084e-03
253  CSGP  2.438219e-03
132   UNH  3.416742e-14
188  FSLR  2.366116e-14
24    FCX  2.038831e-14
125   APA  1.924126e-14
103   DVN  1.798163e-14
38    HAL  1.720828e-14
148   IVZ  1.693331e-14
57    SLB  1.621053e-14
73    OXY  1.582886e-14
146   TPR  1.561639e-14
131   MOS  1.481832e-14
7       F  1.206376e-14
99   VTRS  1.125763e-14
195   BEN  1.114390e-14
64    CCL  1.086460e-14
167   HES  1.011407e-14
97   JNPR  9.963038e-15
45    MGM  9.667776e-15
124   BKR  9.632936e-15
19      C  9.463