In [11]:
import pandas as pd
from pypfopt import expected_returns, risk_models, EfficientFrontier

data = pd.read_csv('combined_eod_data.csv', index_col='date', parse_dates=True)

# Step 2: Calculate expected returns and covariance matrix
mu = expected_returns.mean_historical_return(data)
S = risk_models.sample_cov(data)

# Step 3: Optimize portfolio for maximum Sharpe ratio
ef = EfficientFrontier(mu, S)
weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)

# Step 4: Calculate performance
performance = ef.portfolio_performance(verbose=True)


OrderedDict({'AAPL': 0.0, 'AMZN': 0.0, 'AVGO': 0.0, 'BRK-B': 0.0, 'FB': 0.0, 'GOOG': 0.0, 'GOOGL': 0.0, 'JPM': 0.0, 'LLY': 0.58015, 'MSFT': 0.0, 'NVDA': 0.41985, 'TSLA': 0.0, 'UNH': 0.0, 'V': 0.0, 'XOM': 0.0})
Expected annual return: 70.0%
Annual volatility: 32.8%
Sharpe Ratio: 2.07


  returns = prices.pct_change().dropna(how="all")
  returns = prices.pct_change().dropna(how="all")


In [12]:

import pandas as pd
from pypfopt import expected_returns, risk_models, EfficientFrontier

data = pd.read_csv('combined_eod_data.csv', index_col='date', parse_dates=True)

mu = expected_returns.mean_historical_return(data)
S = risk_models.sample_cov(data, frequency=252)

max_weight = 1 / len(data.columns) * 2

ef = EfficientFrontier(mu, S, weight_bounds=(0.01, max_weight))
weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)

efmin = EfficientFrontier(None, S, weight_bounds=(0.01, max_weight))
efmin.min_volatility()
weights = efmin.clean_weights()
print(weights)

S




OrderedDict({'AAPL': 0.01, 'AMZN': 0.01, 'AVGO': 0.01, 'BRK-B': 0.13333, 'FB': 0.01, 'GOOG': 0.13333, 'GOOGL': 0.01, 'JPM': 0.01, 'LLY': 0.13333, 'MSFT': 0.13333, 'NVDA': 0.13333, 'TSLA': 0.01, 'UNH': 0.13333, 'V': 0.01, 'XOM': 0.12})
OrderedDict({'AAPL': 0.01, 'AMZN': 0.03784, 'AVGO': 0.01, 'BRK-B': 0.13333, 'FB': 0.13333, 'GOOG': 0.05779, 'GOOGL': 0.01887, 'JPM': 0.03337, 'LLY': 0.13333, 'MSFT': 0.01213, 'NVDA': 0.01, 'TSLA': 0.01, 'UNH': 0.13333, 'V': 0.13333, 'XOM': 0.13333})


  returns = prices.pct_change().dropna(how="all")
  returns = prices.pct_change().dropna(how="all")


Unnamed: 0,AAPL,AMZN,AVGO,BRK-B,FB,GOOG,GOOGL,JPM,LLY,MSFT,NVDA,TSLA,UNH,V,XOM
AAPL,0.217336,0.069397,0.083551,0.043012,0.061914,0.073743,0.072361,0.052087,0.035026,0.077798,0.115204,0.226909,0.047126,0.059452,0.03358
AMZN,0.069397,0.306907,0.072431,0.026512,0.05084,0.070036,0.065026,0.030965,0.026316,0.074019,0.111412,0.101455,0.022022,0.040483,0.018304
AVGO,0.083551,0.072431,0.319207,0.039065,0.05488,0.069512,0.066869,0.059627,0.032267,0.076687,0.146209,0.126883,0.039147,0.055488,0.043516
BRK-B,0.043012,0.026512,0.039065,0.046404,0.025902,0.035246,0.032514,0.053613,0.023783,0.035563,0.04473,0.041887,0.035506,0.040766,0.043941
FB,0.061914,0.05084,0.05488,0.025902,0.10593,0.054438,0.054561,0.033392,0.025045,0.052814,0.080407,0.072787,0.031897,0.043199,0.02321
GOOG,0.073743,0.070036,0.069512,0.035246,0.054438,0.10186,0.099584,0.042198,0.028935,0.07279,0.102786,0.091081,0.03392,0.048903,0.030187
GOOGL,0.072361,0.065026,0.066869,0.032514,0.054561,0.099584,0.283358,0.033618,0.027123,0.071513,0.099378,0.090555,0.023927,0.045377,0.027381
JPM,0.052087,0.030965,0.059627,0.053613,0.033392,0.042198,0.033618,0.102401,0.024792,0.041583,0.057099,0.064793,0.04398,0.054398,0.063274
LLY,0.035026,0.026316,0.032267,0.023783,0.025045,0.028935,0.027123,0.024792,0.102908,0.036163,0.046223,0.019202,0.037143,0.027961,0.021783
MSFT,0.077798,0.074019,0.076687,0.035563,0.052814,0.07279,0.071513,0.041583,0.036163,0.09188,0.111416,0.096629,0.040352,0.052284,0.026405


In [16]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('combined_eod_data.csv')

# Set the 'date' column as the index
df.set_index('date', inplace=True)

# Convert the data to numeric, ignoring errors for any non-numeric data
df = df.apply(pd.to_numeric, errors='coerce')

# Calculate daily percentage changes
returns = df.pct_change()

# Drop the first row as it will contain NaN values due to the pct_change calculation
returns.dropna(inplace=True)

# Calculate the covariance matrix of the percentage changes
cov_matrix = returns.cov()

# Display the covariance matrix
print(cov_matrix)

           AAPL      AMZN      AVGO     BRK-B      GOOG     GOOGL       JPM  \
AAPL   0.000291  0.000221  0.000199  0.000091  0.000214  0.000201  0.000093   
AMZN   0.000221  0.001652  0.000279  0.000095  0.000278  0.000246  0.000122   
AVGO   0.000199  0.000279  0.001547  0.000070  0.000224  0.000202  0.000130   
BRK-B  0.000091  0.000095  0.000070  0.000114  0.000094  0.000076  0.000107   
GOOG   0.000214  0.000278  0.000224  0.000094  0.000394  0.000379  0.000097   
GOOGL  0.000201  0.000246  0.000202  0.000076  0.000379  0.001500  0.000044   
JPM    0.000093  0.000122  0.000130  0.000107  0.000097  0.000044  0.000227   
LLY    0.000072  0.000083  0.000084  0.000051  0.000072  0.000059  0.000042   
META   0.000084  0.000022 -0.000054 -0.000319  0.000306  0.000296 -0.000307   
MSFT   0.000198  0.000273  0.000225  0.000080  0.000241  0.000229  0.000082   
NVDA   0.000327  0.000456  0.000548  0.000119  0.000385  0.000360  0.000168   
TSLA   0.000318  0.000353  0.000344  0.000091  0.000

  returns = df.pct_change()


In [7]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('combined_eod_data.csv')

# Set the 'date' column as the index
df.set_index('date', inplace=True)

# Convert the data to numeric, ignoring errors for any non-numeric data
df = df.apply(pd.to_numeric, errors='coerce')

# Calculate daily percentage changes
returns = df.pct_change(fill_method=0)

print(returns.head())

# Drop the first row as it will contain NaN values due to the pct_change calculation
returns.dropna(inplace=True)

print(returns.head())

# Calculate the correlation matrix of the percentage changes
corr_matrix = returns.corr()

# Display the correlation matrix
print(corr_matrix)

  returns = df.pct_change(fill_method=0)


ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got 0