In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import numpy as np


In [2]:
# 6 big banks
stocks = ['GS', 'C', 'MS', 'JPM', 'BAC', 'WFC']

In [3]:
#pulls 3yrs of date history from today
date_range = '3y' 

end_date = datetime.now()
if date_range == '3y':
    start_date = end_date - timedelta(days=3 * 365)
else:
    raise ValueError("Invalid date range. Use '3y' for 3 years of historical data.")

start_date_str = start_date.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')

In [4]:
# empty data set, that has a loop to pull historical data for 3yrs, then adds all data to symbol column
data = []

for symbol in stocks:
    ticker = yf.Ticker(symbol)
    historical_data = ticker.history(start=start_date_str, end=end_date_str, interval='1d')[['Open', 'Close', 'High', 'Low', 'Volume']]

    if not historical_data.empty:
        historical_data['Symbol'] = symbol
        data.append(historical_data)
    else:
        print(f'Error data pull for {symbol}')

df = pd.concat(data)

# Filter the DataFrame to include only the 6 stocks
stocks_df = df[df['Symbol'].isin(stocks)]


In [5]:
# Group the stocks by date and then calculate the mean of OHLCV
BIGBportfolio = stocks_df.groupby(stocks_df.index).mean()

  BIGBportfolio = stocks_df.groupby(stocks_df.index).mean()


In [9]:
# Rename "synthetic" columns
BIGBportfolio.columns = ['Syn_Open', 'Syn_Close', 'Syn_High', 'Syn_Low', 'Sy_Volume']

In [8]:
BIGBportfolio

Unnamed: 0_level_0,Syn_Open,Syn_Close,Syn_High,Syn_Low,Sy_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-04-06 00:00:00-04:00,56.792727,57.901028,58.416042,56.223632,3.569765e+07
2020-04-07 00:00:00-04:00,61.378476,59.441934,62.008242,58.760297,3.601082e+07
2020-04-08 00:00:00-04:00,60.546056,62.963663,63.372256,60.035010,3.330760e+07
2020-04-09 00:00:00-04:00,64.691061,66.801018,68.097858,64.457045,5.029647e+07
2020-04-13 00:00:00-04:00,66.764498,64.550448,66.981557,64.120799,3.203900e+07
...,...,...,...,...,...
2023-03-29 00:00:00-04:00,108.368334,108.115000,108.678331,107.133331,1.990787e+07
2023-03-30 00:00:00-04:00,109.244999,107.958336,109.466665,107.335001,2.028310e+07
2023-03-31 00:00:00-04:00,108.813334,109.681664,109.856665,108.223337,2.070217e+07
2023-04-03 00:00:00-04:00,109.550002,109.476666,110.561669,108.565000,1.858058e+07


In [29]:
# Calculate the daily return
BIGBportfolio['Daily_Return'] = BIGBportfolio['Syn_Close'].pct_change()

# Calculate the annual return
annual_return = (1 + BIGBportfolio['Daily_Return']).prod()**(252 / len(BIGBportfolio['Daily_Return'])) - 1

# Calculate the annual volatility
annual_volatility = BIGBportfolio['Daily_Return'].std() * np.sqrt(252).


Annual Return: 0.23052359327033756
Annual Volatility: 0.29937828985398846


In [32]:
BIGBportfolio


Unnamed: 0_level_0,Syn_Open,Syn_Close,Syn_High,Syn_Low,Sy_Volume,Daily_Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-04-06 00:00:00-04:00,56.792727,57.901028,58.416042,56.223632,3.569765e+07,
2020-04-07 00:00:00-04:00,61.378476,59.441934,62.008242,58.760297,3.601082e+07,0.026613
2020-04-08 00:00:00-04:00,60.546056,62.963663,63.372256,60.035010,3.330760e+07,0.059247
2020-04-09 00:00:00-04:00,64.691061,66.801018,68.097858,64.457045,5.029647e+07,0.060946
2020-04-13 00:00:00-04:00,66.764498,64.550448,66.981557,64.120799,3.203900e+07,-0.033691
...,...,...,...,...,...,...
2023-03-29 00:00:00-04:00,108.368334,108.115000,108.678331,107.133331,1.990787e+07,0.009289
2023-03-30 00:00:00-04:00,109.244999,107.958336,109.466665,107.335001,2.028310e+07,-0.001449
2023-03-31 00:00:00-04:00,108.813334,109.681664,109.856665,108.223337,2.070217e+07,0.015963
2023-04-03 00:00:00-04:00,109.550002,109.476666,110.561669,108.565000,1.858058e+07,-0.001869


In [33]:
print("Annual Return:", annual_return)
print("Annual Volatility:", annual_volatility)

Annual Return: 0.23052359327033756
Annual Volatility: 0.29937828985398846
