# Importing initial libraries


In [2]:
import pandas as pd
import yfinance as yf
from pandas_datareader import data as pdr

# Downloading and saving data


In [3]:
# Set up yfinance to use pandas data reader
yf.pdr_override()

# Define the list of tickers
tickers = ['COR', 'CE', 'HUM', 'CINF', 'KEYS', 'NXPI', 'BMY', 'EMR',
           'CVS', 'RVTY', 'ES', 'DHI', 'ZBRA', 'KMI', 'INVH', 'GPC', 'SWKS', 'GIS']

# Define the date range
start_date = '2002-01-01'
end_date = '2022-12-31'
Output_file = 'stock_prices.xlsx'

# Create a pandas DataFrame to store the data
price_data = pdr.get_data_yahoo(
    tickers, start=start_date, end=end_date)['Close']
adj_price_data = pdr.get_data_yahoo(
    tickers, start=start_date, end=end_date)['Adj Close']
volume_data = pdr.get_data_yahoo(
    tickers, start=start_date, end=end_date)['Volume']

[*********************100%%**********************]  18 of 18 completed
[*********************100%%**********************]  18 of 18 completed
[*********************100%%**********************]  18 of 18 completed


In [4]:
# Convert the datetime index to string in the format "MM/DD/YYYY"
price_data.index = price_data.index.strftime('%m/%d/%Y')
adj_price_data.index = adj_price_data.index.strftime('%m/%d/%Y')
volume_data.index = volume_data.index.strftime('%m/%d/%Y')

In [5]:

# Create an Excel writer
with pd.ExcelWriter(Output_file, engine='openpyxl') as writer:
    # Save close prices to "Price_daily" sheet
    price_data.to_excel(writer, sheet_name='Price_daily')

    # Save adjusted close prices to "Adj_Price_daily" sheet
    adj_price_data.to_excel(writer, sheet_name='Adj_Price_daily')

    # Save volume data to "Volume_daily" sheet
    volume_data.to_excel(writer, sheet_name='Volume_daily')

print("Stock prices saved to stock_prices.xlsx")

Stock prices saved to stock_prices.xlsx


In [6]:
sp500_data = pdr.get_data_yahoo('^GSPC', start=start_date, end=end_date)[
    ["Adj Close", "Close", "High", "Low", "Open", "Volume"]]
sp500_data.index = sp500_data.index.strftime('%m/%d/%Y')

with pd.ExcelWriter(Output_file, engine='openpyxl', mode='a', if_sheet_exists="replace") as writer:
    sp500_data.to_excel(writer, sheet_name='S&P 500')
print("Prices for the S&P 500 index (^GSPC) saved to stock_prices.xlsx")

[*********************100%%**********************]  1 of 1 completed
Prices for the S&P 500 index (^GSPC) saved to stock_prices.xlsx


# Calculating Market Captilization


In [7]:
from Market_Cap import cal_market_cap, filter_last_date_per_year

constituents_file = "S&P 500 Constituent.xlsx"
constituents_data = pd.read_excel(
    constituents_file, sheet_name="S&P 500 Constituent")
cal_market_cap(start_date, end_date, constituents_file, Output_file, tickers)


sheet_name = 'Market_Caps'
filtered_df = filter_last_date_per_year(Output_file, sheet_name)


with pd.ExcelWriter(Output_file, engine='openpyxl', mode='a', if_sheet_exists="replace") as writer:
    filtered_df.to_excel(writer, sheet_name='Market_Caps')

print("Annual maket capital saved to stock_prices.xlsx")

Market capitalization data added to the Excel file.
          Date           COR            CE           HUM          CINF  \
0   2002-12-31  2.095852e+09           NaN  1.107811e+09  2.546110e+09   
1   2003-12-31  2.216050e+09           NaN  2.531348e+09  2.904207e+09   
2   2004-12-31  2.272264e+09           NaN  3.289091e+09  3.317503e+09   
3   2005-12-30  3.211911e+09  1.605232e+09  6.018738e+09  3.619248e+09   
4   2006-12-29  3.497818e+09  2.190745e+09  6.127303e+09  3.778134e+09   
5   2007-12-31  3.614503e+09  3.599709e+09  8.342921e+09  3.408416e+09   
6   2008-12-31  2.897552e+09  1.062671e+09  4.129920e+09  2.632863e+09   
7   2009-12-31  4.286257e+09  2.768339e+09  4.862182e+09  2.538390e+09   
8   2010-12-31  5.674672e+09  3.571045e+09  6.064158e+09  3.235818e+09   
9   2011-12-30  6.259213e+09  3.858797e+09  9.793616e+09  3.284150e+09   
10  2012-12-31  7.382486e+09  3.906718e+09  7.777372e+09  4.408008e+09   
11  2013-12-31  1.220708e+10  4.902934e+09  1.184524e+10  6.

# Calculating annual liquidity


##### Calculateing the sum of daily Volume for each firm per year, dividing it by the total shares outstanding, and saving it in the sheet “Liquidity_annual”. This is a measure of how frequently a stock is traded, and generally, it is perceived as a measure of its liquidity.


In [8]:
from liquidity import cal_liquidity
excel_file = "Stock Data Output.xlsx"  # Replace with your file path
sheet_name = "Liquidity_annual"
constituents_data = constituents_data[["ticker", "Name", "Share_outstanding"]]
cal_liquidity(Output_file, tickers, constituents_data, sheet_name)

CE: Data doesn't exist for startDate = 1009861200, endDate = 1041310800
KEYS: Data doesn't exist for startDate = 1009861200, endDate = 1041310800
NXPI: Data doesn't exist for startDate = 1009861200, endDate = 1041310800
KMI: Data doesn't exist for startDate = 1009861200, endDate = 1041310800
INVH: Data doesn't exist for startDate = 1009861200, endDate = 1041310800
CE: Data doesn't exist for startDate = 1041397200, endDate = 1072846800
KEYS: Data doesn't exist for startDate = 1041397200, endDate = 1072846800
NXPI: Data doesn't exist for startDate = 1041397200, endDate = 1072846800
KMI: Data doesn't exist for startDate = 1041397200, endDate = 1072846800
INVH: Data doesn't exist for startDate = 1041397200, endDate = 1072846800
CE: Data doesn't exist for startDate = 1072933200, endDate = 1104469200
KEYS: Data doesn't exist for startDate = 1072933200, endDate = 1104469200
NXPI: Data doesn't exist for startDate = 1072933200, endDate = 1104469200
KMI: Data doesn't exist for startDate = 107293

Liquidity data added to the Excel file.


# Calculating returns


##### Using adjusted close prices at the annual, monthly, and daily frequencies, computing the annual, monthly, and daily returns. Saving them in new sheets labeled “Returns_annual”, “Returns_monthly”, and “Returns_daily”.


In [9]:
def download_data(ticker, start_date, end_date):
    benchmark_symbol = "^GSPC"
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    benchmark_data = yf.download(
        benchmark_symbol, start=start_date, end=end_date)
    return stock_data, benchmark_data


for ticker in tickers:
    data, benchmark_data = download_data(ticker, start_date, end_date)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [10]:
from returns import cal_returns

cal_returns(Output_file, tickers, data)

Data download, Excel file update, and returns calculation completed.


# Calculating annual risks


##### Using Returtns_daily, calculating the standard deviation of each stock in each year. Saving these in a new sheet called “Risk_annual”.


In [11]:
from risk import cal_risks

cal_risks(Output_file, tickers, data)

Data download, Excel file update, returns calculation, and risk calculation completed.


# Calculating statistics


##### Calcuating summary statistics of our portfolio holdings (in sheet “Firm_Summary_Stat”)


In [12]:
import pandas as pd
import numpy as np
from beta import cal_beta_main, calculate_beta

# Load the data from Excel file
returns_sheet_name = 'Returns_annual'
constituents_sheet_name = 'S&P 500 Constituent'

returns_data = pd.read_excel(
    Output_file, sheet_name=returns_sheet_name, index_col=0)
constituents_data = pd.read_excel(
    "S&P 500 Constituent.xlsx", sheet_name=constituents_sheet_name, index_col=0)

# Filter returns data for the last 5 years (2018:2022)
returns_data_last_5_years = returns_data.loc['2018-01-01':'2022-12-31']

# Create a new DataFrame for summary statistics
summary_stats_df = pd.DataFrame()


# Calculate and add summary statistics for each firm
summary_stats_df['Min'] = returns_data_last_5_years.min()
summary_stats_df['Max'] = returns_data_last_5_years.max()
summary_stats_df['Mean'] = returns_data_last_5_years.mean()
summary_stats_df['Volatility'] = returns_data_last_5_years.std()

# Load market capitalization data
market_cap_data = pd.read_excel(
    Output_file, sheet_name="Market_Caps", index_col=0)

# Add market capitalization (size) for each firm to the summary_stats_df
# You can use mean() or any other aggregation method
summary_stats_df['Size'] = market_cap_data.mean()


# Add industry information for each firm
summary_stats_df['Industry'] = constituents_data['GICS Sector']

# Add a new sheet "Firm_Summary_Stat" to the existing Excel file
with pd.ExcelWriter(Output_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    summary_stats_df.T.to_excel(
        writer, sheet_name="Firm_Summary_Stat", index=True, header=True)


cal_beta_main(tickers, data, benchmark_data, start_date, end_date,
              constituents_file, Output_file, summary_stats_df)

# Personal Portfolio Analysis


##### Strategy = Return: Every January, invest more in firms that had a larger return last year. If they had negative returns, do not invest in them this year.


In [13]:
from portfolio_analysis import cal_portfolio

combined_returns, Rf = cal_portfolio(
    tickers, Output_file)

[*********************100%%**********************]  18 of 18 completed


  FF_3Factor_All = pdr.get_data_famafrench(
  FF_3Factor_All = pdr.get_data_famafrench(


Optimization terminated successfully.
         Current function value: -1.342083
         Iterations: 19
         Function evaluations: 468
         Gradient evaluations: 26
[*********************100%%**********************]  1 of 1 completed


FileNotFoundError: [Errno 2] No such file or directory: 'Stock Data Output.xlsx'

# Calculating portfolio performance


In [None]:
from portfolio_perf import cal_portfolio_perf

cal_portfolio_perf(combined_returns, Rf)

# Calculating Portfolio Returns


In [None]:
from portfolio_return import cal_portfolio_return

cal_portfolio_return(combined_returns, Rf)

# Calculating investment summary


In [None]:
from invest_sum import cal_invest_sum

cal_invest_sum(tickers, Rf, combined_returns, constituents_file, Output_file)

# Making excel file more readalbe.

###### (This is the last step)


#### Wraping Text
