# Importing initial libraries


In [2]:
import pandas as pd
import yfinance as yf
from pandas_datareader import data as pdr

# Downloading and saving data


In [3]:
# Set up yfinance to use pandas data reader
yf.pdr_override()

# Define the list of tickers
tickers = ['COR', 'CE', 'HUM', 'CINF', 'KEYS', 'NXPI', 'BMY', 'EMR',
           'CVS', 'RVTY', 'ES', 'DHI', 'ZBRA', 'KMI', 'INVH', 'GPC', 'SWKS', 'GIS']

# Define the date range
start_date = '2002-01-01'
end_date = '2022-12-31'
Output_file = 'stock_prices.xlsx'

# Create a pandas DataFrame to store the data
price_data = pdr.get_data_yahoo(
    tickers, start=start_date, end=end_date)['Close']
adj_price_data = pdr.get_data_yahoo(
    tickers, start=start_date, end=end_date)['Adj Close']
volume_data = pdr.get_data_yahoo(
    tickers, start=start_date, end=end_date)['Volume']

[*********************100%%**********************]  18 of 18 completed
[*********************100%%**********************]  18 of 18 completed
[*********************100%%**********************]  18 of 18 completed


In [4]:
# Convert the datetime index to string in the format "MM/DD/YYYY"
price_data.index = price_data.index.strftime('%m/%d/%Y')
adj_price_data.index = adj_price_data.index.strftime('%m/%d/%Y')
volume_data.index = volume_data.index.strftime('%m/%d/%Y')

In [5]:

# Create an Excel writer
with pd.ExcelWriter(Output_file, engine='openpyxl') as writer:
    # Save close prices to "Price_daily" sheet
    price_data.to_excel(writer, sheet_name='Price_daily')

    # Save adjusted close prices to "Adj_Price_daily" sheet
    adj_price_data.to_excel(writer, sheet_name='Adj_Price_daily')

    # Save volume data to "Volume_daily" sheet
    volume_data.to_excel(writer, sheet_name='Volume_daily')

print("Stock prices saved to stock_prices.xlsx")

Stock prices saved to stock_prices.xlsx


In [6]:
sp500_data = pdr.get_data_yahoo('^GSPC', start=start_date, end=end_date)[
    ["Adj Close", "Close", "High", "Low", "Open", "Volume"]]
sp500_data.index = sp500_data.index.strftime('%m/%d/%Y')

with pd.ExcelWriter(Output_file, engine='openpyxl', mode='a', if_sheet_exists="replace") as writer:
    sp500_data.to_excel(writer, sheet_name='S&P 500')
print("Prices for the S&P 500 index (^GSPC) saved to stock_prices.xlsx")

[*********************100%%**********************]  1 of 1 completed
Prices for the S&P 500 index (^GSPC) saved to stock_prices.xlsx


# Calculating Market Captilization


In [7]:
from Market_Cap import cal_market_cap, filter_last_date_per_year

constituents_file = "S&P 500 Constituent.xlsx"
cal_market_cap(start_date, end_date, constituents_file, Output_file, tickers)


sheet_name = 'Market_Caps'
filtered_df = filter_last_date_per_year(Output_file, sheet_name)


with pd.ExcelWriter(Output_file, engine='openpyxl', mode='a', if_sheet_exists="replace") as writer:
    filtered_df.to_excel(writer, sheet_name='Market_Caps')

print("Annual maket capital saved to stock_prices.xlsx")

Market capitalization data added to the Excel file.
          Date           COR            CE           HUM          CINF  \
0   2002-12-31  2.095853e+09           NaN  1.107812e+09  2.546110e+09   
1   2003-12-31  2.216051e+09           NaN  2.531349e+09  2.904205e+09   
2   2004-12-31  2.272264e+09           NaN  3.289090e+09  3.317502e+09   
3   2005-12-30  3.211910e+09  1.605232e+09  6.018737e+09  3.619249e+09   
4   2006-12-29  3.497821e+09  2.190745e+09  6.127302e+09  3.778134e+09   
5   2007-12-31  3.614503e+09  3.599710e+09  8.342923e+09  3.408419e+09   
6   2008-12-31  2.897553e+09  1.062671e+09  4.129919e+09  2.632865e+09   
7   2009-12-31  4.286258e+09  2.768339e+09  4.862183e+09  2.538391e+09   
8   2010-12-31  5.674673e+09  3.571044e+09  6.064159e+09  3.235817e+09   
9   2011-12-30  6.259214e+09  3.858796e+09  9.793614e+09  3.284150e+09   
10  2012-12-31  7.382483e+09  3.906720e+09  7.777370e+09  4.408009e+09   
11  2013-12-31  1.220708e+10  4.902934e+09  1.184524e+10  6.

# Calculating annual liquidity


##### Calculateing the sum of daily Volume for each firm per year, dividing it by the total shares outstanding, and saving it in the sheet “Liquidity_annual”. This is a measure of how frequently a stock is traded, and generally, it is perceived as a measure of its liquidity.


In [8]:
# Create a new sheet for liquidity
excel_file = Output_file  # Replace with your file path
sheet_name = "Liquidity_annual"

from Market_Cap import get_historical_data

# Check if the sheet already exists and remove it
with pd.ExcelWriter(excel_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    # Create an empty list for liquidity data
    liquidity_data = []

    # Iterate through each year
    for year in range(2002, 2023):  # Adjusted the range to include 2022
        # Extract closing prices for the last trading day of each year

        # Iterate through each custom ticker
        for ticker in tickers:
            try:
                stock_data = get_historical_data(ticker, f"{year}-01-01", f"{year}-12-31")
                if stock_data is not None:
                    daily_volume = stock_data["Volume"]
                    total_shares_outstanding = constituents_file.loc[constituents_file["ticker"] == ticker, "Share_outstanding"].values[0]
                    liquidity = daily_volume.sum() / total_shares_outstanding

                    # Append a dictionary to the list
                    liquidity_data.append({"Ticker": ticker, "Liquidity": liquidity, "Year": year})
                else:
                    print(f"No data available for {ticker} in {year}")
            except Exception as e:
                print(f"Failed download for {ticker}: {e}")

        # Convert the list to a DataFrame
        liquidity_df = pd.DataFrame(liquidity_data)

        # Pivot the DataFrame to have years in the first column and tickers in the header row
        pivot_liquidity_df = liquidity_df.pivot(index='Year', columns='Ticker', values='Liquidity')

        # Save the pivoted liquidity data for the year in a new sheet
        pivot_liquidity_df.to_excel(writer, sheet_name=sheet_name, index=True, header=True)

print("Liquidity data added to the Excel file.")

Failed download for COR: 'str' object has no attribute 'loc'


CE: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for CE: 'str' object has no attribute 'loc'
Failed download for HUM: 'str' object has no attribute 'loc'
Failed download for CINF: 'str' object has no attribute 'loc'


KEYS: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for KEYS: 'str' object has no attribute 'loc'


NXPI: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for NXPI: 'str' object has no attribute 'loc'
Failed download for BMY: 'str' object has no attribute 'loc'
Failed download for EMR: 'str' object has no attribute 'loc'
Failed download for CVS: 'str' object has no attribute 'loc'
Failed download for RVTY: 'str' object has no attribute 'loc'
Failed download for ES: 'str' object has no attribute 'loc'
Failed download for DHI: 'str' object has no attribute 'loc'
Failed download for ZBRA: 'str' object has no attribute 'loc'


KMI: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for KMI: 'str' object has no attribute 'loc'


INVH: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for INVH: 'str' object has no attribute 'loc'
Failed download for GPC: 'str' object has no attribute 'loc'
Failed download for SWKS: 'str' object has no attribute 'loc'
Failed download for GIS: 'str' object has no attribute 'loc'


KeyError: 'Year'

In [14]:
liquidity

NameError: name 'liquidity' is not defined

In [None]:
from liquidity import cal_liquidity
excel_file = "Stock Data Output.xlsx"  # Replace with your file path
sheet_name = "Liquidity_annual"
cal_liquidity(Output_file, tickers, constituents_file, sheet_name)

Failed download for COR: 'str' object has no attribute 'loc'


CE: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for CE: 'str' object has no attribute 'loc'
Failed download for HUM: 'str' object has no attribute 'loc'
Failed download for CINF: 'str' object has no attribute 'loc'


KEYS: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for KEYS: 'str' object has no attribute 'loc'


NXPI: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for NXPI: 'str' object has no attribute 'loc'
Failed download for BMY: 'str' object has no attribute 'loc'
Failed download for EMR: 'str' object has no attribute 'loc'
Failed download for CVS: 'str' object has no attribute 'loc'
Failed download for RVTY: 'str' object has no attribute 'loc'
Failed download for ES: 'str' object has no attribute 'loc'
Failed download for DHI: 'str' object has no attribute 'loc'
Failed download for ZBRA: 'str' object has no attribute 'loc'


KMI: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for KMI: 'str' object has no attribute 'loc'


INVH: Data doesn't exist for startDate = 1009861200, endDate = 1041310800


Failed download for INVH: 'str' object has no attribute 'loc'
Failed download for GPC: 'str' object has no attribute 'loc'
Failed download for SWKS: 'str' object has no attribute 'loc'
Failed download for GIS: 'str' object has no attribute 'loc'


KeyError: 'Year'

# Calculating returns


##### Using adjusted close prices at the annual, monthly, and daily frequencies, computing the annual, monthly, and daily returns. Saving them in new sheets labeled “Returns_annual”, “Returns_monthly”, and “Returns_daily”.


In [None]:
from returns import cal_returns

cal_returns(Output_file, tickers, adj_price_data, start_date, end_date)

# Calculating annual risks


##### Using Returtns_daily, calculating the standard deviation of each stock in each year. Saving these in a new sheet called “Risk_annual”.


In [None]:
from risk import cal_risks

cal_risks(Output_file, tickers, adj_price_data)

# Calculating statistics


##### Calcuating summary statistics of our portfolio holdings (in sheet “Firm_Summary_Stat”)


In [None]:
from beta import beta_main
returns_sheet_name = 'Returns_annual'
constituents_sheet_name = 'S&P 500 Constituent'

returns_data = pd.read_excel(
    Output_file, sheet_name=returns_sheet_name, index_col=0)
constituents_data = pd.read_excel(
    constituents_file, sheet_name=constituents_sheet_name, index_col=0)

# Filter returns data for the last 5 years (2018:2022)
returns_data_last_5_years = returns_data.loc['2018-01-01':'2022-12-31']

# Create a new DataFrame for summary statistics
summary_stats_df = pd.DataFrame()


# Calculate and add summary statistics for each firm
summary_stats_df['Min'] = returns_data_last_5_years.min()
summary_stats_df['Max'] = returns_data_last_5_years.max()
summary_stats_df['Mean'] = returns_data_last_5_years.mean()
summary_stats_df['Volatility'] = returns_data_last_5_years.std()

# Load market capitalization data
market_cap_data = pd.read_excel(
    Output_file, sheet_name="Market_Cap", index_col=0)

# Add market capitalization (size) for each firm to the summary_stats_df
# You can use mean() or any other aggregation method
summary_stats_df['Size'] = market_cap_data.mean()


# Add industry information for each firm
summary_stats_df['Industry'] = constituents_data['GICS Sector']

# Add a new sheet "Firm_Summary_Stat" to the existing Excel file
with pd.ExcelWriter(Output_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    summary_stats_df.T.to_excel(
        writer, sheet_name="Firm_Summary_Stat", index=True, header=True)


beta_main(tickers, start_date, end_date,
          constituents_file, Output_file, summary_stats_df)

# Personal Portfolio Analysis


##### Strategy = Return: Every January, invest more in firms that had a larger return last year. If they had negative returns, do not invest in them this year.


In [None]:
from portfolio_analysis import cal_portfolio

combined_returns, Rf = cal_portfolio(tickers, adj_price_data)

# Calculating portfolio performance


In [None]:
from portfolio_perf import cal_portfolio_perf

cal_portfolio_perf(combined_returns, Rf)

# Calculating Portfolio Returns


In [None]:
from portfolio_return import cal_portfolio_return

cal_portfolio_return(combined_returns, Rf)

# Calculating investment summary


In [None]:
from invest_sum import cal_invest_sum

cal_invest_sum(tickers, Rf, combined_returns, constituents_file, Output_file)

# Making excel file more readalbe.

###### (This is the last step)


#### Wraping Text
