# Importing initial libraries


In [None]:
import pandas as pd
import yfinance as yf
from pandas_datareader import data as pdr

# Downloading and saving data


In [None]:
# Set up yfinance to use pandas data reader
yf.pdr_override()

# Tickers of the portfolio holdings to download
tickers = ['COR', 'CE', 'HUM', 'CINF', 'KEYS', 'NXPI', 'BMY', 'EMR',
           'CVS', 'RVTY', 'ES', 'DHI', 'ZBRA', 'KMI', 'INVH', 'GPC', 'SWKS', 'GIS']

# Date range requested from Yahoo and the workbook the results are written to
start_date = '2002-01-01'
end_date = '2022-12-31'
Output_file = 'stock_prices.xlsx'

# Download the full history once and slice the three fields out of it,
# instead of issuing the same multi-ticker request three times.
raw_data = pdr.get_data_yahoo(tickers, start=start_date, end=end_date)

# .copy() makes each table an independent frame, so changing one table
# (e.g. replacing its index) cannot affect the others or raw_data.
price_data = raw_data['Close'].copy()
adj_price_data = raw_data['Adj Close'].copy()
volume_data = raw_data['Volume'].copy()

In [None]:
# Replace the datetime index of every daily table with "MM/DD/YYYY" strings
date_format = '%m/%d/%Y'
for daily_frame in (price_data, adj_price_data, volume_data):
    daily_frame.index = daily_frame.index.strftime(date_format)

In [None]:

# Map each target sheet name to the daily table stored in it
daily_sheets = {
    'Price_daily': price_data,          # close prices
    'Adj_Price_daily': adj_price_data,  # adjusted close prices
    'Volume_daily': volume_data,        # traded volume
}

# Write every table to its own sheet of the output workbook
with pd.ExcelWriter(Output_file, engine='openpyxl') as writer:
    for daily_sheet_name, daily_frame in daily_sheets.items():
        daily_frame.to_excel(writer, sheet_name=daily_sheet_name)

# Report the path that was actually written rather than a hard-coded name,
# so the message stays correct if Output_file is changed.
print(f"Stock prices saved to {Output_file}")

In [None]:
# Download the S&P 500 index (^GSPC) for the same date range and keep
# only the columns listed below.
index_columns = ["Adj Close", "Close", "High", "Low", "Open", "Volume"]
sp500_data = pdr.get_data_yahoo(
    '^GSPC', start=start_date, end=end_date)[index_columns]

# Use "MM/DD/YYYY" strings as the index
sp500_data.index = sp500_data.index.strftime('%m/%d/%Y')

# mode='a' opens the existing workbook for appending; if_sheet_exists="replace"
# overwrites the 'S&P 500' sheet when it is already present.
with pd.ExcelWriter(Output_file, engine='openpyxl', mode='a', if_sheet_exists="replace") as writer:
    sp500_data.to_excel(writer, sheet_name='S&P 500')

# Report the path that was actually written rather than a hard-coded name.
print(f"Prices for the S&P 500 index (^GSPC) saved to {Output_file}")

# Calculating Market Capitalization


In [None]:
from Market_Cap import cal_market_cap, filter_last_date_per_year

# Workbook with the S&P 500 constituents, handed to the market-cap helper
constituents_file = "S&P 500 Constituent.xlsx"
cal_market_cap(start_date, end_date, constituents_file, Output_file, tickers)

# Run the 'Market_Caps' sheet of the output workbook through the
# last-date-per-year filter ...
# NOTE(review): presumably cal_market_cap is what creates this sheet - confirm.
sheet_name = 'Market_Caps'
filtered_df = filter_last_date_per_year(Output_file, sheet_name)

# ... and write the filtered table back over that same sheet.
with pd.ExcelWriter(
    Output_file,
    engine='openpyxl',
    mode='a',
    if_sheet_exists="replace",
) as writer:
    filtered_df.to_excel(writer, sheet_name=sheet_name)

print("Annual maket capital saved to stock_prices.xlsx")

# Calculating annual liquidity


##### Calculating the sum of daily Volume for each firm per year, dividing it by the total shares outstanding, and saving it in the sheet “Liquidity_annual”. This is a measure of how frequently a stock is traded, and generally, it is perceived as a measure of its liquidity.


In [None]:
from liquidity import cal_liquidity
# NOTE(review): excel_file is not referenced anywhere in the visible part of
# this file - the call below passes Output_file instead. Confirm whether it
# is still needed.
excel_file = "Stock Data Output.xlsx"  # Replace with your file path
# Run the liquidity helper with the output workbook, the ticker list and the
# constituents workbook.
cal_liquidity(Output_file, tickers, constituents_file)

# Calculating returns


##### Using adjusted close prices at the annual, monthly, and daily frequencies, computing the annual, monthly, and daily returns. Saving them in new sheets labeled “Returns_annual”, “Returns_monthly”, and “Returns_daily”.


In [None]:
from returns import cal_returns

# Run the returns helper with the output workbook, the ticker list, the
# adjusted close table and the download date range.
# NOTE(review): when the cells run in order, adj_price_data already has its
# index converted to "MM/DD/YYYY" strings - confirm cal_returns expects that.
cal_returns(Output_file, tickers, adj_price_data, start_date, end_date)

# Calculating annual risks


##### Using Returns_daily, calculating the standard deviation of each stock in each year. Saving these in a new sheet called “Risk_annual”.


In [None]:
from risk import cal_risks

# Run the risk helper with the output workbook, the ticker list and the
# adjusted close table.
cal_risks(Output_file, tickers, adj_price_data)

# Calculating statistics


##### Calculating summary statistics of our portfolio holdings (in sheet “Firm_Summary_Stat”)


In [None]:
from beta import beta_main
# Sheets holding the annual returns (output workbook) and the constituents
# table (constituents workbook)
returns_sheet_name = 'Returns_annual'
constituents_sheet_name = 'S&P 500 Constituent'

# Load both tables, using the first column of each sheet as the index
returns_data = pd.read_excel(
    Output_file,
    sheet_name=returns_sheet_name,
    index_col=0,
)
constituents_data = pd.read_excel(
    constituents_file,
    sheet_name=constituents_sheet_name,
    index_col=0,
)

# Restrict the returns to the last 5 years (2018:2022).
# NOTE(review): this label slice assumes the sheet's index is comparable with
# ISO date strings - confirm against what the returns helper writes.
returns_data_last_5_years = returns_data.loc['2018-01-01':'2022-12-31']

# One row per firm, one column per statistic; the columns are filled in the
# order listed here.
summary_stats_df = pd.DataFrame()
stat_methods = {
    'Min': returns_data_last_5_years.min,
    'Max': returns_data_last_5_years.max,
    'Mean': returns_data_last_5_years.mean,
    'Volatility': returns_data_last_5_years.std,
}
for stat_label, compute_stat in stat_methods.items():
    summary_stats_df[stat_label] = compute_stat()

# Firm size: the mean of each column of the market capitalization sheet.
# NOTE(review): the market-cap cell of this file writes a sheet named
# 'Market_Caps'; confirm that a sheet called "Market_Cap" really exists.
market_cap_data = pd.read_excel(
    Output_file,
    sheet_name="Market_Cap",
    index_col=0,
)
summary_stats_df['Size'] = market_cap_data.mean()

# Industry of each firm, taken from the constituents table
summary_stats_df['Industry'] = constituents_data['GICS Sector']

# Store the transposed table (statistics as rows) in the sheet
# "Firm_Summary_Stat" of the existing workbook, replacing it if present.
with pd.ExcelWriter(
    Output_file,
    engine="openpyxl",
    mode="a",
    if_sheet_exists="replace",
) as writer:
    summary_stats_df.T.to_excel(
        writer,
        sheet_name="Firm_Summary_Stat",
        index=True,
        header=True,
    )

# Pass the (untransposed) summary table on to the beta helper
beta_main(
    tickers,
    start_date,
    end_date,
    constituents_file,
    Output_file,
    summary_stats_df,
)

# Personal Portfolio Analysis


##### Strategy = Return: Every January, invest more in firms that had a larger return last year. If they had negative returns, do not invest in them this year.


In [None]:
from portfolio_analysis import cal_portfolio

# Run the portfolio analysis helper on the ticker list and the adjusted close
# table; the two values it returns are passed to the following cells.
# NOTE(review): Rf is presumably the risk-free rate - confirm in portfolio_analysis.
combined_returns, Rf = cal_portfolio(tickers, adj_price_data)

# Calculating portfolio performance


In [None]:
from portfolio_perf import cal_portfolio_perf

# Run the portfolio performance helper on the values returned by cal_portfolio.
cal_portfolio_perf(combined_returns, Rf)

# Calculating Portfolio Returns


In [None]:
from portfolio_return import cal_portfolio_return

# Run the portfolio return helper on the values returned by cal_portfolio.
cal_portfolio_return(combined_returns, Rf)

# Calculating investment summary


In [None]:
from invest_sum import cal_invest_sum

# Run the investment summary helper with the ticker list, the values returned
# by cal_portfolio, the constituents workbook and the output workbook.
cal_invest_sum(tickers, Rf, combined_returns, constituents_file, Output_file)

# Making the Excel file more readable.

###### (This is the last step)


#### Wrapping Text
