In [1]:
#Import Packages
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import numpy as np
import yfinance as yf
import pytz

In [2]:
#Create Term Splitting Logic
user_trade_term = '5Yrs'

if user_trade_term == '10Yrs':
    trade_term = 10
    
if user_trade_term == '5Yrs':
    trade_term = 5

if user_trade_term == '1Yrs':
    trade_term = 1

if user_trade_term == '30Dys':
    trade_term = 0.082135

if user_trade_term == '7Dys':
    trade_term = 0.0191781

if user_trade_term == '6Mth':
    trade_term = 0.5
    
# Read in the stocks dataset to clean it
World_Stocks = pd.read_csv(Path(r"C:\Users\Oluwa\GITHUB\Bootcamp\ASSIGNMENTS\Project_Uno\Resoources\World-Stock-Prices-Dataset.csv"))

#Cut dataset down to US stocks only
World_Stocks = World_Stocks[World_Stocks['Country'] == 'usa']

#Read in the bond yields dataset for later
Bond_yields = pd.read_csv(Path(r"C:\Users\Oluwa\GITHUB\Bootcamp\ASSIGNMENTS\Project_Uno\Resoources\bond_yields_all.csv"))
                         
# Specify the format of the 'Date' column and save it as a variable
date_format = '%Y-%m-%d %H:%M:%S%z'

# Use the variable to reformat the column using a universal timezone and handle errors
try:
    World_Stocks['Date'] = pd.to_datetime(World_Stocks['Date'], format=date_format, utc=True, errors='coerce')
#Use the except functionality to print an error if this doesnt work
except Exception as e:
    print(f"An error occurred while parsing datetime values: {e}")

#Make sure Bond_yields has the same format
Bond_yields['Date'] = pd.to_datetime(Bond_yields['date'], format=date_format, utc=True, errors='coerce')

# Set 'Date' format to match 'World_Stocks' and fill missing values with 0
Bond_yields['Date'] = Bond_yields['Date'].dt.strftime('%Y-%m-%d %H:%M:%S%z')
Bond_yields.fillna(0, inplace=True)

Yr5_Bond=Bond_yields.drop(columns='date') #cleanup

# Convert the 'Date' column to datetime
Yr5_Bond['Date'] = pd.to_datetime(Yr5_Bond['Date'], format='%Y-%m-%d %H:%M:%S%z', errors='coerce')
Yr5_Bond.fillna(0, inplace=True)

# Make sure all stock data calls cut off at the same datetime by making their timezones match
cutoff_date = datetime(2023, 9, 20) - timedelta(days=365 * trade_term)

# Convert cutoff_date to a string with timezone information
cutoff_date_str = cutoff_date.strftime(date_format)

# Manually add UTC offset to the string (+00:00 for UTC) | Ensure that all dates in the string match the UTC format
cutoff_date_str += '+00:00'

# Parse the string back to a datetime64 object with the same timezone
try:
    cutoff_date = pd.to_datetime(cutoff_date_str, format=date_format, utc=True)
#Use the except functionality to print an error if this doesnt work
except Exception as e:
    print(f"An error occurred while parsing the cutoff_date: {e}")

#Replace the timezone information with the one from the first date of the dataframe. Normalizing all the date data.
cutoff_date = cutoff_date.replace(tzinfo=World_Stocks['Date'].iloc[0].tzinfo)

# Create a new dataframe variable with only the cells that are after the cut off date
Smaller_Dataframe = World_Stocks[World_Stocks['Date'] >= cutoff_date]

# Convert the 'Date' column in 'Yr5_Bond' to string format
Yr5_Bond['Date'] = Yr5_Bond['Date'].dt.strftime('%Y-%m-%d')

# Convert the 'Date' column in Smaller_Dataframe to string format
Smaller_Dataframe['Date'] = Smaller_Dataframe['Date'].astype(str)

# Extract the first 10 characters from the 'Date' column in 'Smaller_Dataframe'
matching_dates = Smaller_Dataframe['Date'].str[:10].unique()

# Filter 'Yr5_Bond' based on matching dates
Bond_yields_filtered = Yr5_Bond[Yr5_Bond['Date'].str[:10].isin(matching_dates)]

# Convert 'Date' column in 'Yr5_Bond' back to the original datetime format
Yr5_Bond['Date'] = pd.to_datetime(Yr5_Bond['Date'], format='%Y-%m-%d')

# Convert 'Date' column in 'Bond_yields_filtered' back to the original datetime format
Bond_yields_filtered['Date'] = pd.to_datetime(Bond_yields_filtered['Date'], format='%Y-%m-%d')

#For some reason I couldnt organize the dataframe unless I explicitly copied it. Tried to just make it a variable
Calculations_df = Smaller_Dataframe.copy()

#Sort the dataframe and clean the data
Calculations_df.sort_values(['Ticker', 'Date'], inplace=True)
Useless_Columns=['Dividends', 'Stock Splits']
Calculations_df.drop(columns=Useless_Columns)
Calculations_df.dropna()
Compared_Calcs= Calculations_df.groupby(['Date', 'Ticker'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Smaller_Dataframe['Date'] = Smaller_Dataframe['Date'].astype(str)


In [3]:
# Import data for S&P 50 from Yahoo
ticker_symbols = '^GSPC'

#Set variables for later
end_date = datetime(2023, 9, 20)
end_date = end_date.replace(tzinfo=pytz.UTC)
start_date = end_date - timedelta(days=365 * trade_term)
SPData=pd.DataFrame()

# Fetch historical data from Yahoo Finance
company = yf.Ticker(ticker_symbols)
historical_data = company.history(period="max")

# Filter data for the specified date range (5 years from the end of September)
historical_data = historical_data[(historical_data.index >= start_date) & (historical_data.index <= end_date)]

# Reset the index to make the date a column
historical_data.reset_index(inplace=True)

historical_data['Ticker'] = ticker_symbols

# Create a new DataFrame by concatenating filtered data
SPData = pd.concat([SPData, historical_data])

# Drop columns from the new DataFrame
SPData = SPData.drop(columns=Useless_Columns)

# Add new columns
SPData['Brand_Name'] = 'SP500'
SPData['Industry_Tag'] = 'Market Reference'
SPData['Country'] = 'Global'

# Drop rows with missing values (if needed)
SPData = SPData.dropna()

# Sort the DataFrame
SPData.sort_values(['Ticker', 'Date'], inplace=True)

# Initialize an empty DataFrameand list to store Beta values
beta_df = pd.DataFrame(columns=['Ticker', 'Beta'])
beta_data=[]


In [4]:
# Calculate daily returns using the Open and Close columns
Calculations_df['Daily_Return'] = Calculations_df['Close'].pct_change() * 100
SPData['SP500_Daily_Return'] = SPData['Close'].pct_change() * 100


# Create a new DataFrame for Compared_Calcs with 'Daily_Return' values
Compared_Calcs = Calculations_df[['Ticker', 'Daily_Return', 'Date']].copy()

# Check for missing values in 'Daily_Return' column of Compared_Calcs
missing_values = Compared_Calcs['Daily_Return'].isna().sum()

# Handle missing values (e.g., fill or drop them)
if missing_values > 0:
    Compared_Calcs['Daily_Return'].fillna(0, inplace=True)

#Calculate cumulative daily returns for each Ticker
Compared_Calcs['Cumulative_Return'] = (1 + Compared_Calcs['Daily_Return']).groupby(Compared_Calcs['Ticker']).cumprod()

#Calculate the average annualized yield
days_per_year = 252*trade_term  
Compared_Calcs['Yield'] = (Compared_Calcs['Cumulative_Return'] ** (1 / days_per_year) - 1) * 100

In [5]:
# Calculate Volatility
stock_vol = Calculations_df.groupby('Ticker')['Daily_Return'].std()

# Calculate Value at Risk on each section grouped by Ticker/ Date
def calculate_var(group):
    return group['Daily_Return'].quantile(1 - .95)

# Apply the function to each group
var_df = Compared_Calcs.groupby('Ticker').apply(calculate_var).reset_index()

# Create a new dataframe with this info
var_df.columns = ['Ticker', 'VaR']

# Merge the stock data and S&P 500 data based on the index
merged_data = Compared_Calcs.merge(SPData[['SP500_Daily_Return']], left_index=True, right_index=True, how='inner')

# Calculate Beta for each Ticker
for ticker in Compared_Calcs['Ticker'].unique():
    if ticker != '^GSPC':
        # Filter data for the current stock and ^GSPC
        stock_data = merged_data[merged_data['Ticker'] == ticker]
        
        # Calculate the covariance between stock returns and ^GSPC returns
        covariance = np.cov(stock_data['Daily_Return'], stock_data['SP500_Daily_Return'])[0, 1]
        
        # Calculate the variance of ^GSPC returns
        variance_SP500 = np.var(stock_data['SP500_Daily_Return'])
        
        # Calculate Beta
        beta = covariance / variance_SP500
        
        # Append the data as a tuple to the list
        beta_data.append((ticker, beta))

# Convert the list of tuples into a DataFrame
beta_df = pd.DataFrame(beta_data, columns=['Ticker', 'Beta'])

#Create a variable for the average bond yield in a 5 year period
Avg_5Yr_Bond_Yield = Bond_yields_filtered['CDN.AVG.3YTO5Y.AVG'].mean()

#Calculate the difference between the average bond yield and each stock's yield:
Compared_Calcs['Bond Safety Ratio'] = abs(Compared_Calcs['Yield'] - Avg_5Yr_Bond_Yield)

#Reorganize Compared Calcs by Ticker
regrouped_compared_calcs = Compared_Calcs.groupby(['Ticker', 'Date']).mean()

In [6]:
#Check for correlations
correlation_matrix1 = regrouped_compared_calcs[['Bond Safety Ratio', 'Daily_Return']].corr()
correlation_matrix2 = merged_data[['SP500_Daily_Return', 'Daily_Return']].corr()
#Both have weak correlations | Abandoning correlation matrix

In [7]:
#List of Calculated Variables for Analysis
regrouped_compared_calcs
beta_df
var_df
stock_vol
SPData

# Find the top 50 highest cumulative returns while excluding 'inf' values
top_cumulative_returns = regrouped_compared_calcs[regrouped_compared_calcs['Cumulative_Return'] != float('inf')]['Cumulative_Return'].nlargest(50)
# Count occurrences of each 'Ticker'
cumulative_ticker_count = top_cumulative_returns.groupby(level=0).size()
cumulative_ticker_count = cumulative_ticker_count.sort_values(ascending=False)
cumulative_ticker_count_list = cumulative_ticker_count.index.tolist()
# Find the fifteen most frequent 'Ticker' values
most_frequent_tickers_list = cumulative_ticker_count.nlargest(15).index.tolist()


# Find the best stocks based on Beta
lowest_risk_stocks = beta_df.sort_values(by='Beta').head(15)
lowest_risk_stocks_list = lowest_risk_stocks['Ticker'].tolist()

# Find the top 15 stocks with the lowest VaR
lowest_var_stock = var_df.nsmallest(15, 'VaR')
lowest_var_stock_list = lowest_var_stock['Ticker'].tolist()

# Get the 15 cells with the lowest values
lowest_volatility = stock_vol.nsmallest(15)
lowest_volatility_list = lowest_volatility.index.tolist()

# Combine all the lists into one
master_analysis_list = (
    lowest_volatility_list +
    lowest_var_stock_list +
    lowest_risk_stocks_list +
    most_frequent_tickers_list +
    cumulative_ticker_count_list
)

# Create a dictionary to count the frequency of each unique value
stock_count = {}
for stock in master_analysis_list:
    if stock in stock_count:
        stock_count[stock] += 1
    else:
        stock_count[stock] = 1

# Sort the stocks by frequency (most frequent first)
low_risk_stocks_master_list = sorted(stock_count.keys(), key=lambda x: stock_count[x], reverse=True)

#Define the function to show a recommendaiton of 10 stocks
def print_first_10_strings(input_list):
    for item in input_list[:10]:
        print(item)

#Print Final Recommendation
print_first_10_strings(low_risk_stocks_master_list)

SQ
PTON
TSLA
CROX
PINS
HSY
HD
PG
COIN
JWN
