In [None]:
#All modules used.
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
import datetime
import pandas_datareader as data

In [None]:
# Load the firm table from 'Firm_SPY500.csv', indexed by ticker symbol and
# sorted alphabetically by that index.
SPY_Firms = pd.read_csv('Firm_SPY500.csv', index_col='Symbol').sort_index()
# Relabel the non-index columns positionally; the CSV must therefore provide
# exactly four of them, in this order.
SPY_Firms.columns = ['Unnamed: 0', 'Firm', 'GICS Sector', 'Headquarters Location']

In [None]:
# Ticker symbols taken from the index of SPY_Firms, in index order; later cells
# loop over this list.
Symbols = list(SPY_Firms.index)

In [None]:
# Download daily price data for every ticker over a fixed calendar window
# (2021-10-11 through 2021-11-22) into a dictionary keyed by ticker symbol.
# Tickers whose download raises KeyError are skipped, so `d` may contain fewer
# entries than `Symbols`.
start = datetime.datetime(2021, 10, 11)
end = datetime.datetime(2021, 11, 22)
d = {}
for symbol in Symbols:
    try:
        d[symbol] = data.DataReader(symbol,
                                    start=start,
                                    end=end,
                                    data_source='yahoo')
    except KeyError:
        # Name the ticker that failed so it can actually be followed up on;
        # the bare message gave no way to tell which download was missing.
        print('Further Analysis Required:', symbol)

In [None]:
# Combine the per-ticker frames side by side into one DataFrame whose columns
# have two levels: the ticker ('Symbol') and the downloaded field ('Info').
# The keys are taken from `d` itself rather than from `Symbols`: if any download
# was skipped, `Symbols` is longer than `d` and pairing the two would attach the
# wrong ticker to every frame after the first gap.
Firms_SPY500_30 = pd.concat(list(d.values()), axis=1, keys=list(d.keys()))
Firms_SPY500_30.columns.names = ['Symbol', 'Info']
# Cache the combined frame on disk.
Firms_SPY500_30.to_pickle('Firms_SPY500_30.pkl')
Firms_SPY500_30.head(3)

In [None]:
# Keep only the 'Adj Close' field for every ticker: selecting it on the 'Info'
# column level leaves one price column per symbol.
# TODO: explain in the write-up why 'Adj Close' is used rather than 'Close'.
Firms_SPY500_30_Adj = Firms_SPY500_30.xs('Adj Close', level='Info', axis=1)
Firms_SPY500_30_Adj.head(1)

In [None]:
# Daily percentage return of every stock, computed for all columns at once.
# Working from the columns that are actually present (instead of looping over
# `Symbols`) avoids a KeyError for tickers that failed to download and avoids
# growing the frame one column at a time.
# The first row has no previous price, so its return is undefined; it is dropped
# by position, which works even when the first downloaded date differs from
# `start`.
Daily_Return = Firms_SPY500_30_Adj.pct_change().iloc[1:]
# Sanity check: True if any return is still missing.
Daily_Return.isna().values.any()

In [None]:
# Cumulative return of each stock: compound the daily growth factors
# (1 + daily return) down the rows, then subtract 1 to express the result as a
# return again.
growth_factors = 1 + Daily_Return
Daily_Return_Cum = growth_factors.cumprod() - 1

In [None]:
# Total cumulative return per stock: the last row of the cumulative-return
# frame, turned into a one-column frame indexed by ticker.
Returns_30day = pd.DataFrame({'30d Return': Daily_Return_Cum.iloc[-1]})
Returns_30day.index.name = 'Symbol'
# Attach the firm details by ticker and discard the columns that are not used
# in the analysis below.
Returns_30day_info = (
    SPY_Firms
    .merge(Returns_30day, on='Symbol')
    .drop(columns=['Unnamed: 0', 'Headquarters Location'])
)
Returns_30day_info.sort_values('Symbol')

In [None]:
# Bar chart: number of firms per sector among the 50 best-performing firms.
Top_Firms = Returns_30day_info.sort_values('30d Return', ascending=False).head(50)
# Take labels and bar lengths from the same value_counts() result. unique()
# lists sectors in order of appearance while value_counts() sorts by count, so
# combining the two attaches counts to the wrong sector names.
top_sector_counts = Top_Firms['GICS Sector'].value_counts()
sns.barplot(x=top_sector_counts.values, y=top_sector_counts.index.tolist(), color='b')
plt.title('Sectors of Top Performing Firms')
plt.xlabel('Number of Firms')
plt.savefig('Sectors of Top Performing Firms.png', bbox_inches='tight')

In [None]:
# Bar chart: number of firms per sector among the 50 worst-performing firms.
Bottom_Firms = Returns_30day_info.sort_values('30d Return', ascending=True).head(50)
# Take labels and bar lengths from the same value_counts() result. unique()
# lists sectors in order of appearance while value_counts() sorts by count, so
# combining the two attaches counts to the wrong sector names.
bottom_sector_counts = Bottom_Firms['GICS Sector'].value_counts()
sns.barplot(x=bottom_sector_counts.values, y=bottom_sector_counts.index.tolist(), color='b')
plt.title('Sectors of Worst Performing Firms')
plt.xlabel('Number of Firms')
plt.savefig('Sectors of Worst Performing Firms.png', bbox_inches='tight')

In [None]:
# Share of each sector among all firms, as a baseline for judging whether a
# sector is over-represented in the top/bottom charts.
fig = plt.figure(figsize=(6, 6))
# Let the pie take its labels from the value_counts() index. Passing
# labels=unique() pairs count-sorted slices with appearance-ordered names and
# mislabels the sectors.
Returns_30day_info['GICS Sector'].value_counts().plot(kind='pie', autopct='%1.1f%%', colormap='tab20')
plt.ylabel('')
plt.title('S&P500 Sectors')
plt.savefig('S&P 500 Sectors.png', bbox_inches='tight')

In [None]:
# Insight 2: spread and skewness of the total return within each sector.
# Author's observation: Information Technology is widely spread, Utilities is compact.
fig = plt.figure(figsize=(12, 5))
sector_ax = sns.boxplot(
    data=Returns_30day_info,
    x='GICS Sector',
    y='30d Return',
    showfliers=True,
    color='dodgerblue',
)
# Tilt the sector names so they do not overlap.
sector_ax.set_xticklabels(sector_ax.get_xticklabels(), rotation=45)
# Dashed reference line at a return of zero.
plt.axhline(y=0, c='black', alpha=1, ls='--')
plt.title('Total 30 Day Return by Sector', pad=20)
plt.savefig('Boxplot of Firms by Sector.png', bbox_inches='tight')

In [None]:
# Volatility per stock: standard deviation of the daily percentage returns over
# the window. The deviation is taken on returns rather than on price levels
# because the spread of raw prices scales with the share price, which would rank
# expensive stocks as "volatile" regardless of how much they actually move.
SPY_Firms_30d_STD = pd.DataFrame(Firms_SPY500_30_Adj.pct_change().std())
SPY_Firms_30d_STD.columns = ['30d Standard Deviation']
# Attach the firm details by ticker, discard unused columns, and order the
# firms from least to most volatile.
SPY_Firms_30d_STD = (
    SPY_Firms
    .merge(SPY_Firms_30d_STD, on='Symbol')
    .drop(columns=['Unnamed: 0', 'Headquarters Location'])
    .sort_values(by='30d Standard Deviation')
)

In [None]:
# Bar chart: number of firms per sector among the 50 most volatile stocks
# (the last 50 rows of the frame sorted by ascending standard deviation).
Most_vol = SPY_Firms_30d_STD.tail(50)
sector_counts = Most_vol['GICS Sector'].value_counts()
# Build the plotting frame with explicit column names instead of relying on how
# value_counts() names its result, which differs between pandas versions.
# 'Stock' holds the sector label and 'GICS Sector' holds the firm count, matching
# the column names this cell has always produced.
a = pd.DataFrame({'Stock': sector_counts.index, 'GICS Sector': sector_counts.values})
sns.barplot(x='GICS Sector', y='Stock', data=a, color='b')
plt.title("Sectors of Most Volitile Firms")
plt.ylabel('')
plt.xlabel('Number of Firms')
plt.savefig('Sectors of Most Volitile Firms.png', bbox_inches='tight')

In [None]:
# Bar chart: number of firms per sector among the 50 least volatile stocks
# (the first 50 rows of the frame sorted by ascending standard deviation).
Least_vol = SPY_Firms_30d_STD.head(50)
sector_counts = Least_vol['GICS Sector'].value_counts()
# Build the plotting frame with explicit column names instead of relying on how
# value_counts() names its result, which differs between pandas versions.
# 'Stock' holds the sector label and 'GICS Sector' holds the firm count, matching
# the column names this cell has always produced.
a = pd.DataFrame({'Stock': sector_counts.index, 'GICS Sector': sector_counts.values})
sns.barplot(x='GICS Sector', y='Stock', data=a, color='b')
plt.title("Sectors of Least Volitile Firms")
plt.ylabel('')
plt.xlabel('Number of Firms')
plt.savefig('Sectors of Least Volitile Firms.png', bbox_inches='tight')