# Smart_Stock_ML: Stock Clustering

In [135]:
# Import dependencies
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

## Import company data

In [204]:
#  Import company dataset
company_df = pd.read_csv('data/company_clean.csv')
company_df.head()

Unnamed: 0,Ticker,GICS Sector,Gender,Headquarters State,Salary_Bins
0,A,Health Care,M,California,"(15000000, 20000000]"
1,AAL,Industrials,M,Texas,"(0, 5000000]"
2,AAPL,Information Technology,M,California,"(50000000, 300000000]"
3,ABBV,Health Care,M,Illinois,"(25000000, 50000000]"
4,ABNB,Consumer Discretionary,M,California,"(50000000, 300000000]"


## Extract stock data

In [99]:
# Extract full dataset for all tickers on daily interval for date range 2015 - 2024
bad_tickers = []
mean_stocks_df = pd.DataFrame()

for index,row in company_df.iterrows():
    try:
        ticker = row["Ticker"]
        print(f'Download complete for ticker {ticker}')
        stock_data = yf.download(ticker, start="2015-01-01", end="2024-01-01", interval="1d")
        stock_df = pd.DataFrame(stock_data).reset_index()
        stock_df['Ticker'] = ticker
        stock_df['Year'] =  stock_df["Date"].iloc[0].year
        mean_stocks_df = pd.concat([mean_stocks_df, stock_df], ignore_index=True)
        
    except Exception as e:
        print(f'Error occured in stock download: {e}')
        bad_tickers.append(ticker)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

A
AAL
AAPL
ABBV
ABNB
ABT
ACGL



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ACN
ADBE
ADI
ADM
ADP
ADSK
AEE
AEP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


AES
AFL
AIG
AIZ
AJG
AKAM
ALB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ALGN
ALL
ALLE
AMAT
AMCR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


AMD
AME
AMGN
AMP
AMT
AMZN
ANET


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ANSS
AON
AOS
APA
APD
APH
APTV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ARE
ATO
AVB
AVGO
AVY
AWK
AXON


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


AXP
AZO
BA
BAC
BALL
BAX
BBWI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BF.B']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (1d 2015-01-01 -> 2024-01-01)')


BBY
BDX
BEN
BF.B
$BF.B: possibly delisted; No price data found  (1d 2015-01-01 -> 2024-01-01)
Error occured in stock download: single positional indexer is out-of-bounds
BG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


BIIB
BIO


[*********************100%%**********************]  1 of 1 completed


BK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


BKNG
BKR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


BLDR
BLK


[*********************100%%**********************]  1 of 1 completed


BMY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


BR
BRK.B


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


Error occured in stock download: single positional indexer is out-of-bounds
BRO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


BSX
BWA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


BX
BXP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


C
CAG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CAH
CARR


[*********************100%%**********************]  1 of 1 completed


CAT


[*********************100%%**********************]  1 of 1 completed


CB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CBOE
CBRE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CCI
CCL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CDNS
CDW


[*********************100%%**********************]  1 of 1 completed


CE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CEG
CF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CFG
CHD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CHRW
CHTR


[*********************100%%**********************]  1 of 1 completed


CI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CINF
CL


[*********************100%%**********************]  1 of 1 completed


CLX


[*********************100%%**********************]  1 of 1 completed


CMCSA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CME
CMG


[*********************100%%**********************]  1 of 1 completed


CMI


[*********************100%%**********************]  1 of 1 completed


CMS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CNC
CNP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


COF
COO


[*********************100%%**********************]  1 of 1 completed


COP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


COR
COST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CPAY
CPB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CPRT
CPT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CRL
CRM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CRWD
CSCO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CSGP
CSX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CTAS
CTLT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CTRA
CTSH


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CTVA
CVS


[*********************100%%**********************]  1 of 1 completed


CVX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


CZR
D


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DAL
DAY


[*********************100%%**********************]  1 of 1 completed


DD


[*********************100%%**********************]  1 of 1 completed


DE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DECK
DFS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DG
DGX


[*********************100%%**********************]  1 of 1 completed


DHI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

DHR
DIS



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DLR
DLTR


[*********************100%%**********************]  1 of 1 completed


DOC


[*********************100%%**********************]  1 of 1 completed


DOV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DOW
DPZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

DRI
DTE



[*********************100%%**********************]  1 of 1 completed


DUK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DVA
DVN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


DXCM
EA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EBAY
ECL


[*********************100%%**********************]  1 of 1 completed


ED


[*********************100%%**********************]  1 of 1 completed


EFX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EG
EIX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EL
ELV


[*********************100%%**********************]  1 of 1 completed


EMN


[*********************100%%**********************]  1 of 1 completed


EMR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ENPH
EOG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EPAM
EQIX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EQR
EQT


[*********************100%%**********************]  1 of 1 completed


ES


[*********************100%%**********************]  1 of 1 completed


ESS


[*********************100%%**********************]  1 of 1 completed


ETN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

ETR



[*********************100%%**********************]  1 of 1 completed


ETSY
EVRG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EW
EXC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EXPD
EXPE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EXR
F


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FANG
FAST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FCX
FDS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FDX
FE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FFIV
FI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FICO
FIS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

FITB
FMC



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FOXA
FRT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FSLR
FTNT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


FTV
GD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


GDDY
GE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


GEHC
GEN


[*********************100%%**********************]  1 of 1 completed


GEV


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GEV']: YFChartError("%ticker%: Data doesn't exist for startDate = 1420088400, endDate = 1704085200")
[*********************100%%**********************]  1 of 1 completed


Error occured in stock download: single positional indexer is out-of-bounds
GILD
GIS


[*********************100%%**********************]  1 of 1 completed


GL


[*********************100%%**********************]  1 of 1 completed


GLW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


GM
GNRC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


GOOGL
GPC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

GPN
GRMN



[*********************100%%**********************]  1 of 1 completed


GS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


GWW
HAL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


HAS
HBAN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


HCA
HD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

HES



[*********************100%%**********************]  1 of 1 completed

HIG



[*********************100%%**********************]  1 of 1 completed


HII
HLT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


HOLX
HON


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


HPE
HPQ


[*********************100%%**********************]  1 of 1 completed


HRL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


HSIC
HST


[*********************100%%**********************]  1 of 1 completed


HSY


[*********************100%%**********************]  1 of 1 completed


HUBB


[*********************100%%**********************]  1 of 1 completed


HUM


[*********************100%%**********************]  1 of 1 completed


HWM


[*********************100%%**********************]  1 of 1 completed


IBM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ICE
IDXX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

IEX





IFF


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


INCY
INTC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


INTU
INVH


[*********************100%%**********************]  1 of 1 completed


IP


[*********************100%%**********************]  1 of 1 completed


IPG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


IQV
IR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

IRM



[*********************100%%**********************]  1 of 1 completed


ISRG
IT


[*********************100%%**********************]  1 of 1 completed


ITW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

IVZ



[*********************100%%**********************]  1 of 1 completed


J
JBHT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


JBL
JCI


[*********************100%%**********************]  1 of 1 completed


JKHY


[*********************100%%**********************]  1 of 1 completed


JNJ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


JNPR
JPM


[*********************100%%**********************]  1 of 1 completed


K


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


KDP
KEY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


KEYS
KHC


[*********************100%%**********************]  1 of 1 completed


KIM


[*********************100%%**********************]  1 of 1 completed


KKR


[*********************100%%**********************]  1 of 1 completed


KLAC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

KMB



[*********************100%%**********************]  1 of 1 completed


KMI
KMX


[*********************100%%**********************]  1 of 1 completed


KO


[*********************100%%**********************]  1 of 1 completed


KR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


KVUE
L


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


LDOS
LEN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

LH





LHX


[*********************100%%**********************]  1 of 1 completed


LIN


[*********************100%%**********************]  1 of 1 completed


LKQ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

LLY
LMT



[*********************100%%**********************]  1 of 1 completed


LNT


[*********************100%%**********************]  1 of 1 completed


LOW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

LRCX



[*********************100%%**********************]  1 of 1 completed


LULU
LUV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


LVS
LW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


LYB
LYV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

MA



[*********************100%%**********************]  1 of 1 completed

MAA
MAR



[*********************100%%**********************]  1 of 1 completed


MAS


[*********************100%%**********************]  1 of 1 completed


MCD


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

MCHP



[*********************100%%**********************]  1 of 1 completed

MCK
MCO



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

MDLZ





MDT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MET
META


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MGM
MHK


[*********************100%%**********************]  1 of 1 completed


MKC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MKTX
MLM


[*********************100%%**********************]  1 of 1 completed


MMC


[*********************100%%**********************]  1 of 1 completed


MMM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MNST
MO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MOH
MOS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MPC
MPWR


[*********************100%%**********************]  1 of 1 completed


MRK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


MRNA
MRO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

MS



[*********************100%%**********************]  1 of 1 completed


MSCI
MSFT


[*********************100%%**********************]  1 of 1 completed


MSI


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

MTB



[*********************100%%**********************]  1 of 1 completed


MTCH
MTD


[*********************100%%**********************]  1 of 1 completed


MU


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


NCLH
NDAQ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


NDSN
NEE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

NEM



[*********************100%%**********************]  1 of 1 completed


NFLX
NI


[*********************100%%**********************]  1 of 1 completed


NKE


[*********************100%%**********************]  1 of 1 completed


NOC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


NOW
NRG


[*********************100%%**********************]  1 of 1 completed


NSC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

NTAP



[*********************100%%**********************]  1 of 1 completed


NTRS
NUE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


NVDA
NVR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


NWSA
NXPI


[*********************100%%**********************]  1 of 1 completed


O


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ODFL
OKE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

OMC



[*********************100%%**********************]  1 of 1 completed


ON
ORCL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ORLY
OTIS


[*********************100%%**********************]  1 of 1 completed


OXY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PANW
PARA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PAYC
PAYX


[*********************100%%**********************]  1 of 1 completed


PCAR


[*********************100%%**********************]  1 of 1 completed


PCG


[*********************100%%**********************]  1 of 1 completed


PEG


[*********************100%%**********************]  1 of 1 completed


PEP


[*********************100%%**********************]  1 of 1 completed


PFE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

PFG





PG


[*********************100%%**********************]  1 of 1 completed


PGR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

PH





PHM


[*********************100%%**********************]  1 of 1 completed


PKG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PLD
PM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

PNC





PNR


[*********************100%%**********************]  1 of 1 completed


PNW


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PODD
POOL


[*********************100%%**********************]  1 of 1 completed


PPG


[*********************100%%**********************]  1 of 1 completed


PPL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PRU
PSA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PSX
PTC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


PWR
PYPL


[*********************100%%**********************]  1 of 1 completed


QCOM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


QRVO
RCL


[*********************100%%**********************]  1 of 1 completed


REG


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


REGN
RF


[*********************100%%**********************]  1 of 1 completed


RJF


[*********************100%%**********************]  1 of 1 completed


RL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


RMD
ROK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

ROL





ROP


[*********************100%%**********************]  1 of 1 completed


ROST


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

RSG





RTX


[*********************100%%**********************]  1 of 1 completed


RVTY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


SBAC
SBUX


[*********************100%%**********************]  1 of 1 completed


SCHW


[*********************100%%**********************]  1 of 1 completed


SHW


[*********************100%%**********************]  1 of 1 completed


SJM


[*********************100%%**********************]  1 of 1 completed


SLB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


SMCI
SNA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


SNPS
SO


[*********************100%%**********************]  1 of 1 completed


SOLV


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SOLV']: YFChartError("%ticker%: Data doesn't exist for startDate = 1420088400, endDate = 1704085200")
[*********************100%%**********************]  1 of 1 completed

Error occured in stock download: single positional indexer is out-of-bounds
SPG





SPGI


[*********************100%%**********************]  1 of 1 completed


SRE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


STE
STLD


[*********************100%%**********************]  1 of 1 completed


STT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

STX





STZ


[*********************100%%**********************]  1 of 1 completed


SW


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SW']: YFChartError("%ticker%: Data doesn't exist for startDate = 1420088400, endDate = 1704085200")


Error occured in stock download: single positional indexer is out-of-bounds
SWK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


SWKS
SYF


[*********************100%%**********************]  1 of 1 completed


SYK


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

SYY



[*********************100%%**********************]  1 of 1 completed

T





TAP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TDG
TDY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TECH
TEL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TER
TFC


[*********************100%%**********************]  1 of 1 completed


TFX


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

TGT





TJX


[*********************100%%**********************]  1 of 1 completed


TMO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TMUS
TPR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TRGP
TRMB


[*********************100%%**********************]  1 of 1 completed


TROW


[*********************100%%**********************]  1 of 1 completed


TRV


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

TSCO



[*********************100%%**********************]  1 of 1 completed


TSLA
TSN


[*********************100%%**********************]  1 of 1 completed


TT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TTWO
TXN


[*********************100%%**********************]  1 of 1 completed


TXT


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


TYL
UAL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


UBER
UDR


[*********************100%%**********************]  1 of 1 completed


UHS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ULTA
UNH


[*********************100%%**********************]  1 of 1 completed


UNP


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

UPS
URI



[*********************100%%**********************]  1 of 1 completed


USB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

V



[*********************100%%**********************]  1 of 1 completed


VICI
VLO


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


VLTO
VMC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


VRSK
VRSN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


VRTX
VST


[*********************100%%**********************]  1 of 1 completed


VTR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

VTRS





VZ


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

WAB



[*********************100%%**********************]  1 of 1 completed


WAT
WBA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


WBD
WDC


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

WEC





WELL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

WFC



[*********************100%%**********************]  1 of 1 completed

WM





WMB


[*********************100%%**********************]  1 of 1 completed


WMT


[*********************100%%**********************]  1 of 1 completed


WRB


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

WST



[*********************100%%**********************]  1 of 1 completed


WTW
WY


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

WYNN





XEL


[*********************100%%**********************]  1 of 1 completed


XOM


[*********************100%%**********************]  1 of 1 completed


XYL


[*********************100%%**********************]  1 of 1 completed


YUM


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ZBH
ZBRA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


ZTS


In [100]:
# Print bad tickers
bad_tickers

['BF.B', 'BRK.B', 'GEV', 'SOLV', 'SW']

In [101]:
# View dataset
mean_stocks_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1097023 entries, 0 to 1097022
Data columns (total 9 columns):
 #   Column     Non-Null Count    Dtype         
---  ------     --------------    -----         
 0   Date       1097023 non-null  datetime64[ns]
 1   Open       1097023 non-null  float64       
 2   High       1097023 non-null  float64       
 3   Low        1097023 non-null  float64       
 4   Close      1097023 non-null  float64       
 5   Adj Close  1097023 non-null  float64       
 6   Volume     1097023 non-null  int64         
 7   Ticker     1097023 non-null  object        
 8   Year       1097023 non-null  int64         
dtypes: datetime64[ns](1), float64(5), int64(2), object(1)
memory usage: 75.3+ MB


## Calculate metrics

In [162]:
# Calculate mean values on annual data, including annual return and variance
mean_stocks_df['Year'] = mean_stocks_df['Date'].dt.year
grouped_df = mean_stocks_df.groupby(['Ticker', 'Year'])
mean_values = grouped_df[['Open', 'High', 'Low', 'Close', 'Volume']].mean().rename(
    columns={'Open': 'MeanOpen', 'High': 'MeanHigh', 'Low': 'MeanLow', 'Close': 'MeanClose', 'Volume': 'MeanVolume'}
)

annual_return = grouped_df['Close'].apply(lambda x: (x.iloc[-1] - x.iloc[0]) / x.iloc[0])
annual_variance = grouped_df['Close'].var()

# Create fianl mean_stats dataframe
mean_stats_df = mean_values.copy()
mean_stats_df['AnnualReturn'] = annual_return
mean_stats_df['AnnualVariance'] = annual_variance

In [163]:
# Display results
mean_stats_df

Unnamed: 0_level_0,Unnamed: 1_level_0,MeanOpen,MeanHigh,MeanLow,MeanClose,MeanVolume,AnnualReturn,AnnualVariance
Ticker,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,2015,39.590516,39.941389,39.234643,39.602143,2.560682e+06,0.030819,5.736657
A,2016,43.338492,43.731587,42.970437,43.369960,2.083069e+06,0.119685,12.947075
A,2017,59.322271,59.725896,58.913386,59.329681,1.850937e+06,0.440525,44.445197
A,2018,67.223705,67.864462,66.522908,67.169363,2.592049e+06,-0.002071,9.873542
A,2019,75.170714,75.822103,74.499524,75.211825,2.157036e+06,0.298676,24.061491
...,...,...,...,...,...,...,...,...
ZTS,2019,110.319841,111.279087,109.404920,110.373135,2.109394e+06,0.566643,202.899714
ZTS,2020,144.740039,146.621818,142.957194,144.786205,2.024208e+06,0.233786,266.081913
ZTS,2021,188.483095,190.247302,186.854960,188.649088,1.701269e+06,0.491717,640.111215
ZTS,2022,171.595896,173.568407,169.238367,171.307928,2.188180e+06,-0.373798,402.527451


In [182]:
# Reset index
mean_stats_df = mean_stats_df.reset_index().set_index('Ticker')
mean_stats_df

Unnamed: 0_level_0,Year,MeanOpen,MeanHigh,MeanLow,MeanClose,MeanVolume,AnnualReturn,AnnualVariance
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,2015,39.590516,39.941389,39.234643,39.602143,2.560682e+06,0.030819,5.736657
A,2016,43.338492,43.731587,42.970437,43.369960,2.083069e+06,0.119685,12.947075
A,2017,59.322271,59.725896,58.913386,59.329681,1.850937e+06,0.440525,44.445197
A,2018,67.223705,67.864462,66.522908,67.169363,2.592049e+06,-0.002071,9.873542
A,2019,75.170714,75.822103,74.499524,75.211825,2.157036e+06,0.298676,24.061491
...,...,...,...,...,...,...,...,...
ZTS,2019,110.319841,111.279087,109.404920,110.373135,2.109394e+06,0.566643,202.899714
ZTS,2020,144.740039,146.621818,142.957194,144.786205,2.024208e+06,0.233786,266.081913
ZTS,2021,188.483095,190.247302,186.854960,188.649088,1.701269e+06,0.491717,640.111215
ZTS,2022,171.595896,173.568407,169.238367,171.307928,2.188180e+06,-0.373798,402.527451


## Unsupervised learning: K-Means

### Preprocessing

In [185]:
# Scale numeric values
mean_scaled = StandardScaler().fit_transform(
    mean_stats_df[['Year','MeanOpen', 'MeanHigh', 'MeanLow', 'MeanClose', 'MeanVolume',
       'AnnualReturn', 'AnnualVariance']]
)

In [190]:
# Create a DataFrame with the scaled data
mean_scaled_df = pd.DataFrame(
    mean_scaled,
    columns=['Year', 'MeanOpen', 'MeanHigh', 'MeanLow', 'MeanClose', 'MeanVolume',
       'AnnualReturn', 'AnnualVariance']
)

# Copy the tickers names from the original data
mean_scaled_df["Ticker"] = mean_stats_df.index
# # Set the Ticker column as index
mean_scaled_df = mean_scaled_df.set_index("Ticker")

# # Display sample data
mean_scaled_df

Unnamed: 0_level_0,Year,MeanOpen,MeanHigh,MeanLow,MeanClose,MeanVolume,AnnualReturn,AnnualVariance
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,-1.564836,-0.382495,-0.382515,-0.382190,-0.382350,-0.145815,-0.302269,-0.083083
A,-1.177151,-0.365438,-0.365482,-0.364979,-0.365210,-0.165113,-0.054719,-0.082291
A,-0.789466,-0.292694,-0.293603,-0.291528,-0.292611,-0.174492,0.839018,-0.078835
A,-0.401781,-0.256734,-0.257028,-0.256470,-0.256948,-0.144547,-0.393886,-0.082629
A,-0.014096,-0.220566,-0.221267,-0.219721,-0.220364,-0.162124,0.443879,-0.081072
...,...,...,...,...,...,...,...,...
ZTS,-0.014096,-0.060599,-0.061923,-0.058908,-0.060417,-0.164049,1.190335,-0.061445
ZTS,0.373589,0.096051,0.096907,0.095670,0.096126,-0.167491,0.263121,-0.054511
ZTS,0.761274,0.295130,0.292960,0.297912,0.295656,-0.180539,0.981620,-0.013462
ZTS,1.148959,0.218275,0.218005,0.216750,0.216772,-0.160866,-1.429377,-0.039536


In [205]:
# Select catagorical variables of interest
catagorical_variables = company_df[['Ticker', 'GICS Sector',
       'Headquarters State', 'Gender', 'Salary_Bins']]

# Set ticker as index
catagorical_variables = catagorical_variables.set_index("Ticker")
catagorical_variables

Unnamed: 0_level_0,GICS Sector,Headquarters State,Gender,Salary_Bins
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,Health Care,California,M,"(15000000, 20000000]"
AAL,Industrials,Texas,M,"(0, 5000000]"
AAPL,Information Technology,California,M,"(50000000, 300000000]"
ABBV,Health Care,Illinois,M,"(25000000, 50000000]"
ABNB,Consumer Discretionary,California,M,"(50000000, 300000000]"
...,...,...,...,...
XYL,Industrials,New York,M,"(0, 5000000]"
YUM,Consumer Discretionary,Kentucky,M,"(15000000, 20000000]"
ZBH,Health Care,Indiana,M,"(10000000, 15000000]"
ZBRA,Information Technology,Illinois,M,"(10000000, 15000000]"


In [206]:
# Encode catagorical variables
stock_dummies = pd.get_dummies(catagorical_variables, dtype=int)
stock_dummies

Unnamed: 0_level_0,GICS Sector_Communication Services,GICS Sector_Consumer Discretionary,GICS Sector_Consumer Staples,GICS Sector_Energy,GICS Sector_Financials,GICS Sector_Health Care,GICS Sector_Industrials,GICS Sector_Information Technology,GICS Sector_Materials,GICS Sector_Real Estate,...,Headquarters State_Wisconsin,Gender_F,Gender_M,"Salary_Bins_(0, 5000000]","Salary_Bins_(10000000, 15000000]","Salary_Bins_(15000000, 20000000]","Salary_Bins_(20000000, 25000000]","Salary_Bins_(25000000, 50000000]","Salary_Bins_(5000000, 10000000]","Salary_Bins_(50000000, 300000000]"
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,0,0,0,0,0,1,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
AAL,0,0,0,0,0,0,1,0,0,0,...,0,0,1,1,0,0,0,0,0,0
AAPL,0,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,1
ABBV,0,0,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
ABNB,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XYL,0,0,0,0,0,0,1,0,0,0,...,0,0,1,1,0,0,0,0,0,0
YUM,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
ZBH,0,0,0,0,0,1,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
ZBRA,0,0,0,0,0,0,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0


In [207]:
# Concatenate the dummy variables with the scaled data DataFrame
sp500_clustering_df = pd.merge(mean_scaled_df, stock_dummies, on='Ticker', how="left").drop('Gender_M', axis=1)
# Display data sample
sp500_clustering_df = sp500_clustering_df.dropna()
sp500_clustering_df

Unnamed: 0_level_0,Year,MeanOpen,MeanHigh,MeanLow,MeanClose,MeanVolume,AnnualReturn,AnnualVariance,GICS Sector_Communication Services,GICS Sector_Consumer Discretionary,...,Headquarters State_Washington,Headquarters State_Wisconsin,Gender_F,"Salary_Bins_(0, 5000000]","Salary_Bins_(10000000, 15000000]","Salary_Bins_(15000000, 20000000]","Salary_Bins_(20000000, 25000000]","Salary_Bins_(25000000, 50000000]","Salary_Bins_(5000000, 10000000]","Salary_Bins_(50000000, 300000000]"
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,-1.564836,-0.382495,-0.382515,-0.382190,-0.382350,-0.145815,-0.302269,-0.083083,0,0,...,0,0,0,0,0,1,0,0,0,0
A,-1.177151,-0.365438,-0.365482,-0.364979,-0.365210,-0.165113,-0.054719,-0.082291,0,0,...,0,0,0,0,0,1,0,0,0,0
A,-0.789466,-0.292694,-0.293603,-0.291528,-0.292611,-0.174492,0.839018,-0.078835,0,0,...,0,0,0,0,0,1,0,0,0,0
A,-0.401781,-0.256734,-0.257028,-0.256470,-0.256948,-0.144547,-0.393886,-0.082629,0,0,...,0,0,0,0,0,1,0,0,0,0
A,-0.014096,-0.220566,-0.221267,-0.219721,-0.220364,-0.162124,0.443879,-0.081072,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTS,-0.014096,-0.060599,-0.061923,-0.058908,-0.060417,-0.164049,1.190335,-0.061445,0,0,...,0,0,1,0,0,1,0,0,0,0
ZTS,0.373589,0.096051,0.096907,0.095670,0.096126,-0.167491,0.263121,-0.054511,0,0,...,0,0,1,0,0,1,0,0,0,0
ZTS,0.761274,0.295130,0.292960,0.297912,0.295656,-0.180539,0.981620,-0.013462,0,0,...,0,0,1,0,0,1,0,0,0,0
ZTS,1.148959,0.218275,0.218005,0.216750,0.216772,-0.160866,-1.429377,-0.039536,0,0,...,0,0,1,0,0,1,0,0,0,0


### Apply the Elbow method

In [208]:
# Create a a list to store inertia values
inertia = []

# Create a a list to store the values of k
k_values = list(range(1, 11))

# Create a for-loop where each value of k and evaluate using the K-means algorithm
for k in k_values:
    k_model = KMeans(n_clusters=k, random_state=78)
    k_model.fit(sp500_clustering_df)
    inertia.append(k_model.inertia_)

In [209]:
# Create a Dictionary that holds the list values for k and inertia
elbow_data = {"k": k_values, "inertia": inertia}

# Create a DataFrame using the elbow_data Dictionary
df_elbow = pd.DataFrame(elbow_data)

# Review the DataFrame
df_elbow.head()

Unnamed: 0,k,inertia
0,1,46782.423965
1,2,34094.075179
2,3,30158.171069
3,4,26906.184429
4,5,23470.408528


In [210]:
# Plot the DataFrame
df_elbow.hvplot.line(
    x="k", 
    y="inertia", 
    title="Elbow Curve", 
    xticks=k
)


### Create Clusters

In [211]:
# Initialize the K-Means model with n_clusters=2
model = KMeans(n_clusters=5)

In [212]:
# Fit the model sp500_scaled_df DataFrame
model.fit(sp500_clustering_df)

In [213]:
# Predict the model segments
stock_clusters = model.predict(sp500_clustering_df)

# View the stock segments
stock_clusters[0:20]

array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
      dtype=int32)

In [214]:
# Create a new column in the DataFrame with the predicted clusters
sp500_clustering_df["StockCluster"] = stock_clusters

# Review the DataFrame
sp500_clustering_df.head()

Unnamed: 0_level_0,Year,MeanOpen,MeanHigh,MeanLow,MeanClose,MeanVolume,AnnualReturn,AnnualVariance,GICS Sector_Communication Services,GICS Sector_Consumer Discretionary,...,Headquarters State_Wisconsin,Gender_F,"Salary_Bins_(0, 5000000]","Salary_Bins_(10000000, 15000000]","Salary_Bins_(15000000, 20000000]","Salary_Bins_(20000000, 25000000]","Salary_Bins_(25000000, 50000000]","Salary_Bins_(5000000, 10000000]","Salary_Bins_(50000000, 300000000]",StockCluster
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,-1.564836,-0.382495,-0.382515,-0.38219,-0.38235,-0.145815,-0.302269,-0.083083,0,0,...,0,0,0,0,1,0,0,0,0,0
A,-1.177151,-0.365438,-0.365482,-0.364979,-0.36521,-0.165113,-0.054719,-0.082291,0,0,...,0,0,0,0,1,0,0,0,0,0
A,-0.789466,-0.292694,-0.293603,-0.291528,-0.292611,-0.174492,0.839018,-0.078835,0,0,...,0,0,0,0,1,0,0,0,0,0
A,-0.401781,-0.256734,-0.257028,-0.25647,-0.256948,-0.144547,-0.393886,-0.082629,0,0,...,0,0,0,0,1,0,0,0,0,0
A,-0.014096,-0.220566,-0.221267,-0.219721,-0.220364,-0.162124,0.443879,-0.081072,0,0,...,0,0,0,0,1,0,0,0,0,1


### View Clusters

In [215]:
# Create a scatter plot - Annual Return vs Annual Variance - Color by Stock Cluster
sp500_clustering_df.hvplot.scatter(
    x="AnnualReturn",
    y="AnnualVariance",
    by="StockCluster",
    hover_cols = ["Ticker"], 
    title = "Scatter Plot by Stock Segment"
)

In [218]:
# Create a scatter plot - Annual Return vs Annual Variance - Color by CEO Gender
sp500_clustering_df.hvplot.scatter(
    x="AnnualReturn",
    y="AnnualVariance",
    by="Gender_F",
    hover_cols = ["Ticker"], 
    title = "Scatter Plot by Stock Segment"
)