In [1]:
import pandas as pd
import yfinance as yf
from tqdm import tqdm
import time
import random
import numpy as np
import os
import warnings
warnings.filterwarnings("ignore")

## 1. Import datas

In [2]:
index_df = pd.read_csv(os.getcwd() + '/S&P 500 - 2024.csv', index_col = 'Ticker')

In [None]:
for ticker in tqdm(index_df.index):
    try:
        history = yf.Ticker(ticker).history('max')
        history = history.resample('B').last().ffill()
        history = history.tz_localize(None)
        history.to_csv(os.getcwd() + '/history/' + ticker + '.csv')
    except:
        print(ticker)
    time.sleep(random.uniform(3,5))

  0%|          | 0/428 [00:00<?, ?it/s]

 24%|██▍       | 102/428 [08:36<26:35,  4.90s/it]BRK.B: No timezone found, symbol may be delisted


BRK.B


100%|██████████| 428/428 [35:45<00:00,  5.01s/it]


In [9]:
for ticker in tqdm(index_df.index):
    try:
        cashflow = yf.Ticker(ticker).cashflow
        cashflow.to_csv(os.getcwd() + '/cashflow/' + ticker + '.csv')
    except:
        print(ticker)
    time.sleep(random.uniform(1,3))

100%|██████████| 503/503 [19:09<00:00,  2.28s/it]


In [None]:
for ticker in tqdm(index_df.index):
    try:
        income_stmt = yf.Ticker(ticker).income_stmt
        income_stmt.to_csv(os.getcwd() + '/income_stmt/' + ticker + '.csv')
    except:
        print(ticker)
    time.sleep(random.uniform(3,5))

100%|██████████| 323/323 [22:55<00:00,  4.26s/it]


In [8]:
for ticker in tqdm(index_df.index):
    try:
        balance_sheet = yf.Ticker(ticker).balance_sheet
        balance_sheet.to_csv(os.getcwd() + '/balance_sheet/' + ticker + '.csv')
    except:
        print(ticker)
    time.sleep(random.uniform(2,4))

100%|██████████| 503/503 [27:51<00:00,  3.32s/it]


## 2. Company size analysis

In [None]:
total_asset_threshold = 4000000000
total_revenue_threshold = 3000000000
ticker_list = list(index_df.index)

In [None]:
company_size_df = pd.DataFrame()
for ticker in tqdm(ticker_list):
    
    total_assets = balance_sheet_df.loc['Total Assets', ticker]
    if total_assets > total_asset_threshold:
        company_size_df.at[ticker, 'Total Assets'] = True
    else:
        company_size_df.at[ticker, 'Total Assets'] = False

    total_revenue = income_stmt_df.loc['Total Revenue', ticker]
    if total_revenue > total_revenue_threshold:
        company_size_df.at[ticker, 'Total Revenue'] = True
    else:
        company_size_df.at[ticker, 'Total Revenue'] = False

company_size_selected = list(company_size_df[(company_size_df['Total Assets'] | company_size_df['Total Revenue'])].index)

100%|██████████| 122/122 [00:00<00:00, 507.31it/s]


## 3. ESG analysis

## 4. Fundamental analysis

## 5. Momentum analysis

In [43]:
period = 200
short_period = [5, 10, 20]
long_period = [50, 100, 200]
period_list = short_period + long_period
ticker_list = index_df = list(pd.read_csv(os.getcwd() + '/S&P 500 - 2024.csv', index_col = 'Ticker').index)

In [21]:
history_df = pd.DataFrame()
for ticker in tqdm(ticker_list):
    history_df[ticker] = pd.read_csv(os.getcwd() + f'/history/{ticker}.csv', index_col='Date')['Close']

100%|██████████| 503/503 [00:10<00:00, 49.10it/s]


In [52]:
MAD_df = pd.DataFrame()
MADN_df = pd.DataFrame()

for ticker in tqdm(ticker_list):
    history = history_df[ticker].dropna()
    if history.empty or len(history) < max(period_list):
        for period in period_list:
            MAD_df.at[ticker, period] = np.nan
            MADN_df.at[ticker, period] = np.nan
    else:
        for period in period_list:
            MA = history.rolling(period).mean().dropna() # Moving average
            MAD = MA.diff(5).dropna() # Moving average differential
            MAD_df.at[ticker, period] = MAD.iloc[-1]
            MADN = 100 * (MAD - MAD.min()) / (MAD.max() - MAD.min()) # Moving average differential normalized
            MADN_df.at[ticker, period] = MADN.iloc[-1]

100%|██████████| 503/503 [00:04<00:00, 104.19it/s]


In [53]:
BMAD_df = MAD_df.applymap(lambda x: 0 if x < 0 else 1)
SBMAD_df = BMAD_df[short_period].sum(axis=1)
LBMAD_df = BMAD_df[long_period].sum(axis=1)
GPS_df = pd.Series()
for ticker in tqdm(ticker_list):
    SBMAD = SBMAD_df[ticker]
    LBMAD = LBMAD_df[ticker]
    if SBMAD >= 2 and LBMAD >= 2:
        GPS_df[ticker] = 'A'
    elif SBMAD < 2 and LBMAD < 2:
        GPS_df[ticker] = 'B'
    elif SBMAD >= 2 and LBMAD < 2:
        GPS_df[ticker] = 'C'
    elif SBMAD < 2 and LBMAD >= 2:
        GPS_df[ticker] = 'P'

100%|██████████| 503/503 [00:00<00:00, 1981.76it/s]


In [56]:
GPS_df.head(50)

VZ       P
T        A
LYV      A
WBD      A
EA       A
TTWO     A
DIS      P
NFLX     A
META     P
MTCH     B
GOOG     A
GOOGL    A
CHTR     C
CMCSA    A
FOX      A
OMC      P
FOXA     P
IPG      B
NWSA     C
PARA     C
NWS      A
TMUS     A
APTV     B
BWA      P
TSLA     A
GM       A
F        B
EBAY     P
AMZN     A
LKQ      B
POOL     P
GPC      B
DRI      P
HLT      A
NCLH     A
CZR      P
SBUX     A
MGM      B
LVS      A
MCD      P
BKNG     A
CMG      P
ABNB     C
CCL      A
RCL      A
YUM      B
MAR      A
EXPE     A
WYNN     P
DPZ      B
dtype: object

In [54]:
MADN_df

Unnamed: 0,5,10,20,50,100,200
VZ,51.555534,34.468555,23.736596,65.621588,68.130256,60.905190
T,61.113491,66.809047,64.459486,93.602724,97.785940,97.290327
LYV,62.173213,64.886823,77.216625,86.229612,85.965654,80.499102
WBD,74.779200,67.321422,58.634376,50.989835,45.929402,46.900771
EA,62.802403,58.880615,60.740130,53.918990,64.833341,61.613686
...,...,...,...,...,...,...
SRE,56.363108,64.446847,68.915191,65.160806,65.141571,73.077081
AEE,45.228678,59.151411,58.800088,69.539096,97.103092,97.543452
DTE,39.444673,58.090057,61.666309,66.765190,75.864272,89.048413
NI,63.515949,74.010265,68.672318,74.771292,100.000000,99.641205


## 6. Global results