In [1]:
import pandas as pd
import yfinance as yf
from tqdm import tqdm
import time
import random
import numpy as np
import os
from ipywidgets import widgets, VBox
from datetime import datetime
from IPython.display import display, clear_output
import warnings
warnings.filterwarnings("ignore")

## 1. Import data

In [2]:
# Index constituents, keyed by ticker symbol (file is read from the current working directory)
index_df = pd.read_csv(
    os.path.join(os.getcwd(), 'S&P 500 - 2024.csv'),
    index_col='Ticker',
)

In [None]:
# Download the full price history of every index constituent and cache it as
# one CSV per ticker under ./history/.
history_dir = os.path.join(os.getcwd(), 'history')
# Without the directory every to_csv call fails and each ticker is reported as an error.
os.makedirs(history_dir, exist_ok=True)

for ticker in tqdm(index_df.index):
    try:
        history = yf.Ticker(ticker).history('max')
        # Put the rows on a business-day grid, carrying the last known row forward
        history = history.resample('B').last().ffill()
        # Drop the time zone from the index
        history = history.tz_localize(None)
        history.to_csv(os.path.join(history_dir, ticker + '.csv'))
    except Exception as exc:
        # Best effort: report the ticker and the reason, then move on.
        # `Exception` (rather than a bare except) lets a kernel interrupt stop the loop.
        print(f'{ticker}: {exc!r}')
    # Random pause between requests (presumably to avoid being rate limited)
    time.sleep(random.uniform(3,5))

  0%|          | 0/428 [00:00<?, ?it/s]

 24%|██▍       | 102/428 [08:36<26:35,  4.90s/it]BRK.B: No timezone found, symbol may be delisted


BRK.B


100%|██████████| 428/428 [35:45<00:00,  5.01s/it]


In [9]:
# Download the cash-flow statement of every index constituent and cache it as
# one CSV per ticker under ./cashflow/.
cashflow_dir = os.path.join(os.getcwd(), 'cashflow')
# Without the directory every to_csv call fails and each ticker is reported as an error.
os.makedirs(cashflow_dir, exist_ok=True)

for ticker in tqdm(index_df.index):
    try:
        cashflow = yf.Ticker(ticker).cashflow
        cashflow.to_csv(os.path.join(cashflow_dir, ticker + '.csv'))
    except Exception as exc:
        # Best effort: report the ticker and the reason, then move on.
        # `Exception` (rather than a bare except) lets a kernel interrupt stop the loop.
        print(f'{ticker}: {exc!r}')
    # Random pause between requests (presumably to avoid being rate limited)
    time.sleep(random.uniform(1,3))

100%|██████████| 503/503 [19:09<00:00,  2.28s/it]


In [None]:
# Download the income statement of every index constituent and cache it as
# one CSV per ticker under ./income_stmt/.
income_stmt_dir = os.path.join(os.getcwd(), 'income_stmt')
# Without the directory every to_csv call fails and each ticker is reported as an error.
os.makedirs(income_stmt_dir, exist_ok=True)

for ticker in tqdm(index_df.index):
    try:
        income_stmt = yf.Ticker(ticker).income_stmt
        income_stmt.to_csv(os.path.join(income_stmt_dir, ticker + '.csv'))
    except Exception as exc:
        # Best effort: report the ticker and the reason, then move on.
        # `Exception` (rather than a bare except) lets a kernel interrupt stop the loop.
        print(f'{ticker}: {exc!r}')
    # Random pause between requests (presumably to avoid being rate limited)
    time.sleep(random.uniform(3,5))

100%|██████████| 323/323 [22:55<00:00,  4.26s/it]


In [8]:
# Download the balance sheet of every index constituent and cache it as
# one CSV per ticker under ./balance_sheet/.
balance_sheet_dir = os.path.join(os.getcwd(), 'balance_sheet')
# Without the directory every to_csv call fails and each ticker is reported as an error.
os.makedirs(balance_sheet_dir, exist_ok=True)

for ticker in tqdm(index_df.index):
    try:
        balance_sheet = yf.Ticker(ticker).balance_sheet
        balance_sheet.to_csv(os.path.join(balance_sheet_dir, ticker + '.csv'))
    except Exception as exc:
        # Best effort: report the ticker and the reason, then move on.
        # `Exception` (rather than a bare except) lets a kernel interrupt stop the loop.
        print(f'{ticker}: {exc!r}')
    # Random pause between requests (presumably to avoid being rate limited)
    time.sleep(random.uniform(2,4))

100%|██████████| 503/503 [27:51<00:00,  3.32s/it]


## 2. Company size analysis

In [None]:
# Size cut-offs used by the company size filter below (compared with
# 'Total Assets' and 'Total Revenue' respectively).
total_asset_threshold = 4_000_000_000
total_revenue_threshold = 3_000_000_000

# Tickers to screen, in the order they appear in the constituents file
ticker_list = index_df.index.tolist()

In [None]:
# Flag, per ticker, whether total assets and total revenue exceed their thresholds.
# NOTE(review): balance_sheet_df and income_stmt_df are not built in any visible cell;
# they are read here as frames with line items as rows and tickers as columns - confirm.
company_size_df = pd.DataFrame()
for ticker in tqdm(ticker_list):

    # True only when the reported value is strictly above the threshold
    total_assets = balance_sheet_df.loc['Total Assets', ticker]
    company_size_df.at[ticker, 'Total Assets'] = (
        True if total_assets > total_asset_threshold else False
    )

    total_revenue = income_stmt_df.loc['Total Revenue', ticker]
    company_size_df.at[ticker, 'Total Revenue'] = (
        True if total_revenue > total_revenue_threshold else False
    )

# A company is kept when it passes at least one of the two size tests
company_size_selected = company_size_df[
    (company_size_df['Total Assets'] | company_size_df['Total Revenue'])
].index.tolist()

100%|██████████| 122/122 [00:00<00:00, 507.31it/s]


## 3. ESG analysis

## 4. Fundamental analysis

## 5. Momentum analysis

In [2]:
# Moving-average windows (in rows of the price history) used by the momentum analysis
short_period = [5, 10, 20]
long_period = [50, 100, 200]
period_list = short_period + long_period

# Reload the constituents so this cell also runs on a fresh kernel.
# index_df stays a DataFrame: the download cells above iterate over index_df.index,
# which breaks if the name is rebound to a plain list.
index_df = pd.read_csv(os.path.join(os.getcwd(), 'S&P 500 - 2024.csv'), index_col='Ticker')
ticker_list = list(index_df.index)

# One 'Close' column per ticker, read from the cached CSVs.
# pd.concat aligns the columns on the union of all dates. Filling an empty DataFrame
# column by column would instead align every ticker to the first ticker's dates and
# silently drop any history outside that range. Missing dates are left as NaN.
history_df = pd.concat(
    {
        ticker: pd.read_csv(
            os.path.join(os.getcwd(), 'history', f'{ticker}.csv'),
            index_col='Date',
            parse_dates=True,
        )['Close']
        for ticker in tqdm(ticker_list)
    },
    axis=1,
)
history_df.index = pd.to_datetime(history_df.index)
history_df = history_df.sort_index()

100%|██████████| 503/503 [00:13<00:00, 37.84it/s]


In [None]:
# Moving average differential (MAD) and its 0-100 normalisation (MADN), one row per
# ticker and one column per period. Pre-allocated with NaN so that tickers without
# enough data stay NaN and the frames are not grown cell by cell.
MAD_df = pd.DataFrame(index=ticker_list, columns=period_list, dtype=float)
MADN_df = pd.DataFrame(index=ticker_list, columns=period_list, dtype=float)

# The MAD is a 5-row difference of the moving average, so the longest window needs
# max(period_list) + 5 rows before it yields a single value. Gating on max(period_list)
# alone lets an empty MAD through and MAD.iloc[-1] then raises IndexError.
min_history = max(period_list) + 5

# Loop through each ticker in the ticker list
for ticker in tqdm(ticker_list):

    # Get historical price data for the ticker, dropping any NaN values
    history = history_df[ticker].dropna()

    # Not enough data: leave every period as NaN for this ticker
    if len(history) < min_history:
        continue

    for period in period_list:
        # Moving average for the current period, without the warm-up NaN values
        MA = history.rolling(period).mean().dropna()

        # Moving average differential (change over 5 rows)
        MAD = MA.diff(5).dropna()
        last_mad = MAD.iloc[-1]
        MAD_df.at[ticker, period] = last_mad

        # Position of the latest MAD inside its own historical range, on a 0-100 scale.
        # A flat MAD history has no range, so the normalised value is left as NaN.
        mad_low, mad_high = MAD.min(), MAD.max()
        if mad_high > mad_low:
            MADN_df.at[ticker, period] = 100 * (last_mad - mad_low) / (mad_high - mad_low)

# Boolean MAD: 1 when the MAD is zero or positive, 0 when it is negative.
# NaN >= 0 is False, so a missing MAD is no longer counted as a positive signal.
BMAD_df = (MAD_df >= 0).astype(int)

# Calculate the sum of boolean values across short periods and long periods for each ticker
SBMAD_df = BMAD_df[short_period].sum(axis=1)  # Sum across short periods
LBMAD_df = BMAD_df[long_period].sum(axis=1)   # Sum across long periods

# GPS category from the short/long scores:
#   A: short >= 2 and long >= 2      B: short < 2 and long < 2
#   C: short >= 2 and long < 2       P: short < 2 and long >= 2
# Tickers with no MAD at all get NaN instead of a category.
has_data = MAD_df.notna().any(axis=1)
short_up = (SBMAD_df >= 2).to_numpy()
long_up = (LBMAD_df >= 2).to_numpy()
GPS_df = pd.Series(
    np.select(
        [short_up & long_up, ~short_up & ~long_up, short_up & ~long_up],
        ['A', 'B', 'C'],
        default='P',
    ),
    index=MAD_df.index,
    dtype=object,
).where(has_data)

# Calculate the overall grade for each ticker by averaging the normalized MADN across all periods
# Sort the results in descending order (tickers without a grade end up last)
GRADE_df = MADN_df.mean(axis=1).sort_values(ascending=False)


100%|██████████| 503/503 [00:06<00:00, 76.84it/s] 
100%|██████████| 503/503 [00:00<00:00, 1898.88it/s]


In [3]:
def eclairys(history, date, short_period=[5, 10, 20], long_period=[50, 100, 200]):
    """Summarise the momentum indicators of one price series at a given date.

    For every period a moving average (MA) is computed, then its 5-row
    differential (MAD) and the MAD rescaled to 0-100 over its own history (MADN).

    Parameters
    ----------
    history : pandas.Series
        Prices indexed by timezone-naive dates; ``history.name`` is reported as
        the ticker.
    date : str or datetime-like
        Date to report. Strings are read as DD/MM/YYYY first (the format this
        function prints), then with pandas' default parser.
    short_period, long_period : sequence of int
        Moving-average windows, in rows. The defaults are never mutated.

    Returns
    -------
    dict
        Keys 'Ticker', 'Date' (DD/MM/YYYY), 'Grade' (mean MADN, rounded),
        'GPS' ('A', 'B', 'C' or 'P'), 'Price', and one 'MA <period>' per period.
        Periods without enough data at ``date`` are reported as NaN.

    Raises
    ------
    KeyError
        If no indicator can be computed for ``date`` (for instance a day that
        is not in ``history``).
    """
    short_period = list(short_period)
    long_period = list(long_period)
    period_list = short_period + long_period

    # pd.to_datetime alone reads '05/11/2024' month-first (11 May), although this
    # function formats dates day-first. Try the explicit day-first format first.
    if isinstance(date, str):
        try:
            date = pd.to_datetime(date, format="%d/%m/%Y")
        except ValueError:
            date = pd.to_datetime(date)
    else:
        date = pd.to_datetime(date)
    date = date.tz_localize(None)

    # Remove any rows with NaN values in the historical data
    history = history.dropna()

    # Placeholder column for periods longer than the available history.
    # It carries the same index type so the combined frames keep a date index.
    no_data = pd.Series(dtype=float, index=history.index[:0])

    ma_cols, mad_cols, madn_cols = {}, {}, {}
    for period in period_list:
        if len(history) < period:
            ma_cols[period] = mad_cols[period] = madn_cols[period] = no_data
            continue

        # Moving average for the current period, without the warm-up NaN values
        MA = history.rolling(period).mean().dropna()
        # Moving average differential (change over 5 rows)
        MAD = MA.diff(5).dropna()
        # MAD rescaled to 0-100 over its own history (NaN when the MAD never varies)
        MADN = 100 * (MAD - MAD.min()) / (MAD.max() - MAD.min())

        ma_cols[period] = MA
        mad_cols[period] = MAD
        madn_cols[period] = MADN

    # Building each frame once aligns the columns on the union of their dates
    MA_df = pd.DataFrame(ma_cols)
    MAD_df = pd.DataFrame(mad_cols)
    MADN_df = pd.DataFrame(madn_cols)

    # Overall grade: average of the normalized MADs across the available periods
    GRADE = MADN_df.mean(axis=1)

    if date not in GRADE.index:
        raise KeyError(
            f"No indicator available for {history.name} on {date.strftime('%d/%m/%Y')}: "
            "the date is not in the price history or there is too little data before it"
        )

    # Boolean MAD: 1 when the MAD is zero or positive, 0 when it is negative.
    # NaN >= 0 is False, so a period with no MAD yet is not counted as positive.
    BMAD = (MAD_df >= 0).astype(int)
    SBMAD = BMAD[short_period].sum(axis=1)  # Short period Boolean Moving Average Differential
    LBMAD = BMAD[long_period].sum(axis=1)   # Long period Boolean Moving Average Differential

    # GPS category from the short/long scores:
    #   A: short >= 2 and long >= 2      B: short < 2 and long < 2
    #   C: short >= 2 and long < 2       P: short < 2 and long >= 2
    short_up = (SBMAD >= 2).to_numpy()
    long_up = (LBMAD >= 2).to_numpy()
    GPS = pd.Series(
        np.select(
            [short_up & long_up, ~short_up & ~long_up, short_up & ~long_up],
            ['A', 'B', 'C'],
            default='P',
        ),
        index=SBMAD.index,
        dtype=object,
    ).where(MAD_df.notna().any(axis=1))

    # Summary for the requested date
    output = {
        'Ticker': history.name,               # Stock ticker or identifier
        'Date': date.strftime("%d/%m/%Y"),    # Date in DD/MM/YYYY format
        'Grade': GRADE.loc[date].round(0),    # Rounded grade value for the date
        'GPS': GPS.loc[date],                 # GPS category for the date
        'Price': history.loc[date].round(2),  # Rounded price for the date
    }

    # Add the moving averages to the output dictionary, prefixed with "MA"
    MA = MA_df.loc[date].round(2)
    MA.index = MA.index.map(lambda x: f"MA {x}")
    return {**output, **MA.to_dict()}


In [4]:
# Example: indicators for one ticker on one date, computed from its closing prices
ticker = 'MC.PA'
history = yf.Ticker(ticker).history('max')['Close']
# Drop the time zone from the index and label the series with its ticker
history = history.tz_localize(None).rename(ticker)
eclairys(history, date='31/10/2024')

{'Ticker': 'MC.PA',
 'Date': '31/10/2024',
 'Grade': 33.0,
 'GPS': 'B',
 'Price': 609.8,
 'MA 5': 623.58,
 'MA 10': 621.04,
 'MA 20': 632.36,
 'MA 50': 637.4,
 'MA 100': 659.87,
 'MA 200': 724.9}

In [5]:
# Free-text input for the ticker
ticker_input = widgets.Text(
    value='MC.PA',
    placeholder='Entrez un ticker',
    description='Ticker:',
)

# Date selection widget
date_picker = widgets.DatePicker(
    value=datetime(2024, 10, 31),  # Default date
    description='Date:',
)

# Validation button
run_button = widgets.Button(description="Valider")

# Callback executed when the button is clicked
def on_button_click(b):
    """Run eclairys for the ticker and date currently entered in the widgets.

    The previous output is cleared, the widgets are shown again, and either the
    eclairys results or a short message explaining why none are available is
    printed.

    Parameters
    ----------
    b : the clicked button (unused).
    """
    # Clear the previous output
    clear_output(wait=True)

    # Show the widgets again to keep the interface visible
    display(VBox([ticker_input, date_picker, run_button]))

    # Read the ticker and the date entered by the user
    ticker = ticker_input.value.strip()
    date = date_picker.value

    # The date picker holds None once cleared, and the text box may be empty
    if not ticker or date is None:
        print("Veuillez saisir un ticker et une date.")
        return

    # Fetch the closing price history for the ticker
    try:
        history = yf.Ticker(ticker).history(period='max')['Close'].tz_localize(None)
    except Exception as exc:
        print(f"Impossible de récupérer l'historique de {ticker}: {exc!r}")
        return
    history.name = ticker

    if history.empty:
        print(f"Aucun historique disponible pour {ticker}.")
        return

    # Pass the date as a Timestamp rather than a DD/MM/YYYY string, so the day and
    # month cannot be swapped when the string is parsed back.
    try:
        output = eclairys(history, date=pd.Timestamp(date))
    except KeyError:
        # Raised when the date is not in the price history (e.g. a weekend or holiday)
        print(f"Aucune donnée pour {ticker} au {date.strftime('%d/%m/%Y')}.")
        return

    # Print the results
    print("\nRésultats de eclairys:")
    for key, value in output.items():
        print(f"{key}: {value}")

# Register the validation callback on the button
run_button.on_click(on_button_click)

# Show the three widgets stacked vertically
display(
    VBox([
        ticker_input,
        date_picker,
        run_button,
    ])
)


VBox(children=(Text(value='MC.PA', description='Ticker:', placeholder='Entrez un ticker'), DatePicker(value=da…


Résultats de eclairys:
Ticker: MC.PA
Date: 31/10/2024
Grade: 33.0
GPS: B
Price: 609.8
MA 5: 623.58
MA 10: 621.04
MA 20: 632.36
MA 50: 637.4
MA 100: 659.87
MA 200: 724.9


## 6. Global results