In [1]:
pip install yfinance yesg PyPortfolioOpt portfolio-backtest riskfolio-lib plotly scikit-learn

Collecting yesg
  Downloading yesg-2.1.1.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.6-py3-none-any.whl.metadata (22 kB)
Collecting portfolio-backtest
  Downloading portfolio_backtest-0.3.0-py3-none-any.whl.metadata (7.3 kB)
Collecting riskfolio-lib
  Downloading Riskfolio_Lib-6.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting arch>=7.0 (from riskfolio-lib)
  Downloading arch-7.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting xlsxwriter>=3.1.2 (from riskfolio-lib)
  Downloading XlsxWriter-3.2.0-py3-none-any.whl.metadata (2.6 kB)
Collecting pybind11>=2.10.1 (from riskfolio-lib)
  Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)
Downloading pyportfolioopt-1.5.6-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hD

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import scipy.optimize as opt
import yesg
#import plotly.express as px
#import seaborn as sns
import plotly.graph_objects as go
#from plotly.subplots import make_subplots
#import riskfolio as rp
import warnings
import requests
from io import BytesIO, StringIO
import pypfopt as pf
#warnings.filterwarnings("ignore")

In [41]:
# Function to calculate the cost of transactions
def transaction_costs(w, w0, TC):
    """Return the cost-weighted sum of squared weight changes.

    w:  candidate portfolio weights
    w0: current portfolio weights
    TC: per-asset cost coefficients

    Computes sum_i TC[i] * |w[i] - w0[i]|**2.
    """
    turnover = np.abs(w - w0)
    return np.sum(TC * turnover**2)

# Modified objective function to include transaction costs
def objective(w, cov_mat, w0, TC, factor_TC):
    """Risk-parity objective with a transaction-cost penalty.

    w:         candidate weights
    cov_mat:   covariance matrix of asset returns
    w0:        current weights, forwarded to transaction_costs
    TC:        per-asset cost coefficients, forwarded to transaction_costs
    factor_TC: multiplier applied to the transaction-cost term

    Returns the sum over all ordered pairs (i, j) of
    (w[i] * (cov_mat @ w)[i] - w[j] * (cov_mat @ w)[j])**2,
    plus factor_TC * transaction_costs(w, w0, TC).
    """
    # cov_mat @ w does not depend on i or j, so compute the per-asset terms
    # once instead of twice per inner iteration.
    risk_contrib = w * (cov_mat @ w)

    # Plain double loop kept on purpose: same summation order as before, so
    # the value handed to the optimizer is bit-for-bit unchanged.
    s = 0
    for rc_i in risk_contrib:
        for rc_j in risk_contrib:
            s += (rc_i - rc_j)**2

    # Add transaction costs as a penalty term
    return s + factor_TC * transaction_costs(w, w0, TC)

def rp_weights(df, w0= np.array([0.2, 0.2, 0.2, 0.3, 0.1]), TC = np.array([0.0005, 0.0005, 0.0005, 0.0005, 0.0005]), factor_TC = .0001):
    """
    Risk-parity weights with a transaction-cost penalty (defaults sized for five assets).

    df: DataFrame of asset returns; its covariance matrix feeds the objective
    w0: starting point for the optimizer, also used as the current portfolio
        in the transaction-cost term
    TC: per-asset transaction-cost coefficients (default 0.0005 each, i.e. 5 bps)
    factor_TC: weight of the transaction-cost penalty in the objective

    Returns the optimizer's final weight vector. The solver's success flag is
    not inspected; the last iterate is returned as-is.
    """
    def _budget(weights):
        # Equality constraint: weights sum to 1.
        return np.sum(weights) - 1

    def _long_only(weights):
        # Inequality constraint: every weight is non-negative.
        return weights

    covariance = df.cov().values
    solution = opt.minimize(
        objective,
        w0,
        args=(covariance, w0, TC, factor_TC),
        constraints=[{'type': 'eq', 'fun': _budget},
                     {'type': 'ineq', 'fun': _long_only}],
        tol=1e-20,
    )
    return solution.x


def rp_weights_1(df, w0= np.array([0.25, 0.25, 0.25, 0.25]), TC = np.array([0.0005, 0.0005, 0.0005, 0.0005]), factor_TC = .0001):
    """
    Risk-parity weights with a transaction-cost penalty (defaults sized for four assets).

    df: DataFrame of asset returns; its covariance matrix feeds the objective
    w0: starting point for the optimizer, also used as the current portfolio
        in the transaction-cost term
    TC: per-asset transaction-cost coefficients (default 0.0005 each, i.e. 5 bps)
    factor_TC: weight of the transaction-cost penalty in the objective

    Returns the optimizer's final weight vector. The solver's success flag is
    not inspected; the last iterate is returned as-is.
    """
    def _budget(weights):
        # Equality constraint: weights sum to 1.
        return np.sum(weights) - 1

    def _long_only(weights):
        # Inequality constraint: every weight is non-negative.
        return weights

    covariance = df.cov().values
    solver_constraints = [
        {'type': 'eq', 'fun': _budget},
        {'type': 'ineq', 'fun': _long_only},
    ]
    solution = opt.minimize(objective, w0,
                            args=(covariance, w0, TC, factor_TC),
                            constraints=solver_constraints,
                            tol=1e-20)
    return solution.x

def rp_weights_2(df, w0= np.array([0.15, 0.15, 0.15, 0.15, 0.25, 0.15]), TC = np.array([0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005]), factor_TC = .0001):
    """
    Risk-parity weights with a transaction-cost penalty (defaults sized for six assets).

    df: DataFrame of asset returns; its covariance matrix feeds the objective
    w0: starting point for the optimizer, also used as the current portfolio
        in the transaction-cost term
    TC: per-asset transaction-cost coefficients (default 0.0005 each, i.e. 5 bps)
    factor_TC: weight of the transaction-cost penalty in the objective

    Returns the optimizer's final weight vector. The solver's success flag is
    not inspected; the last iterate is returned as-is.
    """
    def _budget(weights):
        # Equality constraint: weights sum to 1.
        return np.sum(weights) - 1

    def _long_only(weights):
        # Inequality constraint: every weight is non-negative.
        return weights

    covariance = df.cov().values
    objective_args = (covariance, w0, TC, factor_TC)
    solver_constraints = [{'type': 'eq', 'fun': _budget},
                          {'type': 'ineq', 'fun': _long_only}]
    solution = opt.minimize(objective, w0, args=objective_args,
                            constraints=solver_constraints, tol=1e-20)
    return solution.x

In [4]:
def min_var_weights(train_data):
    """Run the minimum-volatility optimization and return the optimal weights.

    train_data: DataFrame of asset returns; its column means and covariance
    matrix are handed to pypfopt's EfficientFrontier.

    Returns a NumPy array of weights in the order produced by the optimizer,
    or None (after printing the error) if anything raises.
    """
    try:
        frontier = pf.EfficientFrontier(train_data.mean(), train_data.cov())
        allocation = frontier.min_volatility()
        return np.array([value for value in allocation.values()])
    except Exception as e:
        print(f"Erreur lors du calcul des poids du min-var portfolio : {e}")
        return None

In [42]:
def calculate_metrics(portfolio_returns, benchmark_returns, slice):
    """Compute performance metrics for one backtest slice.

    Parameters
    ----------
    portfolio_returns : pandas.Series or array-like
        Per-period portfolio returns for the slice.
    benchmark_returns : array-like
        Per-period benchmark returns over the same periods.
    slice : hashable
        Label of the slice, stored under the "Slice" key. (The name shadows
        the builtin ``slice``; it is kept for caller compatibility.)

    Returns
    -------
    dict or None
        Keys: "Slice", "Expected Annual Return" (mean return * 252),
        "Volatility" (sample std * sqrt(number of periods in the slice)),
        "Sharpe Ratio" (cumulative return / volatility, no risk-free rate),
        "Treynor Ratio" (cumulative return / beta), "Cumulative Return".
        None is returned, after printing the error, if any step raises.
    """
    try:
        # Normalise to a Series so pandas semantics (sample std with ddof=1,
        # NaN skipping) apply whether the caller passed a Series or an ndarray.
        # ndarray.std() defaults to ddof=0, which made array inputs
        # inconsistent with Series inputs.
        returns = pd.Series(np.asarray(portfolio_returns, dtype=float).ravel())
        # Scale by the slice's own length rather than a notebook-level global.
        n_periods = len(returns)

        cumulative_return = float((1 + returns).prod() - 1)
        cov_matrix = np.cov(returns, benchmark_returns)
        beta = cov_matrix[0, 1] / cov_matrix[1, 1]
        expected_annual_return = float(returns.mean() * 252)
        volatility = float(returns.std() * np.sqrt(n_periods))
        sharpe_ratio = float(cumulative_return / volatility)
        treynor_ratio = float(cumulative_return / beta)

        return {
            "Slice": slice,
            "Expected Annual Return": expected_annual_return,
            "Volatility": volatility,
            "Sharpe Ratio": sharpe_ratio,
            "Treynor Ratio": treynor_ratio,
            "Cumulative Return": cumulative_return
        }
    except Exception as e:
        # Report the slice label that was actually passed in.
        print(f"Erreur lors du calcul des métriques de la tranche {slice} : {e}")
        return None


## Fonction pour obtenir les données de nos nouveaux ETF

In [47]:
import pandas as pd
import json

def _load_returns_from_json(file_path, name, start_date, end_date):
    """Read one JSON price file and return its period-over-period returns.

    The file is loaded with json.load and passed to pd.Series, with the keys
    parsed as dates. Prices are sorted by date, restricted to
    [start_date, end_date], and converted to simple returns.
    """
    with open(file_path, 'r') as f:
        raw_prices = json.load(f)

    prices = pd.Series(raw_prices, name=name)
    prices.index = pd.to_datetime(prices.index)

    # JSON key order is not guaranteed to be chronological. Label slicing and
    # pct_change both need a monotonic index, so sort first.
    prices = prices.sort_index().loc[start_date:end_date]

    return prices.pct_change().dropna()


def get_data_from_etf_files(start_date, end_date, etf_files, benchmark_etf_path):
    """Load benchmark and ETF prices from JSON files and return their returns.

    Parameters
    ----------
    start_date, end_date :
        Bounds (inclusive label slice) of the period to keep.
    etf_files : dict
        Mapping ETF name -> path of its JSON price file.
    benchmark_etf_path : str
        Path of the benchmark's JSON price file.

    Returns
    -------
    (benchmark_returns, etf_returns)
        A Series named "Benchmark" and a DataFrame with one column per ETF.
        On any error the message is printed and (None, None) is returned.

    NOTE(review): the benchmark and the ETFs are not re-aligned to a common
    set of dates here; callers that index both by position presumably rely on
    the files covering identical dates — confirm.
    """
    try:
        benchmark_returns = _load_returns_from_json(
            benchmark_etf_path, "Benchmark", start_date, end_date
        )

        # One returns Series per ETF; the DataFrame constructor aligns them on
        # the union of their dates.
        etf_returns = pd.DataFrame({
            etf_name: _load_returns_from_json(file_path, etf_name, start_date, end_date)
            for etf_name, file_path in etf_files.items()
        })

    except Exception as e:
        print(f"Erreur lors de la récupération ou du traitement des données : {e}")
        return None, None

    return benchmark_returns, etf_returns


In [60]:
# Sample period and benchmark price file.
start_date = '2020-01-01'
end_date = '2024-11-29'
benchmark_etf_path = '/content/SP500MarketCapETFPrices.json'

# ETF name -> JSON price file.
etf_files = dict(
    ETF_div='/content/SP500DividendETFPrices.json',
    ETF_low_vol='/content/SP500LowVolETFPrices.json',
    ETF_quality='/content/SP500QualityETFPrices.json',
    ETF_momentum='/content/SP500MomentumETFPrices.json',
    ETF_value='/content/SP500ValueETFPrices.json',
)

# Load the returns of the benchmark and of every ETF.
benchmark_returns, etf_returns = get_data_from_etf_files(
    start_date, end_date, etf_files, benchmark_etf_path
)

# Same text output as printing the heading and the object separately.
print("Rendements du benchmark :", benchmark_returns, sep="\n")
print("Rendements des ETF :", etf_returns, sep="\n")


Rendements du benchmark :
2020-01-03   -0.002773
2020-01-06   -0.000394
2020-01-07    0.003816
2020-01-08    0.001143
2020-01-09    0.008104
                ...   
2024-11-22    0.000805
2024-11-25   -0.003452
2024-11-26    0.010954
2024-11-27   -0.000790
2024-11-29    0.000928
Name: Benchmark, Length: 1236, dtype: float64
Rendements des ETF :
             ETF_div  ETF_low_vol  ETF_quality  ETF_momentum  ETF_value
2020-01-03 -0.003113    -0.003367    -0.003536      0.001864  -0.006339
2020-01-06  0.001009     0.000945    -0.000334      0.003396   0.001226
2020-01-07 -0.001337    -0.002447     0.000145     -0.001097  -0.001099
2020-01-08  0.003772     0.004149     0.001750      0.004741   0.003591
2020-01-09  0.005323     0.006343     0.006971      0.006226   0.004527
...              ...          ...          ...           ...        ...
2024-11-22  0.006274     0.006673     0.005810      0.006284   0.007720
2024-11-25  0.020360     0.006335     0.008515      0.007722   0.002012
2024-1

In [55]:
def convert_metrics(metrics_list):
    """Build a DataFrame from a list of metric dicts, indexed by "Slice".

    Returns the DataFrame, or None after printing a message when the "Slice"
    column is missing (KeyError) or any other exception is raised.
    """
    try:
        return pd.DataFrame(metrics_list).set_index("Slice")
    except KeyError as e:
        print(f"Erreur lors de la conversion en DataFrame : {e}")
    except Exception as e:
        print(f"Erreur inattendue : {e}")
    return None


In [61]:
def _erc_weights(train_data):
    """Try the fixed-size ERC solvers in turn and return the first result.

    Each solver's defaults are sized for a specific number of assets (5, 4,
    then 6). If all of them raise, the last error is printed and None is
    returned.
    """
    last_error = None
    for solver in (rp_weights, rp_weights_1, rp_weights_2):
        try:
            return solver(train_data)
        except Exception as e:
            # `except Exception` rather than a bare `except`, so that
            # KeyboardInterrupt / SystemExit still propagate.
            last_error = e
    print(f"Erreur lors du calcul des poids ERC : {last_error}")
    return None


def backtest(i, j, k, start_date, end_date, etf_files, benchmark_etf_path, strategy):
    """Run a rolling out-of-sample backtest for one allocation strategy.

    The first ``j`` rows of ETF returns seed the training window. Each
    iteration computes weights from the current training window, applies them
    to the next ``i`` rows, records that slice's metrics, then advances by
    ``k`` rows while keeping only the most recent ``j`` rows for training.

    Parameters
    ----------
    i : int
        Number of rows in each evaluation slice.
    j : int
        Number of rows in the training window; must not be smaller than ``i``.
    k : int
        Number of rows to advance between slices; must be positive.
    start_date, end_date :
        Passed through to ``get_data_from_etf_files``.
    etf_files : dict
        ETF name -> price file path.
    benchmark_etf_path : str
        Path of the benchmark price file.
    strategy : str
        One of "ERC", "Equal weight", "Benchmark", "Min-var".

    Returns
    -------
    tuple or None
        ``(metrics_df, weights_list)`` on success. ``None`` (after printing a
        message) on invalid input, data-loading failure, or any error during
        the loop or the final conversion.

    Notes
    -----
    When a weight computation yields None, the slice falls back to the
    benchmark returns and nothing is appended to ``weights_list``.
    """
    # Silence the "not positive definite" covariance warning.
    warnings.filterwarnings("ignore", message="You must convert self.cov to a positive definite matrix")

    # Input validation, done before any file is read.
    if j < i:
        print("Erreur d'entrée : Le deuxième paramètre (j) doit être supérieur au premier (i).")
        return None
    if k <= 0:
        # A non-positive step would never advance start_idx (endless loop).
        print("Erreur d'entrée : Le pas (k) doit être strictement positif.")
        return None
    if strategy not in ("ERC", "Equal weight", "Benchmark", "Min-var"):
        print("Stratégie non reconnue, veuillez choisir entre 'ERC', 'Equal weight', 'Benchmark' et 'Min-var'")
        return None

    # Step 1: load the data
    benchmark_returns, etf_returns = get_data_from_etf_files(start_date, end_date, etf_files, benchmark_etf_path)
    if benchmark_returns is None or etf_returns is None:
        print("Erreur lors de la récupération des données des ETF ou du S&P 500")
        return None

    # Step 2: initial train/test split (by row position)
    try:
        train_data = etf_returns.iloc[:j, :]
        test_data = etf_returns.iloc[j:, :]
        spy_test_data = benchmark_returns.iloc[j:]
        metrics_list = []
        start_idx = 0
        weights_list = []
    except Exception as e:
        print(f"Erreur lors de l'initialisation des variables : {e}")
        return None

    # Step 3: weights and metrics for each slice
    try:
        while start_idx + i <= len(test_data) and start_idx + i <= len(spy_test_data):
            slice_data = test_data.iloc[start_idx:start_idx + i]
            spy_slice_data = spy_test_data.iloc[start_idx:start_idx + i].to_numpy().flatten()

            print(f"Traitement de la tranche {start_idx + 1} à {start_idx + i}")
            if strategy == "ERC":
                weights = _erc_weights(train_data)
            elif strategy == "Equal weight":
                weights = np.array([1 / len(etf_files)] * len(etf_files))
            elif strategy == "Min-var":
                weights = min_var_weights(train_data)
            else:
                # "Benchmark": nothing to optimize.
                weights = None

            if weights is not None:
                weights_list.append(weights)
                portfolio_returns = slice_data @ weights
            else:
                # No weights available: use the benchmark returns.
                portfolio_returns = spy_slice_data

            metrics_list.append(calculate_metrics(portfolio_returns, spy_slice_data, len(metrics_list) + 1))

            # Roll the training window forward, keeping the latest j rows.
            train_data = pd.concat([train_data, test_data.iloc[start_idx:start_idx + k]]).iloc[-j:]
            start_idx += k

    except Exception as e:
        print(f"Erreur lors de l'optimisation ou du calcul des métriques : {e}")
        return None

    # Step 4: convert the per-slice metrics to a DataFrame
    metrics_df = convert_metrics(metrics_list)
    if metrics_df is None:
        print("Erreur lors de la conversion des métriques en DataFrame")
        return None

    print(f"Backtest terminé pour la stratégie: {strategy}")
    return metrics_df, weights_list


In [62]:
# Backtest parameters (i, j, k are row counts in the returns data)
start_date = '2020-01-01'
end_date = '2024-11-29'
i = 5    # rows per evaluation slice
j = 22   # rows in the training window
k = i    # step between consecutive slices
strategies = ["Equal weight", "Benchmark", "Min-var", "ERC"]

# One column per strategy, filled in as each backtest completes
sharpe_ratios = pd.DataFrame(columns=strategies)
cumulative_returns = pd.DataFrame(columns=strategies)

# Run the backtest for each strategy
for strategy in strategies:
    result = backtest(i, j, k, start_date, end_date, etf_files, benchmark_etf_path, strategy)

    # backtest returns None on failure; unpacking it directly would raise
    # TypeError before the failure could be reported.
    if result is None:
        print(f"Backtest échoué pour la stratégie : {strategy}")
        continue

    metrics_df, weights_list = result
    sharpe_ratios[strategy] = metrics_df["Sharpe Ratio"]
    cumulative_returns[strategy] = metrics_df["Cumulative Return"]

# Show the results
print("Ratios de Sharpe :")
print(sharpe_ratios)

print("\nRendements cumulés :")
print(cumulative_returns)


Traitement de la tranche 1 à 5
Traitement de la tranche 6 à 10
Traitement de la tranche 11 à 15
Traitement de la tranche 16 à 20
Traitement de la tranche 21 à 25
Traitement de la tranche 26 à 30
Traitement de la tranche 31 à 35
Traitement de la tranche 36 à 40
Traitement de la tranche 41 à 45
Traitement de la tranche 46 à 50
Traitement de la tranche 51 à 55
Traitement de la tranche 56 à 60
Traitement de la tranche 61 à 65
Traitement de la tranche 66 à 70
Traitement de la tranche 71 à 75
Traitement de la tranche 76 à 80
Traitement de la tranche 81 à 85
Traitement de la tranche 86 à 90
Traitement de la tranche 91 à 95
Traitement de la tranche 96 à 100
Traitement de la tranche 101 à 105
Traitement de la tranche 106 à 110
Traitement de la tranche 111 à 115
Traitement de la tranche 116 à 120
Traitement de la tranche 121 à 125
Traitement de la tranche 126 à 130
Traitement de la tranche 131 à 135
Traitement de la tranche 136 à 140
Traitement de la tranche 141 à 145
Traitement de la tranche 14

In [63]:
def plot_data(data):
    """Show a Plotly figure with one line trace per column of `data`.

    Each trace uses the DataFrame index as x, the column values as y, and the
    column label as its legend name.
    """
    traces = [
        go.Scatter(x=data.index, y=data[label], mode='lines', name=label)
        for label in data.columns
    ]
    go.Figure(data=traces).show()

def box_plot_data(data):
    """Show a Plotly figure with one box trace per column of `data`."""
    fig = go.Figure()

    # One box per column, named after the column label.
    for label in data.columns:
        box = go.Box(y=data[label], name=label)
        fig.add_trace(box)

    layout_options = {
        "title": "Box Plot",
        "yaxis_title": "Values",
        "xaxis_title": "Columns",
    }
    fig.update_layout(**layout_options)

    fig.show()

def plot_moving_average_data(data, window=30):
    """Show a rolling-mean-smoothed line for each column of `data`.

    data:   DataFrame to smooth and plot
    window: number of rows in the rolling window (default 30, the value that
            was previously hard-coded)

    min_periods=1 means the first rows are averaged over however many
    observations are available, so the smoothed series has no leading NaNs.
    """
    # Rolling mean (moving average) over `window` rows
    smoothed_sr = data.rolling(window=window, min_periods=1).mean()

    fig = go.Figure()

    for column in smoothed_sr.columns:
        fig.add_trace(go.Scatter(x=smoothed_sr.index, y=smoothed_sr[column], mode='lines', name=column))

    # Titles so the figure can be read on its own
    fig.update_layout(
        title="Smoothed Trends of Each Column",
        xaxis_title="Index",
        yaxis_title="Smoothed Values",
    )

    fig.show()

In [64]:
# A bare expression is only rendered when it is the last line of a cell, so
# the summary statistics are printed explicitly here.
print(sharpe_ratios.describe())

# Per-slice Sharpe ratios: raw lines, distribution, and smoothed trend.
plot_data(sharpe_ratios)
box_plot_data(sharpe_ratios)
plot_moving_average_data(sharpe_ratios)