In [3]:
import pandas as pd 
import process
import numpy as np 
# Machine-specific locations of the returns database (use the one matching your checkout):
#   Jerome : r'C:\Users\33640\OneDrive\Documents\GitHub\Portfolio_clustering_project\Data\DataBase.csv'
#   Nail   : '/Users/khelifanail/Documents/GitHub/Portfolio_clustering_project/Data/DataBase.csv'
df = pd.read_csv(
    r'/Users/khelifanail/Documents/GitHub/Portfolio_clustering_project/Data/DataBase.csv'
).set_index('ticker')

# Column labels: drop the first character, parse the remainder as YYYYMMDD and
# render it back as a DD/MM/YYYY string.
df.columns = pd.to_datetime(df.columns.str[1:], format='%Y%m%d').strftime('%d/%m/%Y')

# Replace the NaN entries by 0, then transpose: rows become dates and every
# column is the vector of returns of one ticker.
df_cleaned = df.fillna(0).T

In [2]:
from pypfopt.efficient_frontier import EfficientFrontier

def cov_forecast(beta, lookback_window, number_folds, historical_data):
    """Forecast a covariance matrix from fold-wise eigenvector projections.

    The rows ``lookback_window[0] .. lookback_window[1] - 1`` of
    ``historical_data`` are cut into ``number_folds`` consecutive folds of
    equal length. For every fold:

    1. an exponentially weighted average (EWA) of the outer products of the
       fold's rows is built (the most recent row gets the largest weight);
    2. the eigenvectors of that matrix are computed;
    3. the rows of the same fold are projected on each eigenvector and the
       mean squared projection is added to ``epsilon[i]``.

    ``epsilon`` is then averaged over the folds and the forecast is rebuilt as
    ``sum_i epsilon[i] * u_i u_i^T`` where ``u_i`` are the eigenvectors of the
    LAST fold only.

    NOTE(review): eigenvectors and projections come from the same fold
    (in-sample); confirm this is the intended cross-validation scheme.

    Parameters
    ----------
    beta : float
        Decay factor of the exponential weights. ``beta == 1`` yields uniform
        weights.
    lookback_window : sequence of two ints
        ``[start, end]`` row positions; ``start`` is inclusive and is used as
        the offset of the first fold.
    number_folds : int
        Number of folds; must be positive.
    historical_data : pandas.DataFrame
        One row per observation, one column per asset.

    Returns
    -------
    pandas.DataFrame
        Square matrix indexed and labelled by ``historical_data.columns``.

    Raises
    ------
    ValueError
        If ``number_folds`` is not positive or the window is shorter than the
        number of folds.
    IndexError
        If the folds would read rows outside ``historical_data``.
    """
    start, end = lookback_window[0], lookback_window[1]
    if number_folds <= 0:
        raise ValueError("number_folds must be a positive integer")

    # Number of rows in each fold; truncated to an integer, so trailing rows
    # of the window that do not fill a whole fold are ignored.
    fold_length = int((end - start) / number_folds)
    if fold_length <= 0:
        raise ValueError("lookback_window is too short for the requested number_folds")
    if start < 0 or start + fold_length * number_folds > len(historical_data):
        raise IndexError("lookback_window exceeds the rows available in historical_data")

    assets = historical_data.columns
    n_assets = len(assets)

    # Exponential weights beta**(T-1), ..., beta**0, normalised to sum to one.
    # Only the eigenvectors of the EWA matrix are used below, so its overall
    # scale has no influence on the forecast.
    weights = np.power(float(beta), np.arange(fold_length - 1, -1, -1))
    weights = weights / weights.sum()

    epsilon = np.zeros(n_assets)
    eigenvectors = None

    for k in range(number_folds):
        first_row = start + fold_length * k
        fold = historical_data.iloc[first_row:first_row + fold_length].to_numpy(dtype=float)

        # EWA matrix: sum_t weights[t] * outer(fold[t], fold[t])
        ewa_matrix = fold.T @ (weights[:, None] * fold)

        # Columns of `eigenvectors` are the eigenvectors of the symmetric EWA matrix
        _, eigenvectors = np.linalg.eigh(ewa_matrix)

        # projections[t, i] = <fold[t], u_i>; accumulate the mean squared projection
        projections = fold @ eigenvectors
        epsilon += np.mean(projections ** 2, axis=0)

    # Average epsilon over the folds
    epsilon /= number_folds

    # sum_i epsilon[i] * outer(u_i, u_i), using the eigenvectors of the last fold
    forecast = (eigenvectors * epsilon) @ eigenvectors.T

    return pd.DataFrame(data=forecast, index=assets, columns=assets).fillna(0.)

def portfolio_returns(historical_data, evaluation_window, lookback_window, cov, eta, short_selling=True):
    """Compute the returns of a Markowitz portfolio over the evaluation window.

    The expected returns handed to the optimiser are
    ``process.noised_array(y=<row lookback_window[1] of historical_data>, eta=eta)``
    (presumably a noised copy of that row; see the ``process`` module). The
    weights are obtained from ``EfficientFrontier.efficient_return`` with the
    mean expected return as target, then cleaned with ``clean_weights``.

    Parameters
    ----------
    historical_data : pandas.DataFrame
        One row per observation, one column per ticker.
    evaluation_window : int
        Number of rows, starting at ``lookback_window[1]``, on which the
        portfolio return is computed.
    lookback_window : sequence of two ints
        Only ``lookback_window[1]`` is read here: it is the position of the
        first evaluation row.
    cov : pandas.DataFrame
        Covariance matrix passed to ``EfficientFrontier``.
    eta : float
        Forwarded unchanged to ``process.noised_array``.
    short_selling : bool, default True
        Weight bounds are ``(-1, 1)`` when True and ``(0, 1)`` otherwise.

    Returns
    -------
    pandas.DataFrame
        Single column ``'return'`` indexed like the evaluation rows; each
        value is the weighted sum of the tickers' values on that row.
    """
    eval_start = lookback_window[1]
    eval_end = eval_start + evaluation_window

    # Expected returns are derived from the first evaluation row.
    expected_returns = process.noised_array(y=historical_data.iloc[eval_start, :], eta=eta)

    weight_bounds = (-1, 1) if short_selling else (0, 1)
    ef = EfficientFrontier(expected_returns=expected_returns, cov_matrix=cov, weight_bounds=weight_bounds)
    ef.efficient_return(target_return=expected_returns.mean())

    # Mapping ticker -> weight
    markowitz_weights = pd.Series(ef.clean_weights(), dtype=float)

    # Weighted contribution of every ticker, summed row by row.
    evaluation_data = historical_data.iloc[eval_start:eval_end, :]
    realised = evaluation_data[markowitz_weights.index].dot(markowitz_weights)

    return realised.to_frame(name='return')

# --- Experiment settings ---
# Row range used for the estimation, and number of rows evaluated right after it.
lookback_window, evaluation_window = [0, 200], 1
# EWA decay factor, number of folds for the cross validation, and the `eta`
# argument forwarded to portfolio_returns.
beta, K, eta = 0.95, 4, 0.1

# Forecast the covariance on the lookback window, then compute the returns of
# the resulting Markowitz portfolio on the evaluation window.
cov = cov_forecast(beta, lookback_window, K, df_cleaned)
ret = portfolio_returns(df_cleaned, evaluation_window, lookback_window, cov, eta)

    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


In [4]:
from PyFolioC import PyFolio
from PyFolioC import PyFolioC

# Settings forwarded to PyFolioC. Every name is assigned exactly once so the
# value actually used by the constructor is unambiguous.
historical_data = df_cleaned
number_of_repetitions = 10
number_of_clusters = 38
cov_method = 'forecast'
sigma = 0.1
eta = 0.1
lookback_window = [0, 200]
evaluation_window = 1
beta = 0.95
K = 4  # Number of folds for the cross validation

portfolio = PyFolioC(
    number_of_repetitions=number_of_repetitions,
    historical_data=historical_data,
    lookback_window=lookback_window,
    evaluation_window=evaluation_window,
    number_of_clusters=number_of_clusters,
    sigma=sigma,
    eta=eta,
    short_selling=True,
    cov_method=cov_method,
    beta=beta,
    number_folds=K,
)

NameError: name 'clustering_method' is not defined

In [6]:
# Display the `final_weights` attribute of the `portfolio` object.
portfolio.final_weights


ticker,AA,ABM,ABT,ADI,ADM,ADX,AEE,AEG,AEM,AEP,...,XLI,XLK,XLP,XLU,XLV,XLY,XOM,XRX,YUM,ZTR
weight,-0.00076,0.00155,0.0016,-0.00571,0.00284,0.0008,0.00025,0.00244,0.00439,-0.00032,...,0.00187,0.00123,0.00355,0.00116,0.00221,0.0,0.00296,0.00155,0.00019,0.00282


In [46]:
# NOTE(review): `clustering_method` is not assigned in any cell visible here;
# it must be defined before this cell can run on a fresh kernel.
consolidated_W = PyFolioC(
    number_of_repetitions=number_of_repetitions,
    historical_data=df_cleaned,
    lookback_window=lookback_window,
    evaluation_window=evaluation_window,
    number_of_clusters=number_of_clusters,
    sigma=sigma,
    eta=eta,
    clustering_method=clustering_method,
)

False

In [None]:
def calcul_somme(beta, historical_data=None):
    """Exponentially weighted average of the outer products of the columns.

    With ``T`` columns ``c_0, ..., c_{T-1}`` the result is

        (1 - beta) / (1 - beta**T) * sum_t beta**(T - (t + 1)) * outer(c_t, c_t)

    i.e. the last column gets the largest weight and the weights sum to one.
    The weights are normalised by their sum, which equals the closed form
    above and also covers ``beta == 1`` (uniform weights).

    NOTE(review): the sum runs over the COLUMNS of the frame, so the result is
    (n_rows x n_rows). Confirm the frame orientation matches the intent.

    Parameters
    ----------
    beta : float
        Decay factor of the exponential weights.
    historical_data : pandas.DataFrame, optional
        Data to use. Defaults to ``consolidated_W.historical_data`` (notebook
        global) when omitted.

    Returns
    -------
    numpy.ndarray
        Square matrix of shape ``(n_rows, n_rows)``.

    Raises
    ------
    ValueError
        If the frame has no columns.
    """
    if historical_data is None:
        historical_data = consolidated_W.historical_data

    values = historical_data.to_numpy(dtype=float)
    T = values.shape[1]
    if T == 0:
        raise ValueError("historical_data has no columns")

    # beta**(T-1) for the first column down to beta**0 for the last one
    weights = np.power(float(beta), np.arange(T - 1, -1, -1))
    weights = weights / weights.sum()

    # sum_t weights[t] * outer(values[:, t], values[:, t])
    res = (values * weights) @ values.T

    return res

# Evaluate the weighted sum with a decay factor of 0.5 and display the result.
# Note that this rebinds the notebook-level `beta`.
beta = 0.5
res = calcul_somme(beta)
res
