# TP4 - Modèles de Markov cachés


In [77]:
from pomegranate import *
from pandas import *
from functools import *
from itertools import *

import math
import numpy as np

In [78]:
def get_dataframe_from_csv(*args: str) -> tuple[DataFrame]:
    dataframes = []
    for file_name in args:
        df = read_csv(f'csv/{file_name}.csv')
        dataframes.append(df)
    return tuple(dataframes)

In [79]:
datas_frames = get_dataframe_from_csv('DJI', 'FCHI', 'FTSE', 'GDAXI', 'IXIC')
DJI, FCHI, FTSE, GDAXI, IXIC = datas_frames

In [80]:
def merge_adj_datas_frames(**kwargs: DataFrame) -> DataFrame:
    dfs_date_ajd: list[DataFrame] = []
    for key, df in kwargs.items():
        dfac = df[["Date", "Adj Close"]]
        dfac = dfac.rename(columns={"Adj Close": f"{key} Adj Close"})
        dfs_date_ajd.append(dfac)

    return reduce(lambda left, right: merge(left, right, how='outer'), dfs_date_ajd)

In [81]:
df_adj_close = merge_adj_datas_frames(DJI=DJI, FCHI=FCHI, FTSE=FTSE, GDAXI=GDAXI, IXIC=IXIC)
df_adj_close


Unnamed: 0,Date,DJI Adj Close,FCHI Adj Close,FTSE Adj Close,GDAXI Adj Close,IXIC Adj Close
0,2022-01-10,36068.871094,7115.770020,7445.299805,15768.269531,14942.830078
1,2022-01-11,36252.019531,7183.379883,7491.399902,15941.809570,15153.450195
2,2022-01-12,36290.320313,7237.189941,7551.700195,16010.320313,15188.389648
3,2022-01-13,36113.621094,7201.140137,7563.899902,16031.589844,14806.809570
4,2022-01-14,35911.808594,7143.000000,7543.000000,15883.240234,14893.750000
...,...,...,...,...,...,...
253,2022-06-20,,5920.089844,7121.799805,13265.599609,
254,2022-07-04,,5954.649902,7232.700195,12773.379883,
255,2022-09-05,,6093.220215,7287.399902,12760.780273,
256,2022-11-24,,6707.319824,7466.600098,14539.559570,


In [82]:
def fill_adj_close(df: DataFrame) -> DataFrame:
    df = df.copy()
    columns = df.loc[:, df_adj_close.columns != 'Date']
    for column in columns:
        for i in range(len(df)):
            if math.isnan(df.loc[i, column]):
                df.loc[i, column] = df.loc[i - 1, column]
    return df

In [83]:
df_adj_close = fill_adj_close(df_adj_close)
df_adj_close

Unnamed: 0,Date,DJI Adj Close,FCHI Adj Close,FTSE Adj Close,GDAXI Adj Close,IXIC Adj Close
0,2022-01-10,36068.871094,7115.770020,7445.299805,15768.269531,14942.830078
1,2022-01-11,36252.019531,7183.379883,7491.399902,15941.809570,15153.450195
2,2022-01-12,36290.320313,7237.189941,7551.700195,16010.320313,15188.389648
3,2022-01-13,36113.621094,7201.140137,7563.899902,16031.589844,14806.809570
4,2022-01-14,35911.808594,7143.000000,7543.000000,15883.240234,14893.750000
...,...,...,...,...,...,...
253,2022-06-20,33630.609375,5920.089844,7121.799805,13265.599609,10569.290039
254,2022-07-04,33630.609375,5954.649902,7232.700195,12773.379883,10569.290039
255,2022-09-05,33630.609375,6093.220215,7287.399902,12760.780273,10569.290039
256,2022-11-24,33630.609375,6707.319824,7466.600098,14539.559570,10569.290039


In [84]:
def create_series(series: Series) -> list[float]:
    array: list[float] = []
    for i, value in enumerate(series):
        if (i == 0):
            array.append(0)
        else:
            ln = math.log(value/series[i - 1])
            if (ln > 0.005):
                array.append(1)
            elif (ln < -0.005):
                array.append(-1)
            else:
                array.append(0)
    return array


def toSerie(df: DataFrame):
    dict_serie = {'Date': df['Date']}
    for column in df.loc[:, df_adj_close.columns != 'Date']:
        dict_serie[column] = create_series(df[column])
    return DataFrame.from_dict(dict_serie)

df_adj_close_serie = toSerie(df_adj_close)
df_adj_close_serie

Unnamed: 0,Date,DJI Adj Close,FCHI Adj Close,FTSE Adj Close,GDAXI Adj Close,IXIC Adj Close
0,2022-01-10,0,0,0,0,0
1,2022-01-11,1,1,1,1,1
2,2022-01-12,0,1,1,0,0
3,2022-01-13,0,0,0,0,-1
4,2022-01-14,-1,-1,0,-1,1
...,...,...,...,...,...,...
253,2022-06-20,0,-1,-1,-1,0
254,2022-07-04,0,1,1,-1,0
255,2022-09-05,0,1,1,0,0
256,2022-11-24,0,1,1,1,0


## 1.Construisez avec Pomegranate le MMC décrit ci-dessus.
Pour commencer, vous mettrez
des probabilités arbitraires. Attention : pour quelque raison mistérieuse, Pomegranate
ajoute toujours un état initial start et un état final end même si vous dites expressément
au constructeur de la classe HiddenMarkovModel que vous ne les voulez pas. Il suffit alors
d’ajouter une transition de probabilité 1/2 de start à bullish ainsi que de start à bearish.

In [85]:
bullish = DiscreteDistribution({'-1': 0.2, '0': 0.3,  '1': 0.5})
bearish = DiscreteDistribution({'-1': 0.5, '0': 0.2, '1': 0.3})

In [86]:
state_bullish = State(bullish, name="bullish")
state_bearish = State( bearish, name="bearish" )

In [87]:
model = HiddenMarkovModel('bourse')
model.add_states([state_bullish, state_bearish])
model.add_transition(model.start, state_bullish, 0.50)
model.add_transition(model.start, state_bearish, 0.50)
model.add_transition(state_bullish, state_bullish, 0.60)
model.add_transition(state_bullish, state_bearish, 0.40)
model.add_transition(state_bearish, state_bearish, 0.50)
model.add_transition(state_bearish, state_bullish, 0.50)
model.add_transition(state_bullish, model.end, 0.50)
model.add_transition(state_bearish, model.end, 0.50)
model.bake()


## 2.Créez, avec un petit script Python, une série historique de test.
Où vous allez simuler un marché qui alterne, sur un total di 1000 jours, 50 jours de taureau et 50 jours d’ours;
lorsque le marché est bullish,<br>
&nbsp;&nbsp;&nbsp;&nbsp;P (st= −1) = 0.2, P (st= 0) = 0.3, P (st= +1) = 0.5,<br>
et lorsqu’il est bearish,<br>
&nbsp;&nbsp;&nbsp;&nbsp;P (st= −1) = 0.5, P (st= 0) = 0.2, P (st= +1) = 0.3.

In [88]:
days = 1000
bullish_probs = [0.2, 0.3, 0.5]
bearish_probs = [0.5, 0.2, 0.3]
market = np.empty(days)

for i in range(days):
    if i % 100 <= 49:
        market[i] = np.random.choice([-1, 0, 1], p=bullish_probs)
    else:
        market[i] = np.random.choice([-1, 0, 1], p=bearish_probs)

market = market.astype(int)
market


array([ 0,  0, -1, -1,  1,  0,  1,  0,  0,  1,  1, -1,  1,  1,  1,  0,  1,
        1,  1, -1,  0, -1,  1,  1,  1,  1,  1,  1, -1,  0,  1,  0,  1,  0,
        0,  1, -1,  1,  0,  0,  1, -1,  0, -1, -1,  0,  0,  1,  0,  0,  0,
        0, -1, -1,  1, -1,  0, -1,  1,  1,  1, -1,  1,  1,  1, -1, -1,  0,
        0, -1, -1, -1,  1,  0,  0, -1, -1,  1,  1, -1, -1,  1, -1,  1, -1,
        1,  0, -1, -1,  1,  0, -1,  0, -1,  0,  1, -1, -1, -1,  1,  0,  0,
        1,  0,  0,  1,  1,  1,  1, -1,  0,  0,  0,  1,  1,  0, -1,  0,  0,
        1, -1,  0, -1,  1,  1,  0,  1,  1,  0, -1,  1,  0, -1,  1,  1, -1,
        1,  0,  1, -1,  1,  0,  1, -1,  1,  0, -1,  1,  0,  1, -1,  1,  0,
       -1, -1, -1,  1,  1,  0, -1, -1, -1, -1, -1, -1,  0, -1,  1, -1, -1,
       -1,  1, -1,  1, -1,  0,  1,  0,  0, -1,  0,  1, -1,  0, -1, -1, -1,
       -1,  1, -1, -1,  0,  1,  0, -1,  1, -1,  0, -1,  0,  1,  1, -1,  1,
        1,  1,  0,  1,  1, -1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1, -1,  1,  1,  0

## 3. Entraînez le MMC (méthode fit) avec cette série de test. 
Une fois entraîné, le MMC
devrait reconnaître (méthode viterbi) les deux tendances, au fur et à mesure qu’elles
s’alternent. Vérifiez aussi que les probabilités d’émission des deux états reflètent bien les
paramètres que vous avez utilisé pour produire la série de test.

In [91]:
model.fit([market.tolist()], algorithm='viterbi')
model.viterbi(market)
print("bullish :", bullish.parameters)
print("bearish :", bearish.parameters)

ValueError: Symbol '0' is not defined in a distribution