In [1]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import numpy as np

import yfinance as yf
from ta.volume import MFIIndicator

import seaborn as sns
from copy import copy
import matplotlib.pyplot as plt
import statistics as stats
import math

In [2]:
# Ticker handle for the "^NDX" symbol (Nasdaq-100 index on Yahoo Finance);
# every history request below goes through this object.
ndx = yf.Ticker("^NDX")

Valid intervals: [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo] <br/>
Valid periods: [1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max]

# Données intraday #

In [3]:
# Hourly bars over the last two years; head() previews the first rows.
df_historical_data_intraday = ndx.history(interval="1h", period="2y")
df_historical_data_intraday.head()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
2020-06-15 09:30:00-04:00,9527.608398,9634.824219,9490.064453,9593.790039,0,0,0
2020-06-15 10:30:00-04:00,9589.933594,9637.882812,9563.643555,9619.818359,226464349,0,0
2020-06-15 11:30:00-04:00,9619.604492,9677.96875,9600.012695,9669.588867,66796782,0,0
2020-06-15 12:30:00-04:00,9669.15332,9712.15332,9657.898438,9695.222656,62745422,0,0
2020-06-15 13:30:00-04:00,9694.538086,9807.669922,9690.604492,9798.049805,79934093,0,0


En intervalle de temps 1h, les données ne peuvent remonter que jusqu'à 2 ans max

# Données Daily #

## Exploration liée au Volume selon données yfinance ##

In [4]:
# Daily data, to be compared against the investpy values.
# period="max" requests the longest history the API offers for this ticker.
df_historical_data = ndx.history(interval="1d", period="max")
df_historical_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1985-10-01,110.620003,112.160004,110.565002,112.139999,153160000,0,0
1985-10-02,112.139999,112.540001,110.779999,110.824997,164640000,0,0
1985-10-03,110.839996,111.184998,110.120003,110.870003,147300000,0,0
1985-10-04,110.870003,110.870003,109.855003,110.074997,147900000,0,0
1985-10-07,110.074997,110.135002,108.175003,108.199997,128640000,0,0


In [5]:
# Most recent daily rows, to see where the downloaded history ends.
df_historical_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-06-08,12682.80957,12776.650391,12578.549805,12615.129883,4689310000,0,0
2022-06-09,12543.370117,12666.410156,12265.860352,12269.780273,5382110000,0,0
2022-06-10,12052.589844,12082.69043,11825.349609,11832.820312,5125980000,0,0
2022-06-13,11472.19043,11568.650391,11254.19043,11288.320312,5912360000,0,0
2022-06-14,11385.200195,11418.217773,11205.923828,11250.227539,629197356,0,0


In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) for each numeric column.
df_historical_data.describe()

Users of the modes 'nearest', 'lower', 'higher', or 'midpoint' are encouraged to review the method they. (Deprecated NumPy 1.22)
  return np.percentile(values, q, axis=axis, interpolation=interpolation)


Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
count,9252.0,9252.0,9252.0,9252.0,9252.0,9252.0,9252.0
mean,2717.91176,2740.050641,2693.952167,2718.215537,1529805000.0,0.0,0.0
std,3309.120036,3333.045294,3281.294406,3308.818979,1190715000.0,0.0,0.0
min,107.160004,108.269997,106.75,107.160004,31740000.0,0.0,0.0
25%,408.910004,411.357506,406.105003,408.910004,456075000.0,0.0,0.0
50%,1600.299988,1614.0,1585.850037,1601.0,1641635000.0,0.0,0.0
75%,3484.315002,3521.549988,3447.757446,3484.19751,2032610000.0,0.0,0.0
max,16644.769531,16764.859375,16523.830078,16573.339844,14250900000.0,0.0,0.0


In [7]:
# Number of missing values per column.
df_historical_data.isnull().sum()

Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64

In [8]:
# Dividends and Stock Splits are constant 0 in describe() above, so they are removed.
# Reassigning (instead of inplace=True) with errors="ignore" keeps the cell
# re-runnable: a second execution no longer raises KeyError on columns that
# were already dropped.
df_historical_data = df_historical_data.drop(
    columns=["Dividends", "Stock Splits"], errors="ignore"
)

In [9]:
# (rows, columns) of the daily frame after removing the two unused columns.
df_historical_data.shape

(9252, 5)

### Indicateur technique : MFI ###

In [85]:
# According to "MFI Performance v5", every window below outperforms Buy & Hold.
# The windows nevertheless range from roughly one to three times the shortest
# length, and they lead to a different number of trades.
mfi_list = [10, 14, 17, 37]

In [86]:
# Working frame for the MFI features: reset_index() returns a new DataFrame
# (df_historical_data is left untouched) in which the date index has become
# a regular column and rows are numbered 0..n-1.
df_mfi = df_historical_data.reset_index()

In [87]:
for w in mfi_list:
    # Money Flow Index over a window of w rows, computed on the daily frame.
    s_mfi = MFIIndicator(
        high=df_historical_data.High,
        low=df_historical_data.Low,
        close=df_historical_data.Close,
        volume=df_historical_data.Volume,
        window=w,
    ).money_flow_index()

    # Store the rounded series as a new column.
    # to_list() is required because of index alignment, not the Python version:
    # s_mfi is built from df_historical_data columns (presumably keeping that
    # frame's date index), whereas df_mfi was re-indexed 0..n-1. Assigning the
    # Series directly would align on labels; a plain list is assigned by position.
    df_mfi['MFI_' + str(w)] = s_mfi.round(2).to_list()

# Longest window, i.e. the one that determines how many leading rows are
# incomplete. Taken with max() so it no longer depends on mfi_list being sorted.
w_max = max(mfi_list)

In [88]:
# Start at the first row where every MFI column is populated.
# The start row is located from the data instead of slicing at a fixed
# offset, which makes the cell idempotent: re-running it finds row 0 and
# drops nothing more (the fixed slice removed another block of rows each time).
mfi_columns = ["MFI_" + str(window) for window in mfi_list]
rows_complete = df_mfi[mfi_columns].notna().all(axis=1)
if not rows_complete.any():
    raise ValueError("No row has all MFI columns populated")
# idxmax() on a boolean Series returns the label of the first True.
df_mfi = df_mfi.loc[rows_complete.idxmax():].reset_index(drop=True)

In [89]:
# Missing values per column, to confirm the trim left no empty MFI cell.
df_mfi.isnull().sum()

Date      0
Open      0
High      0
Low       0
Close     0
Volume    0
MFI_10    0
MFI_14    0
MFI_17    0
MFI_37    0
dtype: int64

In [90]:
# Most recent rows, now including the MFI columns.
df_mfi.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,MFI_10,MFI_14,MFI_17,MFI_37
9211,2022-06-08,12682.80957,12776.650391,12578.549805,12615.129883,4689310000,71.66,57.79,53.5,40.29
9212,2022-06-09,12543.370117,12666.410156,12265.860352,12269.780273,5382110000,61.16,57.41,53.15,40.18
9213,2022-06-10,12052.589844,12082.69043,11825.349609,11832.820312,5125980000,51.35,57.6,47.0,37.52
9214,2022-06-13,11472.19043,11568.650391,11254.19043,11288.320312,5912360000,40.93,50.24,46.7,37.49
9215,2022-06-14,11385.200195,11418.217773,11205.923828,11250.227539,629197356,32.11,53.31,49.33,38.56


In [91]:
# (rows, columns) after removing the leading rows without MFI values.
df_mfi.shape

(9216, 10)

#### Indicateurs techniques avancés sur MFI - Décomposé ####

To do :
<ul>
<li>Moyenne mobile simple (SMA) sur le MFI</li>
<li>Variation du MFI par rapport à -3, -5 et -8 jours</li>
</ul>

In [64]:
# Names of the raw MFI columns, derived from mfi_list.
# Selecting "the last four columns" by position only works while nothing has
# been appended to df_mfi; as soon as derived columns exist it silently picks
# those instead.
colonnes_mfi = ["MFI_" + str(window) for window in mfi_list]

In [40]:
# Rolling-mean windows (in rows) shared by every MFI column; defined once
# instead of being reassigned on each loop iteration.
short_p = 3
p = 5

for colonne in colonnes_mfi:
    # Short simple moving average of the MFI column, rounded to 2 decimals.
    # The first short_p - 1 rows are NaN (window not yet full).
    df_mfi[colonne + "_SMA" + str(short_p)] = df_mfi[colonne].rolling(short_p).mean().round(2)

    # Longer simple moving average, same treatment (first p - 1 rows are NaN).
    df_mfi[colonne + "_SMA" + str(p)] = df_mfi[colonne].rolling(p).mean().round(2)

In [41]:
# Most recent rows, now including the SMA columns added above.
df_mfi.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,MFI_10,MFI_14,MFI_17,MFI_37,MFI_10_SMA3,MFI_10_SMA5,MFI_14_SMA3,MFI_14_SMA5,MFI_17_SMA3,MFI_17_SMA5,MFI_37_SMA3,MFI_37_SMA5
9211,2022-06-08,12682.80957,12776.650391,12578.549805,12615.129883,4689310000,71.66,57.79,53.5,40.29,68.35,65.14,55.29,54.81,53.47,51.89,39.45,38.71
9212,2022-06-09,12543.370117,12666.410156,12265.860352,12269.780273,5382110000,61.16,57.41,53.15,40.18,64.94,65.4,55.22,54.75,53.58,52.02,39.38,39.25
9213,2022-06-10,12052.589844,12082.69043,11825.349609,11832.820312,5125980000,51.35,57.6,47.0,37.52,61.39,63.51,57.6,56.18,51.22,52.11,39.33,39.21
9214,2022-06-13,11472.19043,11568.650391,11254.19043,11288.320312,5912360000,40.93,50.24,46.7,37.49,51.15,57.42,55.08,54.7,48.95,50.89,38.4,38.63
9215,2022-06-14,11385.200195,11418.217773,11205.923828,11250.227539,629197356,32.11,53.31,49.33,38.56,41.46,51.44,53.72,55.27,47.68,49.94,37.86,38.81


In [44]:
# Relative change of the raw MFI_10 value versus 3 rows earlier.
# The first 3 rows are NaN because there is no earlier value to compare with.
mfi_10 = df_mfi["MFI_10"]
mfi_10_lag3 = mfi_10.shift(3)
((mfi_10 - mfi_10_lag3) / mfi_10_lag3).round(2)

0        NaN
1        NaN
2        NaN
3      -0.09
4      -0.10
        ... 
9211    0.18
9212   -0.14
9213   -0.17
9214   -0.43
9215   -0.47
Name: MFI_10, Length: 9216, dtype: float64

In [45]:
# Relative change of the 3-row moving average of MFI_10 versus 3 rows earlier.
# The leading NaN rows combine the rolling warm-up and the shift.
mfi_10_sma3 = df_mfi["MFI_10_SMA3"]
mfi_10_sma3_lag3 = mfi_10_sma3.shift(3)
((mfi_10_sma3 - mfi_10_sma3_lag3) / mfi_10_sma3_lag3).round(2)

0        NaN
1        NaN
2        NaN
3        NaN
4        NaN
        ... 
9211    0.20
9212    0.01
9213   -0.05
9214   -0.25
9215   -0.36
Name: MFI_10_SMA3, Length: 9216, dtype: float64

#### Indicateurs techniques avancés sur MFI - Synthèse ####

In [92]:
# Names of the raw MFI columns, derived from mfi_list rather than from the
# position of the last four columns: if any derived column has already been
# appended to df_mfi, columns[-4:] returns those instead of the MFI columns.
colonnes_mfi = ["MFI_" + str(window) for window in mfi_list]

# Look-back periods (in rows): the first two are also used as SMA windows,
# all three as lags for the variation columns.
periodes = [3, 5, 8]

In [93]:
def _relative_change(series, lag):
    """Relative change of `series` versus its value `lag` rows earlier, rounded to 2 decimals."""
    previous = series.shift(lag)
    return round((series - previous) / previous, 2)


for colonne in colonnes_mfi:
    # Moving averages of the MFI column over the two shortest periods
    # (periodes[0] -> "short", periodes[1] -> "mid").
    col_name_short = str(colonne) + "_SMA" + str(periodes[0])
    col_name_mid = str(colonne) + "_SMA" + str(periodes[1])
    for sma_name, window in ((col_name_short, periodes[0]), (col_name_mid, periodes[1])):
        df_mfi[sma_name] = round(df_mfi[colonne].rolling(window).mean(), 2)

    # Variations (slope) at -3, -5 and -8 rows: for each lag, first on the raw
    # MFI value, then on the short SMA, then on the mid SMA. Columns are added
    # in exactly that order.
    for p in periodes:
        for base in (colonne, col_name_short, col_name_mid):
            df_mfi["var_" + str(base) + "_" + str(p) + "j"] = _relative_change(df_mfi[base], p)

In [97]:
# Missing values per column: each derived column is empty for its leading
# rows (rolling window not yet full and/or no earlier row to shift from).
df_mfi.isnull().sum()

Date                   0
Open                   0
High                   0
Low                    0
Close                  0
Volume                 0
MFI_10                 0
MFI_14                 0
MFI_17                 0
MFI_37                 0
MFI_10_SMA3            2
MFI_10_SMA5            4
var_MFI_10_3j          3
var_MFI_10_SMA3_3j     5
var_MFI_10_SMA5_3j     7
var_MFI_10_5j          5
var_MFI_10_SMA3_5j     7
var_MFI_10_SMA5_5j     9
var_MFI_10_8j          8
var_MFI_10_SMA3_8j    10
var_MFI_10_SMA5_8j    12
MFI_14_SMA3            2
MFI_14_SMA5            4
var_MFI_14_3j          3
var_MFI_14_SMA3_3j     5
var_MFI_14_SMA5_3j     7
var_MFI_14_5j          5
var_MFI_14_SMA3_5j     7
var_MFI_14_SMA5_5j     9
var_MFI_14_8j          8
var_MFI_14_SMA3_8j    10
var_MFI_14_SMA5_8j    12
MFI_17_SMA3            2
MFI_17_SMA5            4
var_MFI_17_3j          3
var_MFI_17_SMA3_3j     5
var_MFI_17_SMA5_3j     7
var_MFI_17_5j          5
var_MFI_17_SMA3_5j     7
var_MFI_17_SMA5_5j     9


In [101]:
# Drop the leading rows in which at least one derived column is still NaN.
# The start row is located from the data rather than hard-coded: the cell
# stays correct if the SMA windows or lags change, and it is idempotent
# (a re-run finds row 0 and removes nothing more).
rows_complete = df_mfi.notna().all(axis=1)
if not rows_complete.any():
    raise ValueError("No row has every column populated")
# idxmax() on a boolean Series returns the label of the first True.
df_mfi = df_mfi.loc[rows_complete.idxmax():].reset_index(drop=True)
df_mfi.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,MFI_10,MFI_14,MFI_17,MFI_37,MFI_10_SMA3,MFI_10_SMA5,var_MFI_10_3j,var_MFI_10_SMA3_3j,var_MFI_10_SMA5_3j,var_MFI_10_5j,var_MFI_10_SMA3_5j,var_MFI_10_SMA5_5j,var_MFI_10_8j,var_MFI_10_SMA3_8j,var_MFI_10_SMA5_8j,MFI_14_SMA3,MFI_14_SMA5,var_MFI_14_3j,var_MFI_14_SMA3_3j,var_MFI_14_SMA5_3j,var_MFI_14_5j,var_MFI_14_SMA3_5j,var_MFI_14_SMA5_5j,var_MFI_14_8j,var_MFI_14_SMA3_8j,var_MFI_14_SMA5_8j,MFI_17_SMA3,MFI_17_SMA5,var_MFI_17_3j,var_MFI_17_SMA3_3j,var_MFI_17_SMA5_3j,var_MFI_17_5j,var_MFI_17_SMA3_5j,var_MFI_17_SMA5_5j,var_MFI_17_8j,var_MFI_17_SMA3_8j,var_MFI_17_SMA5_8j,MFI_37_SMA3,MFI_37_SMA5,var_MFI_37_3j,var_MFI_37_SMA3_3j,var_MFI_37_SMA5_3j,var_MFI_37_5j,var_MFI_37_SMA3_5j,var_MFI_37_SMA5_5j,var_MFI_37_8j,var_MFI_37_SMA3_8j,var_MFI_37_SMA5_8j
0,1985-12-09,126.955002,127.644997,126.830002,127.485001,209600000,52.34,66.44,67.03,67.14,63.28,67.13,-0.28,-0.13,-0.13,-0.28,-0.2,-0.13,-0.29,-0.17,-0.14,69.9,70.25,-0.11,-0.03,-0.07,-0.1,-0.11,-0.11,-0.17,-0.13,-0.12,69.7,71.0,-0.09,-0.07,-0.08,-0.14,-0.13,-0.11,-0.14,-0.11,-0.09,70.18,71.03,-0.07,-0.04,-0.05,-0.1,-0.07,-0.04,-0.08,-0.02,0.01
1,1985-12-10,127.485001,128.524994,127.154999,127.974998,223600000,61.87,66.78,67.4,67.4,59.06,64.98,-0.17,-0.2,-0.14,-0.15,-0.22,-0.16,-0.26,-0.23,-0.18,66.98,70.21,-0.12,-0.07,-0.06,-0.0,-0.09,-0.08,-0.17,-0.17,-0.13,67.37,69.95,-0.09,-0.08,-0.08,-0.07,-0.14,-0.11,-0.14,-0.14,-0.11,68.22,70.09,-0.08,-0.06,-0.05,-0.06,-0.09,-0.06,-0.11,-0.07,-0.02
2,1985-12-11,127.974998,129.649994,127.974998,129.179993,253000000,63.23,67.58,68.1,70.31,59.15,62.99,0.0,-0.16,-0.12,-0.14,-0.19,-0.18,-0.23,-0.26,-0.2,66.93,68.81,-0.0,-0.08,-0.04,-0.09,-0.07,-0.09,-0.16,-0.17,-0.15,67.51,68.92,0.01,-0.06,-0.06,-0.07,-0.1,-0.11,-0.19,-0.16,-0.13,68.28,69.65,0.0,-0.05,-0.04,-0.03,-0.07,-0.06,-0.09,-0.09,-0.05
3,1985-12-12,129.179993,130.100006,129.179993,129.764999,234600000,63.85,67.66,73.37,72.69,62.98,60.85,0.22,-0.0,-0.09,-0.14,-0.14,-0.19,-0.12,-0.21,-0.21,67.34,67.23,0.02,-0.04,-0.04,-0.1,-0.07,-0.1,-0.09,-0.14,-0.15,69.62,68.72,0.09,-0.0,-0.03,-0.01,-0.05,-0.1,-0.06,-0.13,-0.13,70.13,69.53,0.08,-0.0,-0.02,-0.01,-0.03,-0.06,-0.03,-0.07,-0.06
4,1985-12-13,129.764999,132.169998,129.764999,132.125,263400000,66.35,68.42,73.94,73.21,64.48,61.53,0.07,0.09,-0.05,0.05,-0.08,-0.14,-0.09,-0.15,-0.2,67.89,67.38,0.02,0.01,-0.04,0.01,-0.07,-0.06,0.02,-0.08,-0.12,71.8,69.97,0.1,0.07,0.0,0.09,0.0,-0.04,0.02,-0.08,-0.11,72.07,70.15,0.09,0.06,0.0,0.04,0.0,-0.03,0.02,-0.03,-0.06


In [103]:
# (rows, columns) of the feature frame after removing the incomplete leading rows.
df_mfi.shape

(9204, 54)