In [10]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import numpy as np

import yfinance as yf
from ta.volume import MFIIndicator

import seaborn as sns
from copy import copy
import matplotlib.pyplot as plt
import statistics as stats
import math

In [2]:
# Ticker handle for the "^NDX" symbol; the history() calls in this notebook go through it.
ndx = yf.Ticker("^NDX")

Valid intervals: [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo] <br/>
Valid periods: [1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max]

# Données intraday #

In [3]:
# Hourly bars over the last two years, followed by a preview of the first rows.
df_historical_data_intraday = ndx.history(period="2y", interval="1h")
df_historical_data_intraday.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
2020-06-15 09:30:00-04:00,9527.608398,9634.824219,9490.064453,9593.790039,0,0,0
2020-06-15 10:30:00-04:00,9589.933594,9637.882812,9563.643555,9619.818359,226464349,0,0
2020-06-15 11:30:00-04:00,9619.604492,9677.96875,9600.012695,9669.588867,66796782,0,0
2020-06-15 12:30:00-04:00,9669.15332,9712.15332,9657.898438,9695.222656,62745422,0,0
2020-06-15 13:30:00-04:00,9694.538086,9807.669922,9690.604492,9798.049805,79934093,0,0


Avec un intervalle de 1h, l'historique disponible ne remonte qu'à 2 ans au maximum.

# Données Daily #

## Exploration liée au Volume selon données yfinance ##

In [4]:
# Daily bars over the longest available history
# (to be compared with the values obtained from investpy).
df_historical_data = ndx.history(period="max", interval="1d")
df_historical_data.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1985-10-01,110.620003,112.160004,110.565002,112.139999,153160000,0,0
1985-10-02,112.139999,112.540001,110.779999,110.824997,164640000,0,0
1985-10-03,110.839996,111.184998,110.120003,110.870003,147300000,0,0
1985-10-04,110.870003,110.870003,109.855003,110.074997,147900000,0,0
1985-10-07,110.074997,110.135002,108.175003,108.199997,128640000,0,0


In [5]:
# Last rows of the daily frame, to see where the history ends.
df_historical_data.tail(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-06-07,12456.490234,12737.299805,12413.55957,12711.679688,4383960000,0,0
2022-06-08,12682.80957,12776.650391,12578.549805,12615.129883,4689310000,0,0
2022-06-09,12543.370117,12666.410156,12265.860352,12269.780273,5382110000,0,0
2022-06-10,12052.589844,12082.69043,11825.349609,11832.820312,5125980000,0,0
2022-06-13,11472.185547,11568.645508,11254.18457,11295.958008,874560760,0,0


In [6]:
# Summary statistics per column (count, mean, std, min, quartiles, max).
df_historical_data.describe()

Users of the modes 'nearest', 'lower', 'higher', or 'midpoint' are encouraged to review the method they. (Deprecated NumPy 1.22)
  return np.percentile(values, q, axis=axis, interpolation=interpolation)


Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
count,9251.0,9251.0,9251.0,9251.0,9251.0,9251.0,9251.0
mean,2716.974857,2739.112562,2693.032053,2717.294083,1529358000.0,0.0,0.0
std,3308.0715,3332.003804,3280.277928,3307.810483,1189890000.0,0.0,0.0
min,107.160004,108.269997,106.75,107.160004,31740000.0,0.0,0.0
25%,408.910004,411.335007,406.020004,408.910004,456050000.0,0.0,0.0
50%,1599.97998,1613.780029,1585.660034,1600.859985,1641560000.0,0.0,0.0
75%,3484.280029,3519.974976,3445.484985,3482.079956,2032270000.0,0.0,0.0
max,16644.769531,16764.859375,16523.830078,16573.339844,14250900000.0,0.0,0.0


In [7]:
# Number of missing values in each column.
df_historical_data.isna().sum()

Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64

In [8]:
# "Dividends" and "Stock Splits" are zero on every row (see describe() output),
# so they are removed from the working frame.
# Rebinding the name with errors="ignore" (instead of inplace=True) keeps this
# cell re-runnable: executing it a second time no longer raises KeyError on
# columns that are already gone.
df_historical_data = df_historical_data.drop(
    columns=["Dividends", "Stock Splits"],
    errors="ignore",
)

In [9]:
# (rows, columns) of the daily frame at this point.
df_historical_data.shape

(9251, 5)

### Indicateur technique : MFI ###

In [104]:
# According to "MFI Performance v5", every window below outperforms Buy & Hold.
# Their look-back periods nevertheless differ by up to a factor of three,
# and they lead to a different number of trades.
mfi_list = [
    10,
    14,
    17,
    34,
]

In [105]:
# Build the MFI working frame as a new object, with the dates moved from the
# index into a regular column (reset_index returns a new frame, so the source
# frame is left untouched).
df_mfi = df_historical_data.reset_index()

In [106]:
# Compute one Money Flow Index column per look-back window in mfi_list.
for w in mfi_list:
    s_mfi = MFIIndicator(
        high=df_historical_data["High"],
        low=df_historical_data["Low"],
        close=df_historical_data["Close"],
        volume=df_historical_data["Volume"],
        window=w,
    ).money_flow_index()

    # Round to 2 decimals and store the values positionally.
    # NOTE(review): s_mfi presumably keeps the date index of df_historical_data,
    # whereas df_mfi has had its index reset to 0..n-1. Assigning the Series
    # directly would align on index labels and fill the column with NaN, which
    # is why the raw values are assigned instead (not a Python-version quirk).
    df_mfi[f"MFI_{w}"] = s_mfi.round(2).to_numpy()

# Longest look-back window: it determines how many leading rows have an
# undefined MFI. Taken with max() so the result does not depend on the order
# of mfi_list (previously the last loop value was used).
w_max = max(mfi_list)

In [107]:
# Start at the first row where every MFI column is populated: skip the
# w_max - 1 leading rows and renumber the remaining rows from 0.
df_mfi = df_mfi.iloc[w_max - 1:].reset_index(drop=True)

In [108]:
# Number of missing values per column after trimming the leading rows.
df_mfi.isna().sum()

Date      0
Open      0
High      0
Low       0
Close     0
Volume    0
MFI_10    0
MFI_14    0
MFI_17    0
MFI_34    0
dtype: int64

In [109]:
# Last rows of the MFI frame, including the computed MFI_* columns.
df_mfi.tail(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,MFI_10,MFI_14,MFI_17,MFI_34
9213,2022-06-07,12456.490234,12737.299805,12413.55957,12711.679688,4383960000,61.99,50.47,54.08,38.02
9214,2022-06-08,12682.80957,12776.650391,12578.549805,12615.129883,4689310000,71.66,57.79,53.5,40.87
9215,2022-06-09,12543.370117,12666.410156,12265.860352,12269.780273,5382110000,61.16,57.41,53.15,40.97
9216,2022-06-10,12052.589844,12082.69043,11825.349609,11832.820312,5125980000,51.35,57.6,47.0,40.95
9217,2022-06-13,11472.185547,11568.645508,11254.18457,11295.958008,874560760,45.16,53.93,49.49,42.02


In [110]:
# (rows, columns) of the MFI frame after the leading rows were removed.
df_mfi.shape

(9218, 10)

To Do :
<ul>
<li>SMA on MFI</li>
<li>MFI variation compared to the values at -3, -5 and -8 days</li>
</ul>