In [26]:
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from numpy import log as ln

In [27]:
# Read recipe inputs
weekly_MA = dataiku.Dataset("weekly_MA")
hist = weekly_MA.get_dataframe()

# Position et distance prix / MA40 weekly #

In [28]:
def refined_indicators(df):
    '''Determines whether closing price is over (1) or below (0) sma'''

    indicator = 0

    df_calc = df[["Close","SMA_40"]].copy()
    df_calc["diff"] = df_calc.Close - df_calc.SMA_40
    df_calc["relative_distance"] = ((df_calc.Close - df_calc.SMA_40)/df_calc.SMA_40)*100

    df_calc["diff"] = df_calc["diff"].apply(lambda x: 1 if x>0 else 0)
    df_calc["relative_distance"] = df_calc["relative_distance"].apply(lambda x: round(x,2))

    return df_calc["diff"], df_calc["relative_distance"]

In [29]:
hist["Position"], hist["Relative_distance"] = refined_indicators(hist)

In [30]:
hist.tail(10)

Unnamed: 0,Date,Open,Close,Volume,Num_Semaine,SMA_40,Position,Relative_distance
1899,2022-11-21,11623.35,11756.03,14779100000,2022-47,12508.35,0,-6.01
1900,2022-11-28,11684.07,11994.26,25198780000,2022-48,12453.48,0,-3.69
1901,2022-12-05,11899.54,11563.33,22186460000,2022-49,12396.62,0,-6.72
1902,2022-12-12,11572.26,11243.72,29946270000,2022-50,12345.16,0,-8.92
1903,2022-12-19,11243.0,10985.45,22410180000,2022-51,12259.3,0,-10.39
1904,2022-12-26,10944.3,10939.76,15783390000,2022-52,12163.93,0,-10.06
1905,2023-01-02,11038.42,11040.35,19830080000,2023-01,12068.41,0,-8.52
1906,2023-01-09,11133.07,11541.48,25886490000,2023-02,11998.77,0,-3.81
1907,2023-01-16,11531.54,11619.03,21067950000,2023-03,11941.91,0,-2.7
1908,2023-01-20,11363.9,11619.03,1098966000,2023-03,11898.47,0,-2.35


# Distances et variations de prix Ln #

In [31]:
hist["Last_wk_close"] = hist.Close.shift(1)

In [32]:
hist.dropna(subset=['Last_wk_close'], inplace=True)
hist.reset_index(drop=True, inplace=True)

In [33]:
def ln_indicators(df):
    '''Calulates return and relative distance with natural log instead of % variation'''

    df_calc = df[["Close","SMA_40","Last_wk_close"]].copy()

    df_calc["ln_diff"] = ln(df.Close/df.Last_wk_close)*100
    df_calc["ln_diff"] = df_calc["ln_diff"].apply(lambda x: round(x,2))

    df_calc["ln_relative_distance"] = ln(df.Close/df.SMA_40)*100
    df_calc["ln_relative_distance"] = df_calc["ln_relative_distance"].apply(lambda x: round(x,2))


    return df_calc["ln_diff"], df_calc["ln_relative_distance"]

In [34]:
hist["ln_Return"], hist["ln_Relative_distance"] = ln_indicators(hist)

In [35]:
hist.drop(columns=["Last_wk_close","Open","Volume"],inplace=True)

In [36]:
hist.tail()

Unnamed: 0,Date,Close,Num_Semaine,SMA_40,Position,Relative_distance,ln_Return,ln_Relative_distance
1903,2022-12-26,10939.76,2022-52,12163.93,0,-10.06,-0.42,-10.61
1904,2023-01-02,11040.35,2023-01,12068.41,0,-8.52,0.92,-8.9
1905,2023-01-09,11541.48,2023-02,11998.77,0,-3.81,4.44,-3.89
1906,2023-01-16,11619.03,2023-03,11941.91,0,-2.7,0.67,-2.74
1907,2023-01-20,11619.03,2023-03,11898.47,0,-2.35,0.0,-2.38


# Colonnes Lags

In [38]:
hist["ln_Return_lag1"] = hist["ln_Return"].shift(1)
hist["ln_Return_lag4"] = hist["ln_Return"].shift(4)

hist["ln_Rel_dist_lag1"] = hist["ln_Relative_distance"].shift(1)
hist["ln_Rel_dist_lag1"] = hist["ln_Relative_distance"].shift(4)

In [40]:
hist.dropna(inplace=True)
hist.reset_index(drop=True, inplace=True)

In [41]:
hist.tail(10)

Unnamed: 0,Date,Close,Num_Semaine,SMA_40,Position,Relative_distance,ln_Return,ln_Relative_distance,ln_Return_lag1,ln_Return_lag4,ln_Rel_dist_lag1
1894,2022-11-21,11756.03,2022-47,12508.35,0,-6.01,0.67,-6.2,-1.19,2.06,-10.24
1895,2022-11-28,11994.26,2022-48,12453.48,0,-3.69,2.01,-3.76,0.67,-6.15,-15.69
1896,2022-12-05,11563.33,2022-49,12396.62,0,-6.72,-3.66,-6.96,2.01,8.47,-6.65
1897,2022-12-12,11243.72,2022-50,12345.16,0,-8.92,-2.8,-9.35,-3.66,-1.19,-7.33
1898,2022-12-19,10985.45,2022-51,12259.3,0,-10.39,-2.32,-10.97,-2.8,0.67,-6.2
1899,2022-12-26,10939.76,2022-52,12163.93,0,-10.06,-0.42,-10.61,-2.32,2.01,-3.76
1900,2023-01-02,11040.35,2023-01,12068.41,0,-8.52,0.92,-8.9,-0.42,-3.66,-6.96
1901,2023-01-09,11541.48,2023-02,11998.77,0,-3.81,4.44,-3.89,0.92,-2.8,-9.35
1902,2023-01-16,11619.03,2023-03,11941.91,0,-2.7,0.67,-2.74,4.44,-2.32,-10.97
1903,2023-01-20,11619.03,2023-03,11898.47,0,-2.35,0.0,-2.38,0.67,-0.42,-10.61


# Ecriture #

In [42]:
# Write recipe outputs
weekly_MA_enriched = dataiku.Dataset("weekly_MA_enriched")
weekly_MA_enriched.write_with_schema(hist)

1904 rows successfully written (dhTYjeRkhm)
