In [128]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import plotly.graph_objects as go
from plotly.subplots import make_subplots

df = pd.read_csv("data.csv")

In [129]:
from trade_ibkr.calc import calc_support_resistance_levels

sr_levels = calc_support_resistance_levels(df)

In [135]:
from trade_ibkr.utils import closest_diff

# ----------------- Add needed indicators

df["EMA_5"] = ta.ema(df["close"], length=5)
df["EMA_10"] = ta.ema(df["close"], length=10)
df["EMA_20"] = ta.ema(df["close"], length=20)
df["EMA_60"] = ta.ema(df["close"], length=60)
df["volume ema 10"] = ta.ema(df["volume"], length=10)
df["down wick ratio"] = abs((df["low"] - np.minimum(df["open"], df["close"])) / (df["high"] - df["low"]))
df["up wick ratio"] = abs((df["high"] - np.maximum(df["open"], df["close"])) / (df["high"] - df["low"]))
df["sr level diff"] = df["close"].apply(lambda val: closest_diff(sorted(sr_levels.levels_data.keys()), val))

# ----------------- Define target

# 0: -5 Ampl
# 1: -2 Ampl
# 2: -2 ~ 2 Ampl
# 3: 2 Ampl
# 4: 5 Ampl
df["close +5 diff"] = df["close"].shift(-7) - df["close"]
conditions = [
    (df["close +5 diff"] < df["amplitude"] * -2),
    (df["close +5 diff"] > df["amplitude"] * 2),
    (df["close +5 diff"] < 0),
    (df["close +5 diff"] > 0),
]
df["outcome"] = np.select(conditions, range(len(conditions)))


# ----------------- Define features

def directional_penetration(col_name: str):
    df[f"SIG_LOWER_TO_{col_name.upper()}-"] = (
            ((df["close"].shift(3) - df[col_name]) < df["amplitude"] * -0.8) &
            ((df["close"] - df[col_name]) > df["amplitude"] * -0.3)
    )
    df[f"SIG_UPPER_TO_{col_name.upper()}+"] = (
            ((df["close"].shift(3) - df[col_name]) > df["amplitude"] * 0.8) &
            ((df["close"] - df[col_name]) < df["amplitude"] * 0.3)
    )


def directional_penetration_const(num: float):
    df[f"SIG_LOWER_TO_{num}-"] = (
            ((df["close"].shift(3) - num) < df["amplitude"] * -0.8) &
            ((df["close"] - num) > df["amplitude"] * -0.3)
    )
    df[f"SIG_UPPER_TO_{num}+"] = (
            ((df["close"].shift(3) - num) > df["amplitude"] * 0.8) &
            ((df["close"] - num) < df["amplitude"] * 0.3)
    )


df["SIG_C_5EMA+"] = df["close"] > df["EMA_5"]
df["SIG_C_5EMA-"] = df["close"] < df["EMA_5"]
df["SIG_C_10EMA+"] = df["close"] > df["EMA_10"]
df["SIG_C_10EMA-"] = df["close"] < df["EMA_10"]
df["SIG_C_20EMA+"] = df["close"] > df["EMA_20"]
df["SIG_C_20EMA-"] = df["close"] < df["EMA_20"]
df["SIG_5EMA_10EMA+"] = df["EMA_5"] > df["EMA_10"]
df["SIG_5EMA_10EMA-"] = df["EMA_5"] < df["EMA_10"]
df["SIG_10EMA_20EMA+"] = df["EMA_10"] > df["EMA_20"]
df["SIG_10EMA_20EMA-"] = df["EMA_10"] < df["EMA_20"]
df["SIG_HIGH_VOL_UP+"] = (df["down wick ratio"] >= 0.6) & (df["volume"] > df["volume ema 10"] * 1.3)
df["SIG_HIGH_VOL_DOWN-"] = (df["up wick ratio"] >= 0.6) & (df["volume"] > df["volume ema 10"] * 1.3)
df["SIG_STRONG_UP+"] = df["close"] - df["open"] > df["amplitude"] * 1.5
df["SIG_STRONG_DOWN-"] = df["close"] - df["open"] < df["amplitude"] * -1.5
directional_penetration("vwap")
directional_penetration("EMA_60")
# for level in sorted(sr_levels.levels_data.keys()):
#     directional_penetration_const(level)

df

Unnamed: 0,date,open,high,low,close,epoch_sec,volume,vwap,date.1,market_date,...,SIG_10EMA_20EMA+,SIG_10EMA_20EMA-,SIG_HIGH_VOL_UP+,SIG_HIGH_VOL_DOWN-,SIG_STRONG_UP+,SIG_STRONG_DOWN-,SIG_LOWER_TO_VWAP-,SIG_UPPER_TO_VWAP+,SIG_LOWER_TO_EMA_60-,SIG_UPPER_TO_EMA_60+
0,2022-02-16 17:00:00,14593.25,14602.75,14583.50,14591.50,1645052400,685,14591.500000,2022-02-16 17:00:00,2022-02-17,...,False,False,False,False,False,False,False,False,False,False
1,2022-02-16 17:01:00,14591.75,14597.00,14590.75,14594.00,1645052460,228,14592.124315,2022-02-16 17:01:00,2022-02-17,...,False,False,False,False,False,False,False,False,False,False
2,2022-02-16 17:02:00,14594.00,14600.50,14593.75,14598.00,1645052520,190,14593.136446,2022-02-16 17:02:00,2022-02-17,...,False,False,False,False,False,False,False,False,False,False
3,2022-02-16 17:03:00,14598.50,14607.25,14597.75,14607.00,1645052580,362,14596.562116,2022-02-16 17:03:00,2022-02-17,...,False,False,False,False,False,False,False,False,False,False
4,2022-02-16 17:04:00,14607.00,14609.00,14604.00,14605.00,1645052640,214,14597.637582,2022-02-16 17:04:00,2022-02-17,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2755,2022-02-18 15:55:00,14000.75,14002.50,13993.50,13995.00,1645221300,210,14094.422031,2022-02-18 15:55:00,2022-02-18,...,False,True,False,False,False,False,False,False,False,False
2756,2022-02-18 15:56:00,13995.75,13998.50,13993.50,13995.75,1645221360,128,14094.414710,2022-02-18 15:56:00,2022-02-18,...,False,True,False,False,False,False,False,False,False,False
2757,2022-02-18 15:57:00,13995.25,14001.25,13995.25,13998.75,1645221420,121,14094.408001,2022-02-18 15:57:00,2022-02-18,...,False,True,False,False,False,False,False,False,False,False
2758,2022-02-18 15:58:00,13998.25,14001.25,13995.25,13995.25,1645221480,140,14094.399956,2022-02-18 15:58:00,2022-02-18,...,False,True,False,False,False,False,False,False,False,False


In [138]:
df2 = df.copy()
for col in [c for c in df.columns if c.startswith("SIG")]:
    match = df2[col] & (
        df2["close +5 diff"] > 0
        if col.endswith("+") else
        df2["close +5 diff"] < 0
    )
    df2[f"match % - {col}"] = match.rolling(window=60).sum() / 60
df2

Unnamed: 0,date,open,high,low,close,epoch_sec,volume,vwap,date.1,market_date,...,match % - SIG_10EMA_20EMA+,match % - SIG_10EMA_20EMA-,match % - SIG_HIGH_VOL_UP+,match % - SIG_HIGH_VOL_DOWN-,match % - SIG_STRONG_UP+,match % - SIG_STRONG_DOWN-,match % - SIG_LOWER_TO_VWAP-,match % - SIG_UPPER_TO_VWAP+,match % - SIG_LOWER_TO_EMA_60-,match % - SIG_UPPER_TO_EMA_60+
0,2022-02-16 17:00:00,14593.25,14602.75,14583.50,14591.50,1645052400,685,14591.500000,2022-02-16 17:00:00,2022-02-17,...,,,,,,,,,,
1,2022-02-16 17:01:00,14591.75,14597.00,14590.75,14594.00,1645052460,228,14592.124315,2022-02-16 17:01:00,2022-02-17,...,,,,,,,,,,
2,2022-02-16 17:02:00,14594.00,14600.50,14593.75,14598.00,1645052520,190,14593.136446,2022-02-16 17:02:00,2022-02-17,...,,,,,,,,,,
3,2022-02-16 17:03:00,14598.50,14607.25,14597.75,14607.00,1645052580,362,14596.562116,2022-02-16 17:03:00,2022-02-17,...,,,,,,,,,,
4,2022-02-16 17:04:00,14607.00,14609.00,14604.00,14605.00,1645052640,214,14597.637582,2022-02-16 17:04:00,2022-02-17,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2755,2022-02-18 15:55:00,14000.75,14002.50,13993.50,13995.00,1645221300,210,14094.422031,2022-02-18 15:55:00,2022-02-18,...,0.166667,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.016667
2756,2022-02-18 15:56:00,13995.75,13998.50,13993.50,13995.75,1645221360,128,14094.414710,2022-02-18 15:56:00,2022-02-18,...,0.166667,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.016667
2757,2022-02-18 15:57:00,13995.25,14001.25,13995.25,13998.75,1645221420,121,14094.408001,2022-02-18 15:57:00,2022-02-18,...,0.166667,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.016667
2758,2022-02-18 15:58:00,13998.25,14001.25,13995.25,13995.25,1645221480,140,14094.399956,2022-02-18 15:58:00,2022-02-18,...,0.166667,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.016667


In [119]:
from torch import nn, optim, tensor, argmax, max
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 15)
        self.layer2 = nn.Linear(15, 4)
        self.drop = nn.Dropout(0.2)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.drop(x)
        x = F.softmax(self.layer2(x), dim=1)  # To check with the loss function
        return x


# i = 180
results = [float("nan")] * 180
for i in range(180, len(df.index) - 1):
    df_first = df.iloc[:i]
    df_feature_cols = [
        col for col in df_first.columns
        if col.startswith("SIG")
    ]
    tensor_features = tensor(df_first[df_feature_cols].values).float()
    tensor_target = tensor(df_first["outcome"]).long()

    model = Model(df_first[df_feature_cols].shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    loss_fn = nn.CrossEntropyLoss()
    epochs = 50

    for epoch in range(1, epochs + 1):
        y_pred = model(tensor_features)
        loss = loss_fn(y_pred, tensor_target)

        # Zero gradients
        optimizer.zero_grad()
        loss.backward()  # Gradients
        optimizer.step()  # Update

    result = model(tensor([df.iloc[i + 1][df_feature_cols].values.astype(float)]).float())
    result_max = max(result)
    results.append(argmax(result).item() if result_max > 0.6 else float("nan"))
    print(f"{i} / {len(df.index) - 1} - {len(results)}")
df["result"] = results + [float("nan")]

180 / 2759 - 181
181 / 2759 - 182
182 / 2759 - 183
183 / 2759 - 184
184 / 2759 - 185
185 / 2759 - 186
186 / 2759 - 187
187 / 2759 - 188
188 / 2759 - 189
189 / 2759 - 190
190 / 2759 - 191
191 / 2759 - 192
192 / 2759 - 193
193 / 2759 - 194
194 / 2759 - 195
195 / 2759 - 196
196 / 2759 - 197
197 / 2759 - 198
198 / 2759 - 199
199 / 2759 - 200
200 / 2759 - 201
201 / 2759 - 202
202 / 2759 - 203
203 / 2759 - 204
204 / 2759 - 205
205 / 2759 - 206
206 / 2759 - 207
207 / 2759 - 208
208 / 2759 - 209
209 / 2759 - 210
210 / 2759 - 211
211 / 2759 - 212
212 / 2759 - 213
213 / 2759 - 214
214 / 2759 - 215
215 / 2759 - 216
216 / 2759 - 217
217 / 2759 - 218
218 / 2759 - 219
219 / 2759 - 220
220 / 2759 - 221
221 / 2759 - 222
222 / 2759 - 223
223 / 2759 - 224
224 / 2759 - 225
225 / 2759 - 226
226 / 2759 - 227
227 / 2759 - 228
228 / 2759 - 229
229 / 2759 - 230
230 / 2759 - 231
231 / 2759 - 232
232 / 2759 - 233
233 / 2759 - 234
234 / 2759 - 235
235 / 2759 - 236
236 / 2759 - 237
237 / 2759 - 238
238 / 2759 - 2

In [120]:
df2 = df[df["result"].notnull()]
df2["outcome 2"] = df2["outcome"]
df2["match"] = df2["outcome 2"] == df2["result"]
df2["not match"] = df2["outcome 2"] != df2["result"]
df2["match %"] = df2["match"].cumsum() / (df2["match"].cumsum() + df2["not match"].cumsum())
df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["outcome 2"] = df2["outcome"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["match"] = df2["outcome 2"] == df2["result"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["not match"] = df2["outcome 2"] != df2["result"]
A value is trying to be set on a copy of a slice from a DataFrame.
Tr

Unnamed: 0,date,open,high,low,close,epoch_sec,volume,vwap,date.1,market_date,...,SIG_STRONG_DOWN,SIG_LOWER_TO_VWAP,SIG_UPPER_TO_VWAP,SIG_LOWER_TO_EMA_60,SIG_UPPER_TO_EMA_60,result,outcome 2,match,not match,match %
182,2022-02-16 20:02:00,14584.75,14591.50,14584.25,14589.00,1645063320,622,14581.761058,2022-02-16 20:02:00,2022-02-17,...,False,True,False,False,False,3.0,2,False,True,0.000000
184,2022-02-16 20:04:00,14592.50,14593.00,14588.75,14591.50,1645063440,281,14581.892669,2022-02-16 20:04:00,2022-02-17,...,False,False,False,False,False,2.0,2,True,False,0.500000
185,2022-02-16 20:05:00,14591.75,14596.25,14588.75,14589.00,1645063500,451,14581.967074,2022-02-16 20:05:00,2022-02-17,...,False,False,False,False,False,2.0,3,False,True,0.333333
189,2022-02-16 20:09:00,14585.00,14585.50,14583.25,14585.25,1645063740,103,14582.020778,2022-02-16 20:09:00,2022-02-17,...,False,False,False,False,False,3.0,3,True,False,0.500000
190,2022-02-16 20:10:00,14585.25,14588.00,14584.75,14586.50,1645063800,107,14582.031712,2022-02-16 20:10:00,2022-02-17,...,False,False,False,False,False,2.0,3,False,True,0.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2734,2022-02-18 15:34:00,14026.00,14026.50,14014.50,14015.50,1645220040,330,14094.678382,2022-02-18 15:34:00,2022-02-18,...,False,False,False,False,False,3.0,2,False,True,0.387874
2736,2022-02-18 15:36:00,14010.25,14015.00,14006.50,14011.00,1645220160,360,14094.639588,2022-02-18 15:36:00,2022-02-18,...,False,False,False,False,False,3.0,3,True,False,0.388321
2746,2022-02-18 15:46:00,14008.00,14008.25,13989.50,13995.75,1645220760,646,14094.537943,2022-02-18 15:46:00,2022-02-18,...,False,False,False,False,False,2.0,3,False,True,0.388038
2753,2022-02-18 15:53:00,13994.75,14003.75,13992.00,14000.75,1645221180,281,14094.443212,2022-02-18 15:53:00,2022-02-18,...,False,False,False,False,False,2.0,0,False,True,0.387755


In [41]:
fig = make_subplots(
    rows=2,
    vertical_spacing=0.02,
    shared_xaxes=True,
    specs=[
        [{}],
        [{}],
    ],
    row_heights=[7, 2],
)
fig.add_trace(
    go.Candlestick(
        x=df["date"],
        open=df["open"],
        high=df["high"],
        low=df["low"],
        close=df["close"],
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(
        x=df["date"],
        y=df["cdl sum c10"],
    ),
    row=2,
    col=1,
)
fig.update_layout(
    margin={"l": 20, "r": 20, "b": 20, "t": 20, "pad": 4},
    height=650,
    xaxis_rangeslider_visible=False,
)
fig.show()