In [None]:
import pandas as pd
import plotly.express as px
import talib as ta
import zipfile
import os

from sklearn.cluster import KMeans
from histdata import download_hist_data as dl

In [None]:

# Fetch the January 2022 EURUSD archive and load it into `df`.
# The download/extract step is skipped when the extracted CSV is already on
# disk, so re-running the cell (or the whole notebook) does not hit the
# network again.
zip_path = "./DAT_ASCII_EURUSD_M1_202201.zip"
csv_path = "./data/DAT_ASCII_EURUSD_M1_202201.csv"

if not os.path.exists(csv_path):
    # NOTE(review): assumes dl() writes the archive to `zip_path` in the
    # working directory, as the original cell did - confirm against histdata.
    dl(year="2022", month=1, pair="eurusd")

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall("./data")

    # The archive is no longer needed once its contents are extracted.
    os.remove(zip_path)

# Column names are supplied explicitly; fields are separated by ";".
df = pd.read_csv(
    csv_path,
    sep=";",
    names=["date_time", "open", "high", "low", "close", "volume"],
)
    

In [None]:
# Long-format view of the four price columns for every row from position
# 30000 onward: one (date_time, price_point, price) record per value.
price_columns = ["open", "high", "low", "close"]
df_melt = df.iloc[30000:].melt(
    id_vars="date_time",
    value_vars=price_columns,
    var_name="price_point",
    value_name="price",
)

In [None]:
# Line chart of the melted prices, one colored line per price point.
fig = px.line(
    df_melt,
    x="date_time",
    y="price",
    color="price_point",
    title="EURUSD Price Time Series",
)
fig.update_layout(width=1000, height=500)

# Save a static copy next to the notebook, then render inline.
fig.write_image("price.png")
fig.show()

In [None]:
# Three TA-Lib indicators computed from the price columns.
cci = ta.CCI(df["high"], df["low"], df["close"], timeperiod=14)
ht = ta.HT_DCPHASE(df["close"])
aroon = ta.AROONOSC(df["high"], df["low"], timeperiod=14)

# Collect the indicators next to their timestamps and drop every row in which
# any value is missing.
indicator_columns = {
    "date_time": df["date_time"].to_list(),
    "cci": cci,
    "aroon": aroon,
    "ht": ht,
}
ta_df = pd.DataFrame(indicator_columns).dropna()

# Keep only the price rows that survived the dropna so `df` and `ta_df`
# share the same index.
df = df.loc[ta_df.index, :]

In [None]:
# Same positional window (rows 1300-1599) from the price frame and the
# indicator frame, melted to long format and stacked into one frame.
# Indicator values reuse the "price"/"price_point" column names so both
# parts share a schema.
window = slice(1300, 1600)

price_melt = df.iloc[window].melt(
    id_vars="date_time",
    value_vars=["open", "high", "low", "close"],
    var_name="price_point",
    value_name="price",
)
ta_melt = ta_df.iloc[window].melt(
    id_vars="date_time",
    value_vars=["cci", "aroon", "ht"],
    var_name="price_point",
    value_name="price",
)
df_melt = pd.concat([price_melt, ta_melt])

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Four stacked panels sharing the x axis: the close price on top, then one
# panel per indicator.
fig = make_subplots(rows=4, cols=1, shared_xaxes=True)

# (value of `price_point` to select, legend name) for each panel, top to bottom.
panels = [
    ("close", "EURUSD close"),
    ("cci", "cci"),
    ("aroon", "aroon"),
    ("ht", "htdp"),
]

for row_number, (series_key, legend_name) in enumerate(panels, start=1):
    series = df_melt[df_melt.price_point == series_key]
    fig.add_trace(
        go.Scatter(x=series["date_time"], y=series["price"], name=legend_name),
        row=row_number,
        col=1,
    )

fig.update_layout(height=600, width=800, title_text="Technical Analysis")
fig.write_image("ta.png")
fig.show()


In [None]:
# Remove the timestamp column so only the numeric indicator columns remain
# for clustering. errors="ignore" makes the cell safe to re-run once the
# column is already gone, and assigning the result avoids echoing the whole
# removed column as cell output.
ta_df = ta_df.drop(columns=["date_time"], errors="ignore")

In [None]:
from sklearn.metrics import silhouette_score

# Fit KMeans for k = 2..13 on the indicator frame and record, per k, the
# inertia divided by the number of clusters and the silhouette score.
cluster_counts = range(2, 14)

sweep = []
for n_clusters in cluster_counts:
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    preds = kmeans.fit_predict(ta_df)
    sweep.append((kmeans.inertia_ / n_clusters, silhouette_score(ta_df, preds)))

inertia = [per_cluster_inertia for per_cluster_inertia, _ in sweep]
sil_score = [score for _, score in sweep]

inertias = pd.DataFrame({"n_clusters": cluster_counts, "inertia": inertia})
silhouette_scores = pd.DataFrame(
    {"n_clusters": cluster_counts, "silhouette_score": sil_score}
)

# Only the silhouette curve is plotted.
fig = px.line(
    silhouette_scores,
    x="n_clusters",
    y="silhouette_score",
    title="Clustering Silhouette Score",
)
fig.update_layout(width=900, height=500)
fig.write_image("silhouette.png")
fig.show()

In [None]:
import numpy as np

# Columns the model is trained on. Selecting them explicitly keeps this cell
# re-runnable: once "label" has been written to `ta_df` at the end of the
# cell, a plain slice of `ta_df` would feed the previous labels back in as a
# feature.
feature_cols = ["cci", "aroon", "ht"]

# First 30000 rows train the model, the remainder is held out.
# .copy() gives independent frames, so adding "label" to `train_df` does not
# write into a slice of `ta_df`.
train_df = ta_df.iloc[:30000][feature_cols].copy()
test_df = ta_df.iloc[30000:][feature_cols].copy()

# Fixed seed (same value as the silhouette sweep) so cluster ids, and every
# plot colored by them, are reproducible across runs.
kmeans = KMeans(n_clusters=3, random_state=0).fit(train_df)

train_labels = kmeans.labels_
test_labels = kmeans.predict(test_df)
train_df["label"] = train_labels

# Train labels followed by test labels line up positionally with the rows of
# `df`, which shares its index with `ta_df`.
df["label"] = np.concatenate((train_labels, test_labels))
ta_df["label"] = df["label"]


In [None]:
# Long-format indicator values keyed by cluster label:
# one (label, indicator, value) record per indicator reading.
indicator_names = ["cci", "aroon", "ht"]
ta_melt = ta_df.melt(
    id_vars=["label"],
    value_vars=indicator_names,
    var_name="indicator",
    value_name="value",
)

In [None]:
# Order the records by cluster label.
ta_melt = ta_melt.sort_values(by=["label"])

In [None]:
# Box plot of each indicator's values, split by cluster label.
fig = px.box(
    ta_melt,
    x="indicator",
    y="value",
    color="label",
    title="Technical Indicator Box Plot by Cluster",
)
fig.update_layout(width=900, height=500)
fig.write_image("box.png")
fig.show()

In [None]:
# Give every run of consecutive rows with the same cluster label its own
# "trace" id, starting at 0 and increasing by one each time the label changes.
#
# Vectorized replacement for a row-by-row iterrows() loop: a row starts a new
# run when its label differs from the previous row's label. The first row has
# no predecessor (shift() yields NaN), so it always counts as a change; the
# "- 1" makes the ids start at 0 as before. Unlike indexing df.iloc[0], this
# also works on an empty frame.
label_changed = df["label"].ne(df["label"].shift())
df["trace"] = label_changed.cumsum() - 1

In [None]:
import plotly.graph_objects as go

# Close price for rows 2100-2499, drawn as one line segment per "trace" id and
# colored by the cluster label of that segment.
df_short = df.iloc[2100:2500]

fig = go.Figure()

# Labels that already have a legend entry; later segments with the same label
# join that legend group without adding another entry.
legend_groups = set()

# Cluster label -> line color.
label_color = dict(enumerate(["blue", "red", "green", "purple", "orange"]))

for trace, idx in df_short.groupby(["trace"]).groups.items():
    segment = df_short.loc[idx]
    # Every row of a segment carries the same label; read it from the first row.
    segment_label = segment["label"].iloc[0]
    fig.add_traces(
        go.Scatter(
            x=segment["date_time"],
            y=segment["close"],
            mode="lines",
            marker=dict(color=label_color[segment_label]),
            name=str(segment_label),
            legendgroup=str(segment_label),
            showlegend=segment_label not in legend_groups,
        )
    )
    legend_groups.add(segment_label)

fig.update_layout(
    title_text="EURUSD Time Series with Cluster Colors",
    xaxis_title="date-time",
    yaxis_title="EURUSD",
    width=1300,
    height=600,
    font={"size": 18},
)
fig.write_image("cluster.png")
fig.show()

In [None]:
# Same cluster-colored close-price chart, restricted to the rows from
# position 30000 onward.
df_short = df.iloc[30000:]

fig = go.Figure()

# Labels that already have a legend entry, so each label is listed once.
legend_groups = set()

# Cluster label -> line color.
label_color = dict(enumerate(["blue", "red", "green", "purple", "orange"]))

for trace, idx in df_short.groupby(["trace"]).groups.items():
    rows = df_short.loc[idx]
    # The label is constant within a trace, so the first row is representative.
    cluster = rows["label"].iloc[0]
    first_of_cluster = cluster not in legend_groups

    line = go.Scatter(
        x=rows["date_time"],
        y=rows["close"],
        mode="lines",
        marker=dict(color=label_color[cluster]),
        name=str(cluster),
        legendgroup=str(cluster),
        showlegend=first_of_cluster,
    )
    fig.add_traces(line)
    legend_groups.add(cluster)

layout_options = dict(
    xaxis_title="date-time",
    yaxis_title="EURUSD",
    height=600,
    width=1300,
    title_text="Test Set - EURUSD Time Series with Cluster Colors",
    font=dict(size=18),
)
fig.update_layout(**layout_options)
fig.write_image("cluster_test.png")
fig.show()