In [42]:
import yfinance as yf
import pandas as pd 
import sklearn.model_selection as sk
import numpy as np
from ignite.engine import Engine, Events
from ignite.handlers import EarlyStopping
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
import plotly.graph_objects as go
from sklearn.cluster import KMeans
import plotly.express as px

In [43]:
data = yf.download('HEKTS.IS',period="1y",interval="1D")
data=data.rename({"High":"high","Low":"low","Close":"close","Open":"open","Adj Close":"adj close","Volume":"volume"},axis=1)

data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,open,high,low,close,adj close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-10-20,30.600000,31.100000,29.180000,30.020000,30.020000,75573981
2022-10-21,30.020000,30.139999,28.740000,29.500000,29.500000,39818081
2022-10-24,29.600000,29.860001,28.219999,28.959999,28.959999,36113748
2022-10-25,28.959999,29.180000,26.639999,28.000000,28.000000,48810621
2022-10-26,28.040001,29.639999,27.680000,28.940001,28.940001,42724267
...,...,...,...,...,...,...
2023-10-16,24.500000,24.760000,23.120001,23.160000,23.160000,43659210
2023-10-17,23.240000,23.760000,22.900000,23.760000,23.760000,38244470
2023-10-18,23.240000,23.600000,22.719999,22.719999,22.719999,36201053
2023-10-19,22.900000,23.420000,22.480000,22.760000,22.760000,38233750


In [44]:
fig=go.Figure()
fig.add_trace(
    go.Candlestick(x=data.index,open=data.open,close=data.close,high=data.high,low=data.low)
)

In [45]:
fig=go.Figure()
fig.add_trace(
    go.Scatter(x=data.index,y=data["adj close"])
)

In [46]:
# Convert adjusted closing price to numpy array
data_price = np.array(data["adj close"])


# Perform cluster analysis
K = 6
kmeans = KMeans(n_clusters=6).fit(data_price.reshape(-1, 1))

# predict which cluster each price is in
clusters = kmeans.predict(data_price.reshape(-1, 1))
print("Clusters:\n", clusters)






KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



Clusters:
 [5 5 0 0 0 0 0 0 5 0 0 5 5 3 3 1 1 4 4 4 4 1 1 1 1 1 3 1 1 3 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 4 4 4 1 1 1 1 1 1 1 3 3 3 3 3 3 3 1 1 1 1 1 1 3 3 3 3 3 3 5
 5 3 3 5 0 2 5 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 5 5 5 5 0 0 0 5 5 0 0 0
 0 0 0 0 0 2 2 0 5 5 0 0 0 0 0 0 5 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 0 0 0 0 5 3 3 3 3 5 5 0 0 5 5 5 0 0 5 5 5 5 5 5 5 5 5 3 3 3 3 3 3 5 5
 5 5 5 0 5 5 5 5 5 5 5 5 5 5 5 0 0 0 0 0 0 2 0 2 2 2 2 2 2 2 2 2 2 0 0 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [47]:
# Assigns plotly as visualization engine

# Arbitrarily 6 colors for our 6 clusters
colors = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo']

# Create Scatter plot, assigning each point a color based
# on it's grouping where group_number == index of color.
fig=px.scatter(x=data.index,y=data["adj close"],color=[colors[i] for i in clusters])
# Configure some styles
fig.update_layout(
    go.Layout(
    plot_bgcolor='#efefef',
    showlegend=False,
    # Font Families
    font_family='Monospace',
    font_color='#000000',
    font_size=20,
    xaxis=dict(
        rangeslider=dict(
            visible=False
        ))
)
)

# Display plot in local browser window


In [48]:
# Create list to hold values, initialized with infinite values
min_max_values = []

# init for each cluster group
for i in range(6):

    # Add values for which no price could be greater or less
    min_max_values.append([np.inf, -np.inf])

# Print initial values
print(min_max_values)

# Get min/max for each cluster
for i in range(len(data_price)):

    # Get cluster assigned to price
    cluster = clusters[i]

    # Compare for min value
    if data_price[i] < min_max_values[cluster][0]:
        min_max_values[cluster][0] = data_price[i]

    # Compare for max value
    if data_price[i] > min_max_values[cluster][1]:
        min_max_values[cluster][1] = data_price[i]
# Print resulting values
print(min_max_values)



[[inf, -inf], [inf, -inf], [inf, -inf], [inf, -inf], [inf, -inf], [inf, -inf]]
[[26.600000381469727, 29.299999237060547], [36.400001525878906, 41.540000915527344], [22.579999923706055, 26.31999969482422], [32.2400016784668, 35.70000076293945], [42.380001068115234, 51.25], [29.399999618530273, 32.099998474121094]]


In [49]:


# Arbitrarily 6 colors for our 6 clusters
colors = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo']

# Create Scatter plot, assigning each point a color based
# on it's grouping where group_number == index of color.
fig=px.scatter(x=data.index,y=data["adj close"],color=[colors[i] for i in clusters])


for cluster_min, cluster_max in min_max_values:
    fig.add_hline(y=cluster_min, line_width=1, line_color="blue")
    fig.add_hline(y=cluster_max, line_width=1, line_color="blue")

# Add a trace of the price for better clarity
fig.add_trace(go.Trace(
    x=data.index,
    y=data['adj close'],
    line_color="black",
    line_width=1
))


# Configure some styles
fig.update_layout(
    go.Layout(
    plot_bgcolor='#efefef',
    showlegend=False,
    # Font Families
    font_family='Monospace',
    font_color='#000000',
    font_size=20,
    xaxis=dict(
        rangeslider=dict(
            visible=False
        ))
)
)



plotly.graph_objs.Trace is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.




In [50]:
print("Initial Min/Max Values:\n", min_max_values)

# Create container for combined values
output = []

# Sort based on cluster minimum
s = sorted(min_max_values, key=lambda x: x[0])

# For each cluster get average of
for i, (_min, _max) in enumerate(s):

    # Append min from first cluster
    if i == 0:
        output.append(_min)

    # Append max from last cluster
    if i == len(min_max_values) - 1:
        output.append(_max)

    # Append average from cluster and adjacent for all others
    else:
        output.append(sum([_max, s[i+1][0]]) / 2)




Initial Min/Max Values:
 [[26.600000381469727, 29.299999237060547], [36.400001525878906, 41.540000915527344], [22.579999923706055, 26.31999969482422], [32.2400016784668, 35.70000076293945], [42.380001068115234, 51.25], [29.399999618530273, 32.099998474121094]]


In [51]:
# Add horizontal lines 
for cluster_avg in output[1:-1]:
    fig.add_hline(y=cluster_avg, line_width=1, line_color="blue")
    
    

# Arbitrarily 6 colors for our 6 clusters
colors = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo']

# Create Scatter plot, assigning each point a color based
# on it's grouping where group_number == index of color.
fig=px.scatter(x=data.index,y=data["adj close"],color=[colors[i] for i in clusters])


# Add horizontal lines 
for cluster_avg in output[1:-1]:
    fig.add_hline(y=cluster_avg, line_width=1, line_color="blue")
    
    

# Add a trace of the price for better clarity
fig.add_trace(go.Trace(
    x=data.index,
    y=data['adj close'],
    line_color="black",
    line_width=1
))


# Configure some styles
fig.update_layout(
    go.Layout(
    plot_bgcolor='#efefef',
    showlegend=False,
    # Font Families
    font_family='Monospace',
    font_color='#000000',
    font_size=20,
    xaxis=dict(
        rangeslider=dict(
            visible=False
        ))
)
)


plotly.graph_objs.Trace is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.




In [52]:
# create a list to contain output values
values = []

# Define a range of cluster values to assess
K = range(1, 10)

# Performa a clustering using each value, save inertia_ value from each
for k in K:
    kmeans_n = KMeans(n_clusters=k)
    kmeans_n.fit(data_price.reshape(-1, 1))
    values.append(kmeans_n.inertia_)






KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.




KMeans is known to have a memory leak on Windows with MKL, when the

In [53]:
import plotly.graph_objects as go

# Create initial figure
fig = go.Figure()

# Add line plot of inertia values
fig.add_trace(go.Trace(
    x=list(K),
    y=values,
    line_color="black",
    line_width=1
))

# Make it pretty
layout = go.Layout(
    plot_bgcolor='#efefef',
    showlegend=False,
    # Font Families
    font_family='Monospace',
    font_color='#000000',
    font_size=20,
    xaxis=dict(
        rangeslider=dict(
            visible=False
        ))
)
fig.update_layout(layout)
fig.show()


plotly.graph_objs.Trace is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.




In [54]:
# Convert adjusted closing price to numpy array
data_price = np.array(data["adj close"])


# Perform cluster analysis
K = 3
kmeans = KMeans(n_clusters=K).fit(data_price.reshape(-1, 1))

# predict which cluster each price is in
clusters = kmeans.predict(data_price.reshape(-1, 1))
print("Clusters:\n", clusters)


# Create list to hold values, initialized with infinite values
min_max_values = []

# init for each cluster group
for i in range(K):

    # Add values for which no price could be greater or less
    min_max_values.append([np.inf, -np.inf])

# Print initial values
print(min_max_values)

# Get min/max for each cluster
for i in range(len(data_price)):

    # Get cluster assigned to price
    cluster = clusters[i]

    # Compare for min value
    if data_price[i] < min_max_values[cluster][0]:
        min_max_values[cluster][0] = data_price[i]

    # Compare for max value
    if data_price[i] > min_max_values[cluster][1]:
        min_max_values[cluster][1] = data_price[i]
# Print resulting values
print(min_max_values)


print("Initial Min/Max Values:\n", min_max_values)

# Create container for combined values
output = []

# Sort based on cluster minimum
s = sorted(min_max_values, key=lambda x: x[0])

# For each cluster get average of
for i, (_min, _max) in enumerate(s):

    # Append min from first cluster
    if i == 0:
        output.append(_min)

    # Append max from last cluster
    if i == len(min_max_values) - 1:
        output.append(_max)

    # Append average from cluster and adjacent for all others
    else:
        output.append(sum([_max, s[i+1][0]]) / 2)



# Add horizontal lines 
for cluster_avg in output[1:-1]:
    fig.add_hline(y=cluster_avg, line_width=1, line_color="blue")
    
    

# Arbitrarily 6 colors for our 6 clusters
colors = ['red', 'indigo', 'orange']

# Create Scatter plot, assigning each point a color based
# on it's grouping where group_number == index of color.
fig=px.scatter(x=data.index,y=data["adj close"],color=[colors[i] for i in clusters])


# Add horizontal lines 
for cluster_avg in output[1:-1]:
    fig.add_hline(y=cluster_avg, line_width=1, line_color="blue")
    
    

# Add a trace of the price for better clarity
fig.add_trace(go.Trace(
    x=data.index,
    y=data['adj close'],
    line_color="black",
    line_width=1
))


# Configure some styles
fig.update_layout(
    go.Layout(
    plot_bgcolor='#efefef',
    showlegend=False,
    # Font Families
    font_family='Monospace',
    font_color='#000000',
    font_size=20,
    xaxis=dict(
        rangeslider=dict(
            visible=False
        ))
)
)





KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



Clusters:
 [0 0 0 2 0 2 2 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 2 2 0 0 0 1 1 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0
 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
[[inf, -inf], [inf, -inf], [inf, -inf]]
[[28.399999618530273, 34.560001373291016], [34.70000076293945, 51.25], [22.579999923706055, 28.219999313354492]]
Initial Min/Max Values:
 [[28.399999618530273, 34.560001373291016], [34.70000076293945, 51.25], [22.579999923706055, 28.219999313354492]]



plotly.graph_objs.Trace is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.


