In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import STL
from tslearn.clustering import TimeSeriesKMeans
import matplotlib.pyplot as plt

In [4]:
# Column holding the timestamps, and the ten series (MA_1 .. MA_10) to analyse
date_column = 'date'
features = [f'MA_{i}' for i in range(1, 11)]

# Load the raw table, parse the timestamps and index the rows by date
data = pd.read_csv('time_series.csv')
data[date_column] = pd.to_datetime(data[date_column])
data = data.set_index(date_column)

# One trend column per feature, sharing the date index of the raw data
trend_data = pd.DataFrame(index=data.index)

# Decompose every feature with STL and keep only its trend component.
# NOTE: `seasonal=13` is the length of the seasonal smoother, not the seasonal
# period. No `period` is passed here, so (per the statsmodels docs) STL has to
# determine it from the date index of the series.
for feature in features:
    stl = STL(data[feature], seasonal=13)
    result = stl.fit()
    trend_data[feature] = result.trend

In [5]:
# Put one feature per row so that each trend series becomes a single sample
data_transposed = trend_data.T

# Build the 3D numpy array for tslearn by appending a trailing axis of length 1
# -> shape (number of series, number of time steps, 1)
formatted_data = data_transposed.to_numpy()[:, :, np.newaxis]

In [None]:
# Candidate cluster counts to evaluate (k = 1 .. 9)
cluster_range = range(1, 10)

# Settings shared by every candidate model; only n_clusters varies
kmeans_options = dict(metric="euclidean", verbose=False, max_iter=100,
                      n_init=10, random_state=98)

# Fit one time-series k-means model per k and record its inertia
inertia_values = []
for k in cluster_range:
    model = TimeSeriesKMeans(n_clusters=k, **kmeans_options)
    model.fit(formatted_data)
    inertia_values.append(model.inertia_)

# Elbow curve: inertia plotted against the number of clusters
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(cluster_range, inertia_values, marker='o')
ax.set_title('Elbow Method For Optimal k')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('Inertia')
plt.show()

In [7]:
# Number of clusters used for the final model
clusters = 3

# Configure time-series k-means with the chosen number of clusters
model = TimeSeriesKMeans(
    n_clusters=clusters,
    metric="euclidean",
    max_iter=100,
    n_init=10,
    random_state=98,
    verbose=False,
)

# Fit on the formatted series and get one cluster label per series
y_pred = model.fit_predict(formatted_data)

In [None]:
# Series names in the exact order they were clustered: row i of the array
# given to the model is column i of trend_data, so the labels must be looked
# up there. data.columns would mislabel (or mis-size) the result whenever the
# CSV carries columns other than the clustered MA_* features.
feature_names = trend_data.columns.tolist()
if len(feature_names) != len(y_pred):
    raise ValueError(
        f"Expected one cluster label per series: got {len(y_pred)} labels "
        f"for {len(feature_names)} series"
    )

# Map every cluster label to the list of mining areas assigned to it
clusters = {label: [] for label in range(int(max(y_pred)) + 1)}
for name, label in zip(feature_names, y_pred):
    clusters[int(label)].append(name)

# Print each cluster and the mining areas it contains.
# The loop variables are deliberately not called `features`, so the
# module-level feature list defined when loading the data is not overwritten.
for label, members in clusters.items():
    print(f"Cluster {label}: {', '.join(members)}")