# Test tsfresh

In [6]:
# Fetch the Robot Execution Failures example data set through tsfresh's
# examples module, then load it as a (time series, target) pair.
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, \
    load_robot_execution_failures
# NOTE(review): the recorded output of this cell is a RuntimeError about missing
# permissions to write into the module path, so the download step needs a
# tsfresh installation the current user can write to.
download_robot_execution_failures()
timeseries, y = load_robot_execution_failures()

RuntimeError: You don't have the necessary permissions to download the Robot Execution Failures LP1 Data Set into the module path. Consider installing the module in a virtualenv you own or run this function with appropriate permissions.

In [None]:
# Quick look at the first rows of the loaded time-series table.
print(timeseries.head())

In [None]:
import matplotlib.pyplot as plt
# Plot the rows belonging to id 3: one subplot per column, sharing the x axis.
timeseries[timeseries['id'] == 3].plot(subplots=True, sharex=True, figsize=(10,10))
plt.show()

In [None]:
# Same per-column plot for the rows belonging to id 21, for visual comparison.
# Relies on `plt` having been imported by an earlier cell.
timeseries[timeseries['id'] == 21].plot(subplots=True, sharex=True, figsize=(10,10))
plt.show()

In [None]:
from tsfresh import extract_features
# Run tsfresh feature extraction, grouping rows by the "id" column and
# ordering them by the "time" column.
extracted_features = extract_features(timeseries, column_id="id", column_sort="time")

In [None]:
# Display the extracted feature table (bare expression so the notebook renders it).
extracted_features

In [None]:
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute

# The return value of impute() is discarded, so this step relies on impute
# modifying `extracted_features` in place before selection.
# NOTE(review): confirm in-place behaviour against the installed tsfresh version.
impute(extracted_features)
# Filter the extracted features against the target `y`.
features_filtered = select_features(extracted_features, y)

In [None]:
from tsfresh import extract_relevant_features

# Single-call alternative to the separate extract / impute / select cells above:
# builds the filtered feature table directly from the raw series and target.
features_filtered_direct = extract_relevant_features(timeseries, y,
                                                     column_id='id', column_sort='time')

In [None]:
# Display the directly-filtered feature table (bare expression so the notebook renders it).
features_filtered_direct

In [None]:
from sklearn.cluster import KMeans

# Partition the samples into three groups using every selected feature.
kmeans = KMeans(n_clusters=3).fit(features_filtered_direct)
centroids = kmeans.cluster_centers_
print(centroids)

# Project samples and centroids onto the same pair of features.
# The centroid array has one column per fitted feature, in the column order of
# `features_filtered_direct`, so the positions of the plotted features are
# looked up instead of assuming they are columns 0 and 1.
x_feature, y_feature = "F_x__abs_energy", "F_y__abs_energy"
x_pos = features_filtered_direct.columns.get_loc(x_feature)
y_pos = features_filtered_direct.columns.get_loc(y_feature)

# Relies on `plt` having been imported by an earlier cell.
plt.scatter(features_filtered_direct[x_feature], features_filtered_direct[y_feature], c=kmeans.labels_.astype(float), s=50, alpha=0.5)
plt.scatter(centroids[:, x_pos], centroids[:, y_pos], c='red', s=50)

In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
from scipy import stats
import scipy.cluster.hierarchy as hac
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import fcluster
import typing


def plot_dendogram(clust):
    """Draw the dendrogram for a hierarchical-clustering linkage and show it.

    Args:
        clust: Linkage data passed straight to ``hac.dendrogram``.
    """
    figure_size = (25, 10)
    # Leaf labels are rotated and shrunk so that many samples stay readable.
    leaf_options = {"leaf_rotation": 90.0, "leaf_font_size": 8}

    plt.figure(figsize=figure_size)
    plt.title("Hierarchical Clustering Dendrogram")
    plt.xlabel("sample index")
    plt.ylabel("distance")
    hac.dendrogram(clust, **leaf_options)
    plt.show()


def spearman_metric(x, y):
    """Return a distance derived from the Spearman rank correlation of two series.

    The function previously called ``stats.pearsonr`` despite its name; it now
    uses ``stats.spearmanr`` so that the metric matches what callers ask for.

    Args:
        x: First sequence of observations.
        y: Second sequence of observations, same length as ``x``.

    Returns:
        ``1 - rho`` where ``rho`` is the Spearman correlation coefficient:
        0 for perfectly concordant rankings, 2 for perfectly reversed ones.
    """
    rho = stats.spearmanr(x, y)[0]
    return 1 - rho  # correlation to distance: range 0 to 2


def clusterize(
    tss: pd.DataFrame,
    metric: typing.Union[str, typing.Callable] = "correlation",
    method: str = "single",
):
    """Compute a hierarchical-clustering linkage for the rows of ``tss``.

    Args:
        tss: Table whose rows are the observations to cluster.
        metric: Distance metric forwarded to ``hac.linkage``; either the name
            of a metric (the default is ``"correlation"``) or a callable taking
            two observation vectors and returning their distance.
        method: Linkage method forwarded to ``hac.linkage``. Defaults to
            ``"single"``, the value that used to be hard-coded.

    Returns:
        The linkage result produced by ``hac.linkage``.
    """
    return hac.linkage(tss, method=method, metric=metric)


def print_clusters(tss, Z, k, plot=False):
    """Print the members of each flat cluster cut from a linkage.

    Args:
        tss: Table whose rows were clustered; only read when ``plot`` is true.
        Z: Linkage data passed to ``fcluster``.
        k: Maximum number of flat clusters to extract (``maxclust`` criterion).
        plot: When true, plot the rows of ``tss`` belonging to each cluster.
    """
    # One label per clustered row, indexed by row position.
    labels = pd.Series(fcluster(Z, k, criterion="maxclust"))

    for label in labels.unique():
        members = labels[labels == label].index
        print(
            f"Cluster {label} number of entries {len(members)} : {list(members)}"
        )
        if not plot:
            continue
        # Transpose so each member row becomes one line in the plot.
        tss.T.iloc[:, members].plot()
        plt.show()


def build_test_dataframe(n) -> pd.DataFrame:
    """Build six synthetic series of length ``n`` as the rows of a DataFrame.

    The series are sine/cosine waves of increasing frequency sampled on
    ``[0, 5]``; the middle four also carry a linear trend. All are multiplied
    by a common scale factor.

    Args:
        n: Number of samples per series (number of columns in the result).

    Returns:
        A DataFrame with 6 rows (one per series) and ``n`` columns.
    """
    x = np.linspace(0, 5, n)
    scale = 4

    a = scale * np.sin(x)
    b = scale * (np.cos(1 + x * 3) + np.linspace(0, 1, n))
    c = scale * (np.sin(2 + x * 6) + np.linspace(0, -1, n))
    d = scale * (np.cos(3 + x * 9) + np.linspace(0, 4, n))
    e = scale * (np.sin(4 + x * 12) + np.linspace(0, -4, n))
    f = scale * np.cos(x)

    # NOTE: an earlier comment here mentioned building 'group_size' series from
    # each main series; no such expansion is implemented, so exactly one row is
    # returned per base series.
    return pd.DataFrame([a, b, c, d, e, f])



In [None]:
# Hierarchically cluster the selected features using clusterize's defaults,
# draw the dendrogram, then print and plot the members of up to 7 flat clusters.
cluster = clusterize(features_filtered_direct)
plot_dendogram(cluster)
print_clusters(features_filtered_direct, cluster, 7, plot=True)