In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import ast
from sktime.transformations.series.sax import SAX
from prefixspan import PrefixSpan
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sklearn.preprocessing import StandardScaler
from tslearn.piecewise import SymbolicAggregateApproximation
from pyts.approximation import paa as paa

In [None]:
def load_npy(filename):
    return np.load(filename)


dir_path = 'cleaned_time_series/'
len_threshold = 1280
X, y, ids = [], [], []

for file in os.listdir(dir_path):
    if os.path.splitext(file)[1] != '.npy':
        continue

    split = file.split("_")
    ids.append(split[0])  # track_id
    y.append(split[1][:-4])  # genre
    ts = load_npy(dir_path + file)

    if len(ts) > len_threshold:
        ts = ts[0:len_threshold]
    else:
        # pad = [np.mean(ts[:-5])] * (len_threshold-len(ts)) # fill by mean value of last n observations
        pad = [ts[-1]] * (len_threshold - len(ts))  # fill with last observation
        ts = np.append(ts, pad)

    X.append([ts])

X, y, ids = np.array(X), np.array(y), np.array(ids)
print(len(X))

In [None]:
X.shape

In [None]:
# Initialize PAA transformer
approximator = paa.PiecewiseAggregateApproximation(window_size=4)

# Apply PAA to your time series data
X_paa = approximator.transform(X.reshape(-1, 1280))

print(X_paa.shape)

In [None]:
# subset of PAA with only happy genre
X_paa_happy = X_paa[y == 'happy']

In [None]:
X_paa_happy.shape

# SAX transformation

## First step - Normalization

In [None]:
X_paa_happy.shape

In [None]:
X_paa_happy = X_paa_happy.reshape(X_paa_happy.shape[0], 1, X_paa_happy.shape[1])

X_paa_happy.shape

In [None]:
# Import tabular to series adaptor and standard scaler
scaler = TabularToSeriesAdaptor(StandardScaler(), fit_in_transform=True)
X_happy = scaler.fit_transform(X_paa_happy) #if 3D np.ndarray should be of shape (n_instances, n_variables, n_timepoints)

## Second step - Definition of Symbolic Aggregate Approximation (SAX) parameters

In [None]:
n_segments = 32  # Number of segments (number of segments equals 128 means that each segment will represent 10 original time points)
n_symbols = 10    # Number of symbols; also adjustable
sax = SymbolicAggregateApproximation(n_segments=n_segments, alphabet_size_avg=n_symbols)

## Third step - Apply SAX transformation

In [None]:
X_happy = X_happy.reshape(-1, 320, 1)

In [None]:
# Apply SAX transformation
X_sax = sax.fit_transform(X_happy)

# Inverse transform to get the symbols
symbols = sax.inverse_transform(X_sax)

## Fourth step - PrefixSpan algorithm to find frequent patterns

In [None]:
symbols_str = np.array([', '.join(map(str, s.flatten())) for s in symbols])

In [None]:
#subselect first 10 elements of symbols_str
example = symbols_str[:3]
example = example.tolist()

example_list = [ast.literal_eval('[' + s + ']') for s in example]

lst = [[0, 1, 2, 3, 4],
       [1, 1, 1, 3, 4],
       [2, 1, 2, 2, 0],
       [1, 1, 1, 2, 2]]

In [None]:
lst

In [None]:
example

In [None]:
example_list

In [None]:
%%time
ps = PrefixSpan(example_list)
frequent_patterns = ps.frequent(1)  # With a minimum support of 10

In [None]:
X_sax.shape

In [None]:
symbols[3].tolist()

In [None]:
frequent_patterns

# SAX in another way (The one of the professor)

In [12]:
sax = SAX(word_size=32, alphabet_size=10)

In [13]:
happy_transformed = sax.fit_transform(X_paa_happy)

In [14]:
happy_transformed.shape

(32, 320)

In [19]:
happy_transformed

array([[4., 3., 2., ..., 4., 5., 4.],
       [6., 6., 6., ..., 5., 4., 5.],
       [2., 3., 3., ..., 5., 5., 5.],
       ...,
       [4., 3., 4., ..., 5., 3., 3.],
       [5., 6., 5., ..., 4., 3., 4.],
       [4., 4., 4., ..., 5., 4., 5.]])

In [24]:
# Transform happy_transformed to list of integers
happy_transformed_int = []

for i in range(happy_transformed.shape[0]):
    happy_transformed_int.append(happy_transformed[i].tolist())

happy_transformed_int

[[4.0,
  3.0,
  2.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  2.0,
  3.0,
  3.0,
  3.0,
  4.0,
  3.0,
  4.0,
  5.0,
  3.0,
  4.0,
  4.0,
  5.0,
  4.0,
  5.0,
  5.0,
  5.0,
  4.0,
  3.0,
  5.0,
  4.0,
  5.0,
  4.0,
  5.0,
  4.0,
  5.0,
  3.0,
  4.0,
  4.0,
  4.0,
  3.0,
  2.0,
  3.0,
  3.0,
  4.0,
  3.0,
  4.0,
  3.0,
  4.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  3.0,
  5.0,
  4.0,
  4.0,
  4.0,
  4.0,
  3.0,
  4.0,
  6.0,
  4.0,
  3.0,
  5.0,
  3.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  3.0,
  3.0,
  4.0,
  6.0,
  4.0,
  3.0,
  4.0,
  5.0,
  5.0,
  5.0,
  5.0,
  4.0,
  4.0,
  3.0,
  5.0,
  6.0,
  4.0,
  4.0,
  5.0,
  4.0,
  5.0,
  6.0,
  5.0,
  5.0,
  4.0,
  6.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  4.0,
  6.0,
  6.0,
  4.0,
  5.0,
  6.0,
  6.0,
  4.0,
  4.0,
  4.0,
  5.0,
  3.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  5.0,
  4.0,
  4.0,
  5.0,
  4.0,
  4.0,
  6.0,
  6.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  4.0,
  5.0,
  5.0,
  4.0,
  4.0,
  5.0,

In [25]:
# Create a subset of only 5 elements of happy_transformed_int
happy_transformed_int_subset = happy_transformed_int[:5]
happy_transformed_int_subset

[[4.0,
  3.0,
  2.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  2.0,
  3.0,
  3.0,
  3.0,
  4.0,
  3.0,
  4.0,
  5.0,
  3.0,
  4.0,
  4.0,
  5.0,
  4.0,
  5.0,
  5.0,
  5.0,
  4.0,
  3.0,
  5.0,
  4.0,
  5.0,
  4.0,
  5.0,
  4.0,
  5.0,
  3.0,
  4.0,
  4.0,
  4.0,
  3.0,
  2.0,
  3.0,
  3.0,
  4.0,
  3.0,
  4.0,
  3.0,
  4.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  3.0,
  5.0,
  4.0,
  4.0,
  4.0,
  4.0,
  3.0,
  4.0,
  6.0,
  4.0,
  3.0,
  5.0,
  3.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  3.0,
  3.0,
  4.0,
  6.0,
  4.0,
  3.0,
  4.0,
  5.0,
  5.0,
  5.0,
  5.0,
  4.0,
  4.0,
  3.0,
  5.0,
  6.0,
  4.0,
  4.0,
  5.0,
  4.0,
  5.0,
  6.0,
  5.0,
  5.0,
  4.0,
  6.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  4.0,
  6.0,
  6.0,
  4.0,
  5.0,
  6.0,
  6.0,
  4.0,
  4.0,
  4.0,
  5.0,
  3.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  5.0,
  4.0,
  4.0,
  5.0,
  4.0,
  4.0,
  6.0,
  6.0,
  4.0,
  4.0,
  4.0,
  4.0,
  4.0,
  5.0,
  4.0,
  4.0,
  5.0,
  5.0,
  4.0,
  4.0,
  5.0,

In [None]:
%%time
ps = PrefixSpan(happy_transformed_int_subset)
frequent_patterns = ps.frequent(1)