In [1]:
%load_ext autoreload 
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import config
import random

random.seed(42)

In [5]:
def read_preprocessed_dfs(labels):
    dfs = []
    df_labels = []
    for label in labels:
        files = Path(config.DESTINATION_FOLDER, label).glob('*.csv')
        for file in files:
            df = pd.read_csv(file)
            dfs.append(df)
            df_labels.append(label)
            
    return dfs, df_labels    

In [85]:
def normalise_dfs(dfs, labels):
    dfs = [_df.copy() for _df in dfs]
#     means = [df.mean() for df in dfs]
#     dfs = [(df - df.min()) / (df.max() - df.min()) for df in dfs]
    max_len = max([len(df) for df in dfs])
    dfs = [(df, label) for df, label in zip(dfs, labels) if len(df) == max_len]
#     random.seed(43)
    random.shuffle(dfs)
    return zip(*dfs)

In [104]:
all_dfs, all_labels = read_preprocessed_dfs(['idle', 'jogging', 'exercise'])

In [105]:
normalised_dfs, normalised_labels = normalise_dfs(all_dfs, all_labels)

In [106]:
len(normalised_dfs)

36

In [107]:
unique, counts = np.unique(np.array(normalised_labels), return_counts=True)
print(unique, counts)

['exercise' 'idle' 'jogging'] [ 7  9 20]


In [108]:
normalised_labels, len(normalised_dfs)

(('idle',
  'idle',
  'jogging',
  'jogging',
  'jogging',
  'exercise',
  'exercise',
  'jogging',
  'jogging',
  'idle',
  'jogging',
  'jogging',
  'jogging',
  'idle',
  'exercise',
  'jogging',
  'jogging',
  'jogging',
  'jogging',
  'idle',
  'exercise',
  'jogging',
  'jogging',
  'jogging',
  'jogging',
  'exercise',
  'idle',
  'exercise',
  'idle',
  'idle',
  'jogging',
  'idle',
  'jogging',
  'jogging',
  'exercise',
  'jogging'),
 36)

In [109]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [110]:
from sktime.classification.interval_based import (
    CanonicalIntervalForest,
    DrCIF,
    RandomIntervalSpectralEnsemble,
    SupervisedTimeSeriesForest,
    TimeSeriesForestClassifier,
)
classifier = CanonicalIntervalForest(n_estimators=10)
classifier.fit(list(normalised_dfs[:25]), np.array(normalised_labels[:25]))

CanonicalIntervalForest(n_estimators=10)

In [111]:
classifier.score(list(normalised_dfs[25:]), np.array(normalised_labels[25:]))

0.9090909090909091

In [112]:
predictions = classifier.predict(list(normalised_dfs[25:]))

In [113]:
predictions, normalised_labels[25:]

(array(['jogging', 'idle', 'exercise', 'idle', 'idle', 'jogging', 'idle',
        'jogging', 'jogging', 'exercise', 'jogging'], dtype='<U8'),
 ('exercise',
  'idle',
  'exercise',
  'idle',
  'idle',
  'jogging',
  'idle',
  'jogging',
  'jogging',
  'exercise',
  'jogging'))

In [114]:
label_dict = {'idle': 0, 'jogging': 1, 'exercise': 2}
confusion_matrix([label_dict[label] for label in normalised_labels[25:]], [label_dict[label] for label in predictions])

array([[4, 0, 0],
       [0, 4, 0],
       [0, 1, 2]], dtype=int64)