In [2016]:
!pip install sktime



In [2017]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from time import sleep
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sktime.classification.kernel_based import RocketClassifier
from sktime.datatypes import convert_to
from sktime.datatypes._panel._convert import from_nested_to_3d_numpy
from sktime.datatypes import check_is_mtype


In [2018]:
# Number of consecutive rows (samples) that are grouped into one batch;
# addWholeDatasetToDataFrame stores each batch as one row of the result frame.
nr_of_samples_in_one_batch = 100

In [2019]:
def convert(df, list_cols=None, label_col='label'):
    """Turn a one-row-per-instance frame of list-valued cells into a long frame.

    Every element of the list-valued columns becomes its own row, the rows are
    indexed by an ``(instance, time)`` MultiIndex, and every column other than
    ``label_col`` is cast to float.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per instance; the columns named in ``list_cols`` hold
        list-like cells.
    list_cols : list of str, optional
        Columns that are exploded together. Defaults to
        ``['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']``.
    label_col : str, default 'label'
        Column that is excluded from the float conversion.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``(instance, time)``.
    """
    columns_to_explode = (
        ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']
        if list_cols is None
        else list_cols
    )

    # Number the rows 0..n-1 and expose that number as an 'instance' column.
    numbered = df.reset_index(drop=True).rename_axis('instance').reset_index()

    # One output row per list element; the listed columns are exploded in lockstep.
    long_frame = numbered.explode(columns_to_explode, ignore_index=True)

    # Running position of each row inside its instance.
    long_frame['time'] = long_frame.groupby('instance').cumcount()
    long_frame = long_frame.set_index(['instance', 'time'])

    # Everything except the label column is converted to float.
    value_cols = [name for name in long_frame.columns if name != label_col]
    long_frame[value_cols] = long_frame[value_cols].astype(float)

    return long_frame


In [2020]:
def read_X_samples(X, nr, samples):
    """Return block number ``nr`` of ``X`` consecutive rows from ``samples``.

    Parameters
    ----------
    X : int
        Number of rows per block.
    nr : int
        Zero-based block number.
    samples : pandas.DataFrame
        Frame to slice positionally.

    Returns
    -------
    pandas.DataFrame
        Rows ``nr*X`` up to (not including) ``nr*X + X``, with all columns.
    """
    first_row = nr * X
    end_row = first_row + X
    return samples.iloc[first_row:end_row, :]
        

In [2021]:
def addEntryToDataFrame(df, data, label):
    """Append one row to ``df`` that stores each column of ``data`` as an array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; it is not modified.
    data : pandas.DataFrame
        Batch of samples. Column ``i`` (by position) ends up as a float64
        NumPy array in the cell ``feature_i`` of the new row.
    label : object
        Value stored in the ``label`` column of the new row.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by the new row, with a fresh 0..n-1 index.
    """
    row = {}
    for col_idx in range(data.shape[1]):
        series_values = np.array(data.iloc[:, col_idx].values, dtype=np.float64)
        # The one-element list makes pandas keep the whole array in a single cell.
        row[f"feature_{col_idx}"] = [series_values]
    row["label"] = [label]

    single_row = pd.DataFrame(row)
    return pd.concat([df, single_row], ignore_index=True)


In [2022]:
def concatenate_dataFrames(dF1, dF2):
    """Stack ``dF1`` on top of ``dF2`` and renumber the rows starting at 0.

    Returns a new frame; neither input is modified.
    """
    return pd.concat((dF1, dF2), ignore_index=True)

In [2023]:
def renameFeatures(dF):
    """Rename the columns of ``dF`` in place to the sensor names plus ``label``.

    Columns are renamed by position: the last column becomes ``label`` and the
    columns before it take the leading names of
    ``acc_x, acc_y, acc_z, gyr_x, gyr_y, gyr_z``. A frame with all six feature
    columns plus the label therefore gets exactly the seven names the previous
    implementation assigned; narrower frames (fewer feature columns) are now
    supported instead of raising a length-mismatch error.

    Parameters
    ----------
    dF : pandas.DataFrame
        Frame whose columns are ordered as features first, label last.
        It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``dF`` object, for call chaining.

    Raises
    ------
    ValueError
        If ``dF`` has more feature columns than there are known sensor names.
    """
    feature_names = ["acc_x", "acc_y", "acc_z", "gyr_x", "gyr_y", "gyr_z"]

    n_columns = len(dF.columns)
    if n_columns == 0:
        # Nothing to rename on a frame without columns.
        return dF

    # The last column is the label; everything before it is a feature.
    n_features = n_columns - 1
    if n_features > len(feature_names):
        raise ValueError(
            f"Expected at most {len(feature_names)} feature columns plus a label, "
            f"got {n_columns} columns"
        )

    dF.columns = feature_names[:n_features] + ["label"]
    return dF

In [2024]:
def addWholeDatasetToDataFrame(dF, data, label):
    """Cut ``data`` into fixed-size batches and append each as one row of ``dF``.

    The batch size is the module-level ``nr_of_samples_in_one_batch``. Only
    complete batches are used: trailing rows that do not fill a whole batch
    are dropped, so every stored series has the same length.

    Parameters
    ----------
    dF : pandas.DataFrame
        Frame to extend (typically empty on the first call).
    data : pandas.DataFrame
        Samples, one row per time step and one column per feature.
    label : object
        Label stored with every batch taken from ``data``.

    Returns
    -------
    pandas.DataFrame
        ``dF`` extended by one row per complete batch, with the columns
        renamed by ``renameFeatures``.
    """
    # Count and slice the same frame so batch boundaries match the row count.
    usable_rows = data.dropna(how='all')

    # Floor division: round() could round up and produce a short final batch.
    nr_of_batches = usable_rows.shape[0] // nr_of_samples_in_one_batch

    for i in range(nr_of_batches):
        batch = read_X_samples(nr_of_samples_in_one_batch, i, usable_rows)
        dF = addEntryToDataFrame(dF, batch, label)

    # With no complete batch and an empty input frame there is nothing to rename.
    if len(dF.columns) > 0:
        dF = renameFeatures(dF)
    return dF

In [2025]:
def readFile_addToDataFrame(filename, dF, label_name):
    """Read a whitespace-separated numeric file and append its batches to ``dF``.

    Parameters
    ----------
    filename : str
        Path of a header-less text file; every field is parsed as float64.
    dF : pandas.DataFrame
        Frame that receives the batches (via ``addWholeDatasetToDataFrame``).
    label_name : object
        Label stored with every batch read from the file.

    Returns
    -------
    pandas.DataFrame
        The extended frame with its ``label`` column cast to ``category``.
    """
    raw_samples = pd.read_csv(
        filename,
        sep=r"\s+",
        header=None,
        dtype=np.float64,
        na_filter=False,
    )
    result = addWholeDatasetToDataFrame(dF, raw_samples, label_name)
    result["label"] = result["label"].astype("category")
    return result

In [2026]:
def shuffle_instances(df, random_state=None):
    """Return ``df`` with its instances in random order, time order untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a two-level MultiIndex whose first level identifies the
        instance.
    random_state : int, numpy.random.RandomState or None, default None
        Forwarded to ``sklearn.utils.shuffle``; pass an int to make the
        permutation reproducible. ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        New frame holding the same rows, grouped by instance in shuffled order.
    """
    # Use the instances actually present in the rows: index.levels[0] can still
    # list instances that were filtered out earlier, and df.loc on such a value
    # raises KeyError.
    instances = df.index.get_level_values(0).unique()
    if len(instances) == 0:
        # Nothing to reorder; pd.concat would raise on an empty list.
        return df.copy()

    shuffled_instances = shuffle(instances, random_state=random_state)

    # Rebuild the frame one instance at a time; the rows inside each instance
    # keep their original (time) order.
    df_shuffled = pd.concat([df.loc[i] for i in shuffled_instances], keys=shuffled_instances)

    return df_shuffled

In [2027]:
# Start from empty frames; the loading cell below fills them one row per batch.
dF_updown = pd.DataFrame()
dF_circles = pd.DataFrame()

In [2028]:
# Read each file and store its batches under the given class label.
dF_updown = readFile_addToDataFrame('up_down2.txt', dF_updown, 'Up and down')
dF_circles = readFile_addToDataFrame('circles2.txt', dF_circles, 'Circle')

In [2029]:
dF_updown['acc_x'].dtype, dF_updown['acc_x'][0].dtype, dF_updown['label'].dtype

(dtype('O'),
 dtype('float64'),
 CategoricalDtype(categories=['Up and down'], ordered=False, categories_dtype=object))

In [2030]:
print(check_is_mtype(dF_updown, "nested_univ")) # Check if type is correct for sktime

False


In [2031]:
print(type(dF_updown.iloc[0, 0]))

<class 'numpy.ndarray'>


In [2032]:
dF_updown.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   acc_x   33 non-null     object  
 1   acc_y   33 non-null     object  
 2   acc_z   33 non-null     object  
 3   gyr_x   33 non-null     object  
 4   gyr_y   33 non-null     object  
 5   gyr_z   33 non-null     object  
 6   label   33 non-null     category
dtypes: category(1), object(6)
memory usage: 1.8+ KB


In [2033]:
dF_circles

Unnamed: 0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
0,"[-42.97, -50.78, -50.29, -42.97, -36.13, -40.5...","[1.95, 2.44, 0.0, 10.25, 7.81, 0.49, 0.98, 6.8...","[1033.2, 1032.23, 1041.5, 1035.64, 1046.88, 10...","[-1.37, -1.34, -0.43, -2.44, -1.52, -2.35, -3....","[-0.91, -2.8, -1.25, -1.25, -2.93, -0.24, -2.1...","[0.67, 1.49, -0.98, -2.04, -1.71, 0.09, -1.16,...",Circle
1,"[-57.62, -45.41, -48.34, -56.15, -62.5, -39.06...","[-2.44, 10.74, 5.86, 0.98, -5.37, 0.49, 6.84, ...","[1041.5, 1060.55, 1041.02, 1038.57, 1042.48, 1...","[0.3, -1.34, 0.15, -0.21, -1.37, -3.57, -3.41,...","[-1.55, -2.74, -2.32, -2.87, -0.7, -1.22, -2.3...","[-0.52, 1.16, -0.46, 0.46, 1.49, -2.9, -0.15, ...",Circle
2,"[-41.99, -48.83, -35.16, -48.34, -51.27, -48.3...","[-1.46, -7.32, -6.35, -2.93, -0.49, 5.86, 2.44...","[1042.97, 1042.48, 1041.5, 1045.41, 1050.78, 1...","[0.7, -2.01, 0.52, -0.06, 1.22, -1.95, -1.19, ...","[-1.1, -1.31, -1.46, -2.47, -0.82, 0.49, -0.98...","[1.62, -0.67, 1.01, -1.16, 1.65, -0.24, -0.61,...",Circle
3,"[-54.2, -45.9, -50.29, -40.04, -34.67, -44.43,...","[3.91, -8.3, -0.49, -0.98, 8.79, 5.86, -10.74,...","[1041.99, 1045.41, 1032.23, 1035.16, 1042.48, ...","[-0.95, -0.37, -1.13, -1.8, -0.4, -0.27, -0.37...","[0.09, -1.49, -1.25, 0.52, -0.21, -3.6, 0.49, ...","[-2.44, 1.07, -2.01, -0.67, 0.27, 1.65, -1.71,...",Circle
4,"[-42.48, -54.69, -56.64, -33.2, -45.9, -41.99,...","[-0.49, -7.81, -4.39, -7.81, 3.42, 12.7, -2.93...","[1039.06, 1046.39, 1042.48, 1045.9, 1035.64, 1...","[0.43, -1.71, -1.83, -2.71, -0.46, -1.55, -2.9...","[-1.34, 0.76, -1.98, 0.03, 0.82, -0.95, -0.37,...","[1.4, 0.61, -0.18, -1.22, -0.7, -0.12, -1.46, ...",Circle
5,"[-50.78, -43.46, -40.04, -57.62, -45.41, -46.8...","[-8.79, -3.42, -5.37, -5.37, -4.88, 4.39, 2.93...","[1031.25, 1055.18, 1031.74, 1032.23, 1047.85, ...","[-1.52, -1.74, -1.13, 0.37, 0.09, 1.04, -1.89,...","[-0.09, -2.53, -2.44, 0.09, -2.9, 0.18, -0.82,...","[-1.04, 1.04, 0.61, -1.01, -0.73, -1.19, 1.89,...",Circle
6,"[-210.94, -226.56, -7.81, -177.73, -324.71, -1...","[19.04, -121.58, -104.98, 169.43, -37.6, -79.5...","[1079.1, 895.51, 1018.07, 1042.48, 1035.16, 10...","[0.4, -6.74, 5.37, 7.07, -12.74, -16.46, -7.56...","[-0.88, 4.21, 0.82, -0.21, 9.66, 16.04, 23.05,...","[34.97, 24.76, 15.64, 19.24, 24.6, -3.23, -11....",Circle
7,"[-69.34, -24.41, -23.93, -24.9, -23.93, -44.92...","[-30.27, 7.32, -2.93, -0.49, -17.58, -89.84, -...","[1029.3, 1025.39, 1028.32, 1045.9, 1035.64, 10...","[-0.7, -0.98, -3.84, -2.07, -3.35, -1.19, 0.03...","[-2.38, -1.1, -1.86, 0.15, -1.71, -0.03, -0.61...","[-1.62, -1.01, -0.98, -1.92, 1.22, 7.62, 7.96,...",Circle
8,"[-8.3, -18.55, -24.41, -15.63, -20.02, -23.44,...","[-4.39, -0.49, -5.37, -9.28, -7.81, 9.28, -0.4...","[1039.55, 1046.88, 1038.09, 1053.71, 1036.62, ...","[-1.89, 0.09, -1.4, -2.1, 1.19, -1.68, -1.01, ...","[-1.07, -1.37, -5.12, -1.55, -0.18, -0.76, -2....","[0.88, 0.3, 0.49, 0.88, -1.74, 2.13, 1.07, -3....",Circle
9,"[-17.09, -14.65, -21.0, -22.95, -17.09, -23.44...","[-0.98, -7.81, -15.63, 4.88, 0.98, 6.35, 0.49,...","[1042.48, 1040.53, 1039.06, 1035.64, 1044.92, ...","[-1.43, -1.8, -0.12, -0.03, -1.55, 0.7, -1.13,...","[-0.85, -1.62, -2.2, -0.85, -2.2, -0.91, -2.62...","[0.15, -0.21, 1.98, 2.35, 0.73, 1.4, 2.59, -0....",Circle


In [2034]:
print(len(dF_circles.loc[0, 'acc_x']))

# Loop over however many instances were loaded instead of a hard-coded 33,
# so the check still works when the input file changes.
for i in range(len(dF_circles)):
    print(len(dF_circles.loc[i, 'acc_x']))

print(dF_circles.loc[10, 'acc_x'])


100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
[ 181.64  198.73  323.73  228.03   24.41 -111.82 -319.34 -191.41 -280.76
 -436.52 -373.54 -159.18 -259.28  -83.98 -130.37 -104.49   83.98  324.22
  296.87  355.96  371.09  487.79  295.41  164.55  183.59  202.15  272.95
   97.17  -32.23 -134.77 -211.43  -41.99 -109.37 -539.06 -233.4  -268.07
 -321.78 -316.41 -278.32 -389.65  -61.52 -194.34 -187.99  -41.99  157.23
  292.97   13.67  306.64  462.4   274.41  317.38  212.89  129.39  211.43
  172.85  -30.27    8.3  -210.94 -330.08 -211.43 -387.21 -453.12 -421.39
 -349.61 -217.29 -283.69 -202.64 -246.09  -48.83 -239.26   61.52  -59.57
  291.5   333.98  411.13  224.61  630.37  320.31  279.79  137.21  178.22
  180.18  311.04   40.53  -73.73 -271.97 -167.48 -523.44 -288.57 -446.29
 -564.45 -334.47 -323.73 -394.04 -187.99 -105.96 -177.73   33.69  104.
  127.93]


In [2035]:
totalDataFrame = concatenate_dataFrames(dF_circles, dF_updown)

In [2036]:
dF_circles.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   acc_x   33 non-null     object  
 1   acc_y   33 non-null     object  
 2   acc_z   33 non-null     object  
 3   gyr_x   33 non-null     object  
 4   gyr_y   33 non-null     object  
 5   gyr_z   33 non-null     object  
 6   label   33 non-null     category
dtypes: category(1), object(6)
memory usage: 1.8+ KB


In [2037]:
dF_updown.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   acc_x   33 non-null     object  
 1   acc_y   33 non-null     object  
 2   acc_z   33 non-null     object  
 3   gyr_x   33 non-null     object  
 4   gyr_y   33 non-null     object  
 5   gyr_z   33 non-null     object  
 6   label   33 non-null     category
dtypes: category(1), object(6)
memory usage: 1.8+ KB


In [2038]:
totalDataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66 entries, 0 to 65
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   acc_x   66 non-null     object
 1   acc_y   66 non-null     object
 2   acc_z   66 non-null     object
 3   gyr_x   66 non-null     object
 4   gyr_y   66 non-null     object
 5   gyr_z   66 non-null     object
 6   label   66 non-null     object
dtypes: object(7)
memory usage: 3.7+ KB


In [2039]:
#for row in range(len(totalDataFrame)):
 #   print(totalDataFrame.iloc[row]) 

In [2040]:
totalDataFrame

Unnamed: 0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
0,"[-42.97, -50.78, -50.29, -42.97, -36.13, -40.5...","[1.95, 2.44, 0.0, 10.25, 7.81, 0.49, 0.98, 6.8...","[1033.2, 1032.23, 1041.5, 1035.64, 1046.88, 10...","[-1.37, -1.34, -0.43, -2.44, -1.52, -2.35, -3....","[-0.91, -2.8, -1.25, -1.25, -2.93, -0.24, -2.1...","[0.67, 1.49, -0.98, -2.04, -1.71, 0.09, -1.16,...",Circle
1,"[-57.62, -45.41, -48.34, -56.15, -62.5, -39.06...","[-2.44, 10.74, 5.86, 0.98, -5.37, 0.49, 6.84, ...","[1041.5, 1060.55, 1041.02, 1038.57, 1042.48, 1...","[0.3, -1.34, 0.15, -0.21, -1.37, -3.57, -3.41,...","[-1.55, -2.74, -2.32, -2.87, -0.7, -1.22, -2.3...","[-0.52, 1.16, -0.46, 0.46, 1.49, -2.9, -0.15, ...",Circle
2,"[-41.99, -48.83, -35.16, -48.34, -51.27, -48.3...","[-1.46, -7.32, -6.35, -2.93, -0.49, 5.86, 2.44...","[1042.97, 1042.48, 1041.5, 1045.41, 1050.78, 1...","[0.7, -2.01, 0.52, -0.06, 1.22, -1.95, -1.19, ...","[-1.1, -1.31, -1.46, -2.47, -0.82, 0.49, -0.98...","[1.62, -0.67, 1.01, -1.16, 1.65, -0.24, -0.61,...",Circle
3,"[-54.2, -45.9, -50.29, -40.04, -34.67, -44.43,...","[3.91, -8.3, -0.49, -0.98, 8.79, 5.86, -10.74,...","[1041.99, 1045.41, 1032.23, 1035.16, 1042.48, ...","[-0.95, -0.37, -1.13, -1.8, -0.4, -0.27, -0.37...","[0.09, -1.49, -1.25, 0.52, -0.21, -3.6, 0.49, ...","[-2.44, 1.07, -2.01, -0.67, 0.27, 1.65, -1.71,...",Circle
4,"[-42.48, -54.69, -56.64, -33.2, -45.9, -41.99,...","[-0.49, -7.81, -4.39, -7.81, 3.42, 12.7, -2.93...","[1039.06, 1046.39, 1042.48, 1045.9, 1035.64, 1...","[0.43, -1.71, -1.83, -2.71, -0.46, -1.55, -2.9...","[-1.34, 0.76, -1.98, 0.03, 0.82, -0.95, -0.37,...","[1.4, 0.61, -0.18, -1.22, -0.7, -0.12, -1.46, ...",Circle
...,...,...,...,...,...,...,...
61,"[-69.82, -174.8, -341.31, -406.74, -286.13, -2...","[71.78, -131.35, -372.56, -536.62, -589.84, -5...","[1024.9, 1546.39, 1828.13, 2005.86, 2131.35, 2...","[108.9, 108.11, 88.6, 68.35, 33.05, -2.01, -2....","[-67.93, -87.96, -101.89, -87.07, -53.81, -47....","[-2.93, 7.29, -13.63, -27.87, -21.16, -23.81, ...",Up and down
62,"[-122.56, -74.71, -38.57, 7.81, -0.98, -114.26...","[274.41, 166.5, 139.16, 107.42, 178.22, 345.21...","[259.77, 142.09, 30.76, 6.35, -49.8, -68.36, -...","[-65.61, -42.8, -30.52, -29.12, -16.04, 12.59,...","[30.58, 44.18, 50.64, 37.93, 4.79, -23.38, -37...","[23.02, 30.43, 36.89, 38.63, 21.52, -6.86, -17...",Up and down
63,"[-153.32, -193.36, -132.32, 10.25, -106.93, -2...","[-476.07, -490.23, -624.51, -532.71, -372.07, ...","[1956.54, 2165.53, 2293.95, 1994.63, 1751.95, ...","[10.82, -7.1, -30.21, -60.12, -97.71, -107.59,...","[6.37, 58.84, 31.65, -6.71, -15.37, -10.15, 8....","[17.68, 25.21, 24.18, 6.04, -8.11, -7.07, 8.45...",Up and down
64,"[-37.11, -13.18, -82.52, -35.16, -89.36, -174....","[330.57, 314.45, 230.47, 116.7, 53.22, -56.64,...","[8.3, 141.11, 514.16, 786.13, 1224.61, 1683.11...","[67.2, 100.18, 123.78, 126.16, 125.98, 125.61,...","[-1.13, -12.07, -28.81, -52.84, -56.01, -66.65...","[9.05, 13.38, 12.65, 0.98, -26.98, -38.72, -37...",Up and down


In [2041]:
# NOTE(review): this rebinds totalDataFrame from the wide frame (one row per
# instance) to the long (instance, time) frame, so re-running this cell passes
# the already-converted frame back into convert(). Run it once per kernel session.
totalDataFrame = convert(totalDataFrame)

In [2042]:
totalDataFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0,-42.97,1.95,1033.20,-1.37,-0.91,0.67,Circle
0,1,-50.78,2.44,1032.23,-1.34,-2.80,1.49,Circle
0,2,-50.29,0.00,1041.50,-0.43,-1.25,-0.98,Circle
0,3,-42.97,10.25,1035.64,-2.44,-1.25,-2.04,Circle
0,4,-36.13,7.81,1046.88,-1.52,-2.93,-1.71,Circle
...,...,...,...,...,...,...,...,...
65,95,17.09,345.21,120.12,109.05,-11.01,-0.64,Up and down
65,96,-4.88,309.08,415.04,141.52,-24.48,1.37,Up and down
65,97,27.34,197.27,721.68,154.60,-32.01,-0.37,Up and down
65,98,7.81,142.09,1186.52,158.63,-43.96,-4.27,Up and down


In [2228]:
totalDataFrame_shuffled = shuffle_instances(totalDataFrame)

In [2230]:
totalDataFrame_shuffled


Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20,0,-319.82,-238.77,1086.91,-9.94,-1.65,55.85,Circle
20,1,-368.16,-125.49,922.36,-6.01,-8.75,57.99,Circle
20,2,-470.21,88.87,870.61,3.14,-3.45,58.84,Circle
20,3,-543.46,161.13,878.91,-0.21,2.01,60.46,Circle
20,4,-534.67,185.55,886.72,-7.87,-14.18,59.66,Circle
...,...,...,...,...,...,...,...,...
49,95,-54.69,158.69,234.86,52.90,-22.38,23.90,Up and down
49,96,-14.16,201.17,340.33,77.96,-39.24,0.58,Up and down
49,97,58.59,85.94,464.36,87.04,-32.96,-4.97,Up and down
49,98,1.46,34.67,846.68,90.06,-26.92,-22.65,Up and down


In [2045]:
# Now separate features (X) and labels (y).
# Each instance's label is constant across time steps,
# so we typically grab the first label for each instance.
# Each instance's label is constant across time steps, so take the first one.
# sort=False keeps the groups in order of first appearance, which is the
# shuffled instance order of the rows in X. The default sort=True would order
# y by instance id and pair the labels with the wrong series.
y = totalDataFrame_shuffled.groupby('instance', sort=False)['label'].first()

X = totalDataFrame_shuffled.drop(columns=['label'])
X = X.astype(float)

# Fail early if the labels and the instances of X ever get out of step.
assert y.index.equals(X.index.get_level_values('instance').unique())

rocket = RocketClassifier()

In [2116]:
print(len(X), len(y))

6600 66


In [2118]:
# Fit RocketClassifier
rocket.fit(X, y)

print("Fit successful!")

Fit successful!


In [2240]:
X_test_single = X.loc[[49]] 

In [2242]:
X_test_single

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
49,0,-160.64,86.43,432.13,-77.16,16.25,1.52
49,1,-143.07,156.74,340.82,-61.34,25.09,10.03
49,2,-54.20,157.23,220.21,-32.99,23.48,27.23
49,3,-37.11,160.16,128.42,-16.68,29.91,32.87
49,4,-17.58,93.26,117.19,3.17,32.16,31.68
49,...,...,...,...,...,...,...
49,95,-54.69,158.69,234.86,52.90,-22.38,23.90
49,96,-14.16,201.17,340.33,77.96,-39.24,0.58
49,97,58.59,85.94,464.36,87.04,-32.96,-4.97
49,98,1.46,34.67,846.68,90.06,-26.92,-22.65


In [2244]:
y_pred_single = rocket.predict(X_test_single)

In [2245]:
y_pred_single

array(['Up and down'], dtype='<U11')

In [None]:
####################################

In [None]:
totalDataFrame_shuffled.loc[39]

In [None]:
y_data = totalDataFrame_shuffled['label']

In [None]:
X_data = totalDataFrame_shuffled.drop('label', axis=1)

In [None]:
y_data

In [None]:
X_data

In [None]:
# ---------------------------------

In [None]:
# Split whole instances rather than individual rows: a row-level split cuts an
# instance in half at the boundary and leaves one label per time step instead
# of one label per instance.
instance_ids = X_data.index.get_level_values('instance').unique().to_numpy()
train_ids, test_ids = train_test_split(
    instance_ids, test_size=0.2, random_state=42, shuffle=False
)

X_data_train = X_data.loc[train_ids]
X_data_test = X_data.loc[test_ids]

# One label per instance, selected in the same instance order as the features.
y_per_instance = y_data.groupby(level='instance', sort=False).first()
y_data_train = y_per_instance.loc[train_ids]
y_data_test = y_per_instance.loc[test_ids]

In [None]:
X_data_train

In [None]:
y_data_train

In [None]:
X_data_train.info()

In [None]:
# X_data_train is a long frame indexed by (instance, time); sktime calls that
# layout "pd-multiindex". "nested_univ" is the one-row-per-instance layout
# with a pd.Series in every cell.
check_is_mtype(X_data_train, "pd-multiindex")

In [None]:
y_data_train.info()

In [None]:
print(len(X_data_train), len(y_data_train))

In [None]:
rocket = RocketClassifier(n_features_per_kernel=6, n_jobs=-1, random_state=42)

In [None]:
# `df` is not defined at notebook level, so use the shuffled long frame.
# sort=False keeps y in the same instance order as the rows of X.
y = totalDataFrame_shuffled.groupby('instance', sort=False)['label'].first()
X = totalDataFrame_shuffled.drop(columns=['label'])

In [None]:
rocket.fit(X_data_train, y_data_train)