In [310]:
!pip install sktime



In [311]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from time import sleep
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sktime.classification.kernel_based import RocketClassifier
from sktime.datatypes import convert_to
from sktime.datatypes._panel._convert import from_nested_to_3d_numpy
from sktime.datatypes import check_is_mtype


In [312]:
nr_of_samples_in_one_batch = 100

In [313]:
def convert(df, list_cols=None, label_col='label'):
    if list_cols is None:
        list_cols = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']

    # 1. Reset index so we have a 'instance' column
    df = df.reset_index(drop=True).rename_axis('instance').reset_index()

    # 2. Explode list columns
    df = df.explode(list_cols, ignore_index=True)

    # 3. Create a 'time' column for each exploded element
    df['time'] = df.groupby('instance').cumcount()

    # 4. Set a MultiIndex of (instance, time)
    df = df.set_index(['instance', 'time'])

    # 5. Convert all non-label columns to float
    numeric_cols = [col for col in df.columns if col != label_col]
    df[numeric_cols] = df[numeric_cols].astype(float)

    return df


In [314]:
def read_X_samples(X, nr, samples):
    return samples.iloc[(nr*X):(nr*X+X), :]
        

In [315]:
def addEntryToDataFrame(df, data, label):
    new_entry = {f"feature_{i}": [np.array(data.iloc[:, i].values, dtype=np.float64)]  # Wrap in list
                 for i in range(data.shape[1])}
    new_entry["label"] = [label]  # Wrap label in list
    
    # Convert to DataFrame
    df_entry = pd.DataFrame(new_entry)

    # Concatenate
    df = pd.concat([df, df_entry], ignore_index=True)
    return df


In [316]:
def concatenate_dataFrames(dF1, dF2):
    frame = [dF1, dF2]
    return pd.concat(frame, ignore_index=True)

In [317]:
def renameFeatures(dF):
    preferred_names = ["acc_x", "acc_y", "acc_z", "gyr_x", "gyr_y", "gyr_z", "label"]
    dF.columns = preferred_names[:len(preferred_names)]
    return dF

In [318]:
def addWholeDatasetToDataFrame(dF, data, label):
    nr_of_batches = round(data.dropna(how='all').shape[0] / nr_of_samples_in_one_batch)
    
    for i in range(nr_of_batches):
        batch = read_X_samples(nr_of_samples_in_one_batch, i, data)
        dF = addEntryToDataFrame(dF, batch, label)
    dF = renameFeatures(dF)
    return dF

In [319]:
def readFile_addToDataFrame(filename, dF, label_name):
    motion = pd.read_csv(filename, na_filter=False, sep=r"\s+", header=None, dtype=np.float64)
    dF = addWholeDatasetToDataFrame(dF, motion, label_name)
    dF["label"] = dF["label"].astype("category")
    return dF

In [320]:
def shuffle_instances(df):
    # Get unique instance values and shuffle them
    shuffled_instances = shuffle(df.index.levels[0])  # Shuffle the instances
    
    # Reorder the DataFrame based on shuffled instances while keeping time order intact
    df_shuffled = pd.concat([df.loc[i] for i in shuffled_instances], keys=shuffled_instances)

    return df_shuffled

In [321]:
dF_updown = pd.DataFrame()
dF_circles = pd.DataFrame()

In [322]:
dF_updown = readFile_addToDataFrame('up_down3.txt', dF_updown, 'Up and down')
dF_circles = readFile_addToDataFrame('circles3.txt', dF_circles, 'Circle')

In [323]:
dF_updown['acc_x'].dtype, dF_updown['acc_x'][0].dtype, dF_updown['label'].dtype

(dtype('O'),
 dtype('float64'),
 CategoricalDtype(categories=['Up and down'], ordered=False, categories_dtype=object))

In [324]:
print(check_is_mtype(dF_updown, "nested_univ")) # Check if type is correct for sktime

False


In [325]:
print(type(dF_updown.iloc[0, 0]))

<class 'numpy.ndarray'>


In [326]:
dF_updown.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   acc_x   50 non-null     object  
 1   acc_y   50 non-null     object  
 2   acc_z   50 non-null     object  
 3   gyr_x   50 non-null     object  
 4   gyr_y   50 non-null     object  
 5   gyr_z   50 non-null     object  
 6   label   50 non-null     category
dtypes: category(1), object(6)
memory usage: 2.6+ KB


In [327]:
dF_circles

Unnamed: 0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
0,"[-10.74, -23.93, -10.25, -13.67, -12.21, -27.8...","[-6.84, 4.39, 5.86, -3.91, -4.39, -5.86, -3.42...","[1033.2, 1035.64, 1040.53, 1021.48, 1044.92, 1...","[-1.49, -2.07, -0.27, -1.49, -0.55, -0.85, 0.9...","[-2.68, -1.19, -1.55, -0.61, -2.07, -3.41, -1....","[-1.13, 1.8, 0.03, -1.89, 0.24, -0.55, 1.34, 0...",Circle
1,"[-31.25, -12.7, -11.23, -9.28, -14.65, -14.65,...","[-0.49, 13.18, 2.93, -4.39, 1.95, 0.49, 5.37, ...","[1034.18, 1036.62, 1034.67, 1033.2, 1042.48, 1...","[-3.57, -1.22, 0.09, -2.5, -3.6, -0.76, -1.25,...","[-1.86, -3.29, -0.3, 0.73, -1.89, -0.88, -0.88...","[-1.04, -0.49, -0.43, -1.4, 1.13, 0.03, -1.01,...",Circle
2,"[-23.44, -23.44, -17.09, 4.39, -17.58, -26.86,...","[-2.44, 4.88, 4.88, 2.93, 0.0, 9.77, 0.98, -1....","[1031.74, 1047.36, 1036.13, 1029.79, 1043.95, ...","[0.27, 0.49, 1.31, -1.22, -0.52, -0.73, -1.16,...","[-3.05, -3.35, -1.68, -1.31, -0.15, -2.9, -0.9...","[-1.98, -0.7, -0.09, -0.09, -1.46, 0.34, -1.16...",Circle
3,"[-22.95, -20.51, -16.6, -22.46, 5.37, -14.16, ...","[8.79, 0.0, 5.37, -3.42, -3.42, -13.18, -5.37,...","[1047.36, 1042.48, 1041.02, 1045.9, 1022.95, 1...","[-2.44, -0.34, -0.98, -0.12, 0.7, -0.27, -0.03...","[-2.01, -0.43, -2.65, -2.8, -2.62, -1.95, -0.9...","[0.15, 0.37, 0.15, 2.1, 2.5, -0.46, -1.55, -1....",Circle
4,"[-25.88, -21.0, -18.55, -19.04, -2.44, -9.77, ...","[-1.95, -0.49, -6.35, 4.88, 2.93, 1.46, 7.32, ...","[1041.99, 1042.97, 1042.97, 1044.92, 1048.34, ...","[-2.35, -1.83, 0.09, -1.37, -0.34, -0.3, 0.3, ...","[-0.82, -0.4, 0.24, -1.65, 1.19, -1.8, -1.52, ...","[-0.43, -0.06, -2.04, -1.68, -1.13, 1.71, 1.31...",Circle
5,"[-11.72, -19.04, -18.55, -14.65, -18.07, -18.0...","[1.95, 8.3, -6.35, 0.0, -7.32, 9.28, 9.77, 5.8...","[1040.53, 1036.62, 1040.53, 1043.95, 1033.2, 1...","[-1.1, -3.05, -0.27, -1.19, -1.34, -3.48, -1.4...","[-2.96, -1.74, -2.62, -1.19, -1.22, -0.55, -2....","[0.34, -0.15, -0.58, -2.2, 0.34, -0.18, -2.53,...",Circle
6,"[-134.28, -47.36, 18.07, -64.94, -104.0, -161....","[-163.57, -207.03, -194.34, 38.09, -118.65, 23...","[794.92, 952.64, 991.7, 1023.93, 947.27, 910.1...","[2.8, -1.89, -3.02, -0.12, 0.0, -10.0, -1.74, ...","[-2.74, -2.1, -1.13, -7.2, 2.16, 2.84, -1.89, ...","[16.98, 17.68, 41.8, 4.63, -3.14, -22.71, -25....",Circle
7,"[-10.25, -12.7, -18.55, -24.9, -31.25, -16.11,...","[15.14, 6.84, 13.18, -3.42, 5.86, 1.46, 4.88, ...","[1030.76, 1036.13, 1024.41, 1037.6, 1035.64, 1...","[-1.77, -2.44, -0.61, -2.59, -3.99, -2.35, -1....","[0.49, -0.06, -0.98, -3.66, -2.65, -2.1, -0.85...","[-0.06, 1.13, -0.61, -0.46, -3.2, -0.21, 1.52,...",Circle
8,"[-270.02, -398.44, -330.57, -349.12, -327.15, ...","[-180.18, -160.16, -48.83, -33.2, -166.02, -15...","[911.13, 1018.55, 925.78, 1090.82, 908.2, 910....","[-25.61, -6.77, -5.12, -2.38, -6.98, -2.8, -6....","[1.34, -7.84, -0.98, -0.37, 8.93, -2.96, -4.21...","[58.38, 55.34, 36.19, 42.99, 39.36, 26.62, -1....",Circle
9,"[-237.3, -331.54, 125.0, -130.37, 168.95, 250....","[321.78, -758.3, 325.2, 455.57, 442.87, 343.75...","[2015.14, 2221.19, 1061.04, 1165.04, 837.89, 1...","[-18.57, -5.24, -0.55, -1.4, 4.27, 7.04, -1.52...","[0.88, -12.2, -10.03, -10.49, 10.43, 1.68, -18...","[-10.61, -10.55, -85.37, -85.88, -80.7, -75.12...",Circle


In [328]:
print(len(dF_circles.loc[0, 'acc_x']))

for i in range(33):
    print(len(dF_circles.loc[i, 'acc_x']))

print(dF_circles.loc[10, 'acc_x'])


100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
[ -55.66  -88.87  -81.54  250.98   90.82  447.75  230.96  406.25  387.21
  169.92  112.79  -71.29  248.05  229.98   60.55   35.16   30.76 -129.39
 -144.53 -171.87 -238.77 -166.02 -154.3  -214.84 -235.84 -324.22  -37.6
 -179.2  -242.68 -468.26 -208.98 -289.06 -278.81 -227.05   19.53   -4.39
 -152.83  173.83  114.26  457.52  686.04  246.58  267.09  229.49  251.46
  553.71  201.66  161.13  120.12   49.8   -10.25  -30.27 -157.71 -133.79
 -160.16 -205.57 -247.07 -332.03 -284.18 -475.1  -219.24 -138.67 -155.27
   39.06   88.38  -57.62    1.46  -90.82  100.59 -175.78  265.62  129.39
  117.19  225.1   393.55  422.36  233.89  203.61  322.27  475.1    54.69
    9.28  139.16   13.18  -71.29 -305.18 -275.39 -363.77 -404.3  -371.09
 -383.3  -331.54 -420.41 -363.77 -326.17 -214.84  102.54  116.21  -33.69
  167.48]


In [329]:
totalDataFrame = concatenate_dataFrames(dF_circles, dF_updown)

In [330]:
dF_circles.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   acc_x   51 non-null     object  
 1   acc_y   51 non-null     object  
 2   acc_z   51 non-null     object  
 3   gyr_x   51 non-null     object  
 4   gyr_y   51 non-null     object  
 5   gyr_z   51 non-null     object  
 6   label   51 non-null     category
dtypes: category(1), object(6)
memory usage: 2.7+ KB


In [331]:
dF_updown.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   acc_x   50 non-null     object  
 1   acc_y   50 non-null     object  
 2   acc_z   50 non-null     object  
 3   gyr_x   50 non-null     object  
 4   gyr_y   50 non-null     object  
 5   gyr_z   50 non-null     object  
 6   label   50 non-null     category
dtypes: category(1), object(6)
memory usage: 2.6+ KB


In [332]:
totalDataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   acc_x   101 non-null    object
 1   acc_y   101 non-null    object
 2   acc_z   101 non-null    object
 3   gyr_x   101 non-null    object
 4   gyr_y   101 non-null    object
 5   gyr_z   101 non-null    object
 6   label   101 non-null    object
dtypes: object(7)
memory usage: 5.7+ KB


In [333]:
#for row in range(len(totalDataFrame)):
 #   print(totalDataFrame.iloc[row]) 

In [334]:
totalDataFrame

Unnamed: 0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
0,"[-10.74, -23.93, -10.25, -13.67, -12.21, -27.8...","[-6.84, 4.39, 5.86, -3.91, -4.39, -5.86, -3.42...","[1033.2, 1035.64, 1040.53, 1021.48, 1044.92, 1...","[-1.49, -2.07, -0.27, -1.49, -0.55, -0.85, 0.9...","[-2.68, -1.19, -1.55, -0.61, -2.07, -3.41, -1....","[-1.13, 1.8, 0.03, -1.89, 0.24, -0.55, 1.34, 0...",Circle
1,"[-31.25, -12.7, -11.23, -9.28, -14.65, -14.65,...","[-0.49, 13.18, 2.93, -4.39, 1.95, 0.49, 5.37, ...","[1034.18, 1036.62, 1034.67, 1033.2, 1042.48, 1...","[-3.57, -1.22, 0.09, -2.5, -3.6, -0.76, -1.25,...","[-1.86, -3.29, -0.3, 0.73, -1.89, -0.88, -0.88...","[-1.04, -0.49, -0.43, -1.4, 1.13, 0.03, -1.01,...",Circle
2,"[-23.44, -23.44, -17.09, 4.39, -17.58, -26.86,...","[-2.44, 4.88, 4.88, 2.93, 0.0, 9.77, 0.98, -1....","[1031.74, 1047.36, 1036.13, 1029.79, 1043.95, ...","[0.27, 0.49, 1.31, -1.22, -0.52, -0.73, -1.16,...","[-3.05, -3.35, -1.68, -1.31, -0.15, -2.9, -0.9...","[-1.98, -0.7, -0.09, -0.09, -1.46, 0.34, -1.16...",Circle
3,"[-22.95, -20.51, -16.6, -22.46, 5.37, -14.16, ...","[8.79, 0.0, 5.37, -3.42, -3.42, -13.18, -5.37,...","[1047.36, 1042.48, 1041.02, 1045.9, 1022.95, 1...","[-2.44, -0.34, -0.98, -0.12, 0.7, -0.27, -0.03...","[-2.01, -0.43, -2.65, -2.8, -2.62, -1.95, -0.9...","[0.15, 0.37, 0.15, 2.1, 2.5, -0.46, -1.55, -1....",Circle
4,"[-25.88, -21.0, -18.55, -19.04, -2.44, -9.77, ...","[-1.95, -0.49, -6.35, 4.88, 2.93, 1.46, 7.32, ...","[1041.99, 1042.97, 1042.97, 1044.92, 1048.34, ...","[-2.35, -1.83, 0.09, -1.37, -0.34, -0.3, 0.3, ...","[-0.82, -0.4, 0.24, -1.65, 1.19, -1.8, -1.52, ...","[-0.43, -0.06, -2.04, -1.68, -1.13, 1.71, 1.31...",Circle
...,...,...,...,...,...,...,...
96,"[-38.09, -32.23, -45.9, -85.94, -72.27, -62.99...","[47.36, 38.57, 63.96, 79.1, 93.75, 64.94, 50.2...","[41.02, 26.86, -40.53, 27.83, 111.33, 312.01, ...","[-22.74, -4.05, 19.21, 45.03, 72.04, 92.32, 96...","[-6.22, -11.77, -23.87, -25.82, -17.01, -9.7, ...","[13.69, 22.26, 24.3, 19.97, 15.21, 16.8, 22.71...",Up and down
97,"[59.57, 64.45, 15.14, 9.77, 66.41, 78.12, 81.5...","[-326.17, -416.5, -371.58, -437.99, -453.61, -...","[2240.23, 2333.98, 2222.17, 2174.32, 2114.75, ...","[80.91, 47.87, 26.92, 8.66, -21.49, -52.35, -7...","[0.55, -24.7, -16.01, 17.2, 28.87, 27.53, 17.5...","[-22.01, -24.42, -13.9, 2.01, 6.98, 5.67, 5.37...",Up and down
98,"[-80.57, -89.84, -91.31, -125.49, -115.23, -10...","[18.07, 80.08, 60.06, 57.13, 58.59, 69.82, 85....","[378.91, 230.96, 104.0, 48.83, -34.67, -85.94,...","[-109.36, -88.17, -60.12, -33.78, -7.65, 17.99...","[6.1, 5.34, 4.48, 5.37, -1.22, -6.07, -6.62, -...","[6.65, 5.91, 9.33, 10.61, 13.41, 5.34, 2.07, -...",Up and down
99,"[-38.57, 16.11, -22.95, -56.64, -67.87, -28.81...","[-39.06, -76.66, -103.52, -203.12, -209.47, -2...","[1783.69, 1958.01, 2212.89, 2287.6, 2137.7, 20...","[102.47, 71.83, 42.26, 26.95, 18.78, 2.38, -21...","[-47.1, -61.98, -51.1, -39.91, -7.53, 25.49, 3...","[-13.26, -23.93, -29.88, -27.59, -8.11, 2.62, ...",Up and down


In [335]:
totalDataFrame = convert(totalDataFrame)

In [336]:
totalDataFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0,-10.74,-6.84,1033.20,-1.49,-2.68,-1.13,Circle
0,1,-23.93,4.39,1035.64,-2.07,-1.19,1.80,Circle
0,2,-10.25,5.86,1040.53,-0.27,-1.55,0.03,Circle
0,3,-13.67,-3.91,1021.48,-1.49,-0.61,-1.89,Circle
0,4,-12.21,-4.39,1044.92,-0.55,-2.07,0.24,Circle
...,...,...,...,...,...,...,...,...
100,95,-41.02,25.88,138.18,94.97,-7.41,13.81,Up and down
100,96,-49.80,50.29,308.11,111.16,0.58,17.90,Up and down
100,97,-49.80,18.07,653.32,105.79,-2.93,15.49,Up and down
100,98,3.42,33.20,942.38,95.82,-15.12,22.56,Up and down


In [337]:
totalDataFrame_shuffled = shuffle_instances(totalDataFrame)

In [338]:
totalDataFrame_shuffled


Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,0,-22.95,8.79,1047.36,-2.44,-2.01,0.15,Circle
3,1,-20.51,0.00,1042.48,-0.34,-0.43,0.37,Circle
3,2,-16.60,5.37,1041.02,-0.98,-2.65,0.15,Circle
3,3,-22.46,-3.42,1045.90,-0.12,-2.80,2.10,Circle
3,4,5.37,-3.42,1022.95,0.70,-2.62,2.50,Circle
...,...,...,...,...,...,...,...,...
14,95,-21.97,313.96,784.18,-5.06,13.66,-49.18,Circle
14,96,1.46,443.36,990.72,-12.47,-18.54,-40.67,Circle
14,97,64.45,288.09,1194.34,6.34,0.76,-36.62,Circle
14,98,317.38,447.27,1353.52,-1.16,0.98,-28.14,Circle


In [339]:
# Get unique instances
unique_instances = totalDataFrame_shuffled.index.get_level_values('instance').unique()

# Compute the number of instances to take
num_instances = int(0.15 * len(unique_instances))

# Select the first 15% of instances
selected_instances = unique_instances[:num_instances]

# Create test dataframe
test_dataframe = totalDataFrame_shuffled.loc[selected_instances]

totalDataFrame_shuffled = totalDataFrame_shuffled.drop(index=selected_instances, level='instance')

In [340]:
totalDataFrame_shuffled

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0,-31.25,-0.49,1034.18,-3.57,-1.86,-1.04,Circle
1,1,-12.70,13.18,1036.62,-1.22,-3.29,-0.49,Circle
1,2,-11.23,2.93,1034.67,0.09,-0.30,-0.43,Circle
1,3,-9.28,-4.39,1033.20,-2.50,0.73,-1.40,Circle
1,4,-14.65,1.95,1042.48,-3.60,-1.89,1.13,Circle
...,...,...,...,...,...,...,...,...
14,95,-21.97,313.96,784.18,-5.06,13.66,-49.18,Circle
14,96,1.46,443.36,990.72,-12.47,-18.54,-40.67,Circle
14,97,64.45,288.09,1194.34,6.34,0.76,-36.62,Circle
14,98,317.38,447.27,1353.52,-1.16,0.98,-28.14,Circle


In [341]:
test_dataframe

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,0,-22.95,8.79,1047.36,-2.44,-2.01,0.15,Circle
3,1,-20.51,0.00,1042.48,-0.34,-0.43,0.37,Circle
3,2,-16.60,5.37,1041.02,-0.98,-2.65,0.15,Circle
3,3,-22.46,-3.42,1045.90,-0.12,-2.80,2.10,Circle
3,4,5.37,-3.42,1022.95,0.70,-2.62,2.50,Circle
...,...,...,...,...,...,...,...,...
60,95,-115.23,-72.27,253.42,-23.90,-18.84,0.40,Up and down
60,96,-105.47,41.50,135.25,-11.40,-11.74,-11.10,Up and down
60,97,-125.49,35.64,179.69,5.06,1.16,-13.14,Up and down
60,98,-62.99,9.77,161.13,32.20,3.32,1.86,Up and down


In [342]:
# Now separate features (X) and labels (y).
# Each instance's label is constant across time steps,
# so we typically grab the first label for each instance.
y_train = totalDataFrame_shuffled.groupby('instance')['label'].first()
X_train = totalDataFrame_shuffled.drop(columns=['label'])
X_train = X_train.astype(float)

y_test = test_dataframe.groupby('instance')['label'].first()
X_test = test_dataframe.drop(columns=['label'])
X_test = X_test.astype(float)


rocket = RocketClassifier()

In [366]:
y_train

instance
0           Circle
1           Circle
2           Circle
4           Circle
5           Circle
          ...     
95     Up and down
96     Up and down
98     Up and down
99     Up and down
100    Up and down
Name: label, Length: 86, dtype: object

In [368]:
X_train

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,-31.25,-0.49,1034.18,-3.57,-1.86,-1.04
1,1,-12.70,13.18,1036.62,-1.22,-3.29,-0.49
1,2,-11.23,2.93,1034.67,0.09,-0.30,-0.43
1,3,-9.28,-4.39,1033.20,-2.50,0.73,-1.40
1,4,-14.65,1.95,1042.48,-3.60,-1.89,1.13
...,...,...,...,...,...,...,...
14,95,-21.97,313.96,784.18,-5.06,13.66,-49.18
14,96,1.46,443.36,990.72,-12.47,-18.54,-40.67
14,97,64.45,288.09,1194.34,6.34,0.76,-36.62
14,98,317.38,447.27,1353.52,-1.16,0.98,-28.14


In [343]:
print("Train: ", len(X_train), len(y_train))
print("Test: ", len(X_test), len(y_test))

Train:  8600 86
Test:  1500 15


In [344]:
# Fit RocketClassifier
rocket.fit(X_train, y_train)

print("Fit successful!")

Fit successful!


In [345]:
y_pred = rocket.predict(X_test)

In [346]:
y_pred

array(['Circle', 'Circle', 'Circle', 'Up and down', 'Up and down',
       'Circle', 'Up and down', 'Up and down', 'Up and down',
       'Up and down', 'Circle', 'Circle', 'Circle', 'Circle',
       'Up and down'], dtype='<U11')

In [347]:
y_test

instance
3          Circle
16         Circle
20         Circle
22         Circle
25         Circle
41         Circle
43         Circle
45         Circle
57    Up and down
58    Up and down
60    Up and down
67    Up and down
70    Up and down
74    Up and down
97    Up and down
Name: label, dtype: object

In [348]:
####################################

In [349]:
totalDataFrame_shuffled.loc[39]

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,label
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,-513.67,117.19,1102.05,3.72,-8.05,-135.06,Circle
1,-722.17,324.22,1211.91,-0.37,0.88,-80.95,Circle
2,-564.94,337.89,993.65,-3.29,4.88,-101.98,Circle
3,-465.82,221.19,876.46,4.54,-7.50,-72.41,Circle
4,-259.28,460.45,1074.71,8.75,-2.26,-84.36,Circle
...,...,...,...,...,...,...,...
95,-358.89,-335.45,996.58,3.41,3.75,28.63,Circle
96,-294.92,-74.22,1182.13,-4.15,-7.59,12.77,Circle
97,-372.56,-192.87,869.63,-7.68,-1.01,4.05,Circle
98,-371.09,-399.41,973.63,-17.26,2.90,-43.23,Circle


In [350]:
y_data = totalDataFrame_shuffled['label']

In [351]:
X_data = totalDataFrame_shuffled.drop('label', axis=1)

In [352]:
y_data

instance  time
1         0       Circle
          1       Circle
          2       Circle
          3       Circle
          4       Circle
                   ...  
14        95      Circle
          96      Circle
          97      Circle
          98      Circle
          99      Circle
Name: label, Length: 8600, dtype: object

In [353]:
X_data

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,-31.25,-0.49,1034.18,-3.57,-1.86,-1.04
1,1,-12.70,13.18,1036.62,-1.22,-3.29,-0.49
1,2,-11.23,2.93,1034.67,0.09,-0.30,-0.43
1,3,-9.28,-4.39,1033.20,-2.50,0.73,-1.40
1,4,-14.65,1.95,1042.48,-3.60,-1.89,1.13
...,...,...,...,...,...,...,...
14,95,-21.97,313.96,784.18,-5.06,13.66,-49.18
14,96,1.46,443.36,990.72,-12.47,-18.54,-40.67
14,97,64.45,288.09,1194.34,6.34,0.76,-36.62
14,98,317.38,447.27,1353.52,-1.16,0.98,-28.14


In [354]:
# ---------------------------------

In [355]:
X_data_train, X_data_test, y_data_train, y_data_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42, shuffle=False
)

In [356]:
X_data_train

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z
instance,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,-31.25,-0.49,1034.18,-3.57,-1.86,-1.04
1,1,-12.70,13.18,1036.62,-1.22,-3.29,-0.49
1,2,-11.23,2.93,1034.67,0.09,-0.30,-0.43
1,3,-9.28,-4.39,1033.20,-2.50,0.73,-1.40
1,4,-14.65,1.95,1042.48,-3.60,-1.89,1.13
...,...,...,...,...,...,...,...
84,75,42.97,11.72,814.45,-105.76,-64.33,-9.85
84,76,-4.39,65.92,550.29,-78.99,-78.81,-10.06
84,77,-21.00,67.87,275.88,-61.34,-70.06,-1.98
84,78,-42.48,23.44,140.62,-41.04,-58.17,0.67


In [357]:
y_data_train

instance  time
1         0            Circle
          1            Circle
          2            Circle
          3            Circle
          4            Circle
                     ...     
84        75      Up and down
          76      Up and down
          77      Up and down
          78      Up and down
          79      Up and down
Name: label, Length: 6880, dtype: object

In [358]:
X_data_train.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 6880 entries, (1, 0) to (84, 79)
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   acc_x   6880 non-null   float64
 1   acc_y   6880 non-null   float64
 2   acc_z   6880 non-null   float64
 3   gyr_x   6880 non-null   float64
 4   gyr_y   6880 non-null   float64
 5   gyr_z   6880 non-null   float64
dtypes: float64(6)
memory usage: 345.7 KB


In [359]:
check_is_mtype(X_data_train, "nested_univ")

False

In [360]:
y_data_train.info()

<class 'pandas.core.series.Series'>
MultiIndex: 6880 entries, (1, 0) to (84, 79)
Series name: label
Non-Null Count  Dtype 
--------------  ----- 
6880 non-null   object
dtypes: object(1)
memory usage: 77.0+ KB


In [361]:
print(len(X_data_train), len(y_data_train))

6880 6880


In [362]:
rocket = RocketClassifier(n_features_per_kernel=6, n_jobs=-1, random_state=42)

In [363]:
y = df.groupby('instance')['label'].first()
X = df.drop(columns=['label'])

NameError: name 'df' is not defined

In [None]:
rocket.fit(X_data_train, y_data_train)