In [None]:
import os
import pickle
import sys
from datetime import datetime

import numpy as np
import pandas as pd

sys.path.append('../..')
sys.path.append('../../7_Classification/BehaviourClassification/DeepLearning/dl-4-tsc-master')

from holsteinlib import windowing, functions
from holsteinlib.evaluation_v2 import evaluate_model

# MiniRocket
from sktime.transformations.panel.rocket import MiniRocketMultivariateVariable

# DL
from sklearn.preprocessing import StandardScaler, LabelEncoder
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

# ROCKET classification
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import balanced_accuracy_score

In [None]:
def combine_class_data(data_dict, keys=('accX', 'accY', 'accZ'), trim=0):
    """Flatten a ``{label: [sample, ...]}`` mapping into parallel sample/label lists.

    Args:
        data_dict: Mapping from a class label to an iterable of samples. Each
            sample must support ``sample[key]`` for every entry of ``keys``.
        keys: Channels to extract from each sample, in output order.
        trim: When greater than 0, every channel is cut to its first ``trim``
            items; 0 (or a negative value) keeps the channels untouched.

    Returns:
        ``(X, y)`` where ``X[i]`` is the list of channels of the i-th sample and
        ``y[i]`` is its label. Samples are emitted in ``data_dict`` iteration order.
    """
    X, y = [], []
    for label, data_set in data_dict.items():
        for data in data_set:
            # One entry per requested channel, kept separate (not stacked).
            channels = [data[key][:trim] if trim > 0 else data[key] for key in keys]
            X.append(channels)
            y.append(label)

    return X, y

In [None]:
class Logger:
    """Tee-like stream that mirrors everything written to it into a log file.

    On construction it captures the current ``sys.stdout`` as ``terminal`` and
    opens ``filepath`` for writing (an existing file is truncated). Assign an
    instance to ``sys.stdout`` to record printed output; afterwards restore
    ``terminal`` and call ``close()`` (or use the instance as a context manager).
    """

    def __init__(self, filepath):
        self.file = open(filepath, 'w')
        self.terminal = sys.stdout

    def write(self, message):
        """Write ``message`` to both the original stdout and the log file."""
        self.terminal.write(message)
        self.file.write(message)

    def flush(self):
        """Flush both targets so progress output and the log stay up to date."""
        self.terminal.flush()
        self.file.flush()

    def close(self):
        """Close the log file; safe to call more than once."""
        if not self.file.closed:
            self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the ``with`` body.
        return False

In [None]:
# Location of the raw WISDM accelerometer file loaded into dataset_df.
dataset_path = './WISDM_Dataset/WISDM_at_v2.0_raw.txt'

# Names given to the columns when the file is read with names=column_names.
# They must match the names used by the rest of the notebook
# (dataset_df['user'], user_df.activity, ...) and the keys of `dtype`.
column_names = ['user', 'activity', 'timestamp', 'accX', 'accY', 'accZ']

# Intended type of each column. It is not passed to read_csv in this notebook;
# the columns are converted explicitly in the preprocessing cell instead.
dtype = {
    'user': int,
    'activity': str,
    'timestamp': int,
    'accX': float,
    'accY': float,
    'accZ': float
}

In [None]:
# Class labels handed to evaluate_model for every classifier in this notebook.
# NOTE(review): these names do not look like activities from the WISDM file in
# dataset_path — confirm they match the values in dataset_df.activity before
# trusting any metric computed with them.
class_labels = sorted(['drinking_milk', 'grooming', 'lying', 'other', 'running', 'walking'])

In [None]:
# Parse the raw file: records are terminated by ';' and fields are separated by ','.
# Records that cannot be parsed are skipped (on_bad_lines='skip') instead of raising.
dataset_df = pd.read_csv(dataset_path, sep=',', lineterminator=';', names=column_names, 
                         skip_blank_lines=True, on_bad_lines='skip', low_memory=False)

# Data preprocessing

In [None]:
# User ids carry the newline left over from the ';' record terminator:
# strip it, treat empty strings as missing and store as nullable integers.
user_raw = dataset_df['user'].str.replace('\n', '')
dataset_df['user'] = user_raw.replace('', np.nan).astype('Int64')

# Millisecond timestamps -> datetimes; unparseable values become NaT.
dataset_df['timestamp'] = pd.to_datetime(dataset_df['timestamp'], unit='ms', errors='coerce')

# Accelerometer axes -> numeric; unparseable values become NaN.
for axis in ('accX', 'accY', 'accZ'):
    dataset_df[axis] = pd.to_numeric(dataset_df[axis], errors='coerce')

# Discard every record that is missing any of the required fields.
required_columns = ['user', 'timestamp', 'accX', 'accY', 'accZ']
dataset_df.dropna(subset=required_columns, inplace=True)

In [None]:
dataset_df

In [None]:
# Export the cleaned frame (without the index) into the ConvTran project's dataset folder.
dataset_df.to_csv('../../7_Classification/BehaviourClassification/Transformers/ConvTran-main/Dataset/Segmentation/ActivityRecognition/ActivityRecognition.csv', index=False)

# windowing

In [None]:
# Every distinct user id; windows are built separately for each user.
user_ids = dataset_df.user.unique()

# windowing params (passed unchanged to windowing.return_windows)
window_duration = 3  # presumably seconds: 3 * 20 Hz matches max_length = 60 used later — TODO confirm
data_frequency = 20  # sampling rate, per the "20Hz" note next to max_length
min_window_size = 0.95  # presumably the minimum fraction of a full window to keep — TODO confirm
overlap = 0.5  # presumably the fraction shared by consecutive windows — TODO confirm
datetime_column_name = 'timestamp'

In [None]:
%%time

window_dataset = {}
for user in user_ids:
    window_dataset[user] = {}
    
    user_df = dataset_df[dataset_df.user == user]
    
    labels = user_df.activity.unique()
    
    for label in labels:
        window_dataset[user][label] = []
        
        label_df = user_df[user_df.activity == label]
        
        windows = windowing.return_windows(label_df, 
                                           window_duration=window_duration, 
                                           data_frequency=data_frequency, 
                                           min_window_size=min_window_size, 
                                           overlap=overlap, 
                                           datetime_column_name=datetime_column_name)
        
        window_dataset[user][label].extend(windows)

In [None]:
# Cache the nested {user: {activity: [windows]}} dictionary in the working directory.
with open('WISDM_window_dataset_v1.pickle', 'wb') as handle:
    pickle.dump(window_dataset, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Deep Learning Classification

In [None]:
def preprocess_data(data_dict, max_length):
    """Flatten the nested window dictionary into padded, standardised arrays.

    Args:
        data_dict: Two-level mapping whose inner values are lists of DataFrames
            with 'accX', 'accY' and 'accZ' columns. The outer keys are iterated
            as subject ids and the inner keys as labels.
        max_length: Number of time steps each window is padded or truncated to
            (both applied at the end of the sequence).

    Returns:
        ``(X, y)``: ``X`` has shape (n_windows, max_length, 3) and holds the
        standardised features; ``y`` is a NumPy array with one label per window,
        in iteration order.
    """
    X = []
    y = []
    
    for subject_id, labels_dict in data_dict.items():
        for label, df_list in labels_dict.items():
            for df in df_list:
                features = df[['accX', 'accY', 'accZ']].values
                
                X.append(features)
                y.append(label)
                
    # Pad sequences to ensure they have the same length
    X = pad_sequences(X, maxlen=max_length, dtype='float32', padding='post', truncating='post')
    
    y = np.array(y)
    
    # Standardize the feature data
    num_features = X.shape[2]
    # Flatten to (n_windows * max_length, num_features) so each channel is
    # scaled with statistics taken over every time step of every window.
    # NOTE(review): padded time steps are part of this array, so they contribute
    # to the mean/std — nothing here masks them.
    X_reshaped = X.reshape(-1, num_features)
    
    # NOTE(review): the scaler is fitted on every window passed in. This notebook
    # calls the function before the train/test split, so test windows influence
    # the scaling statistics.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_reshaped)

    # Restore the (n_windows, max_length, num_features) layout
    X = X_scaled.reshape(-1, max_length, num_features)
    
    return X, y

In [None]:
# Fixed number of time steps per window: window_duration (3) * data_frequency (20) = 60
max_length = 60 # 20Hz
X_processed, y_processed = preprocess_data(window_dataset, max_length)

# Train, Validation, test split

In [None]:
# Hold out 33% of the windows as the test set, stratified by label.
# NOTE(review): windows are split at random while windowing uses overlap = 0.5,
# so windows that overlap in time may presumably end up on both sides of the
# split — confirm whether a per-user split is wanted.
X_train_val, X_test, y_train_val, y_test = train_test_split(X_processed, y_processed, test_size=0.33, 
                                                    stratify=y_processed, random_state=42)

In [None]:
# Split the remaining windows again: 33% of them become the validation set, stratified by label.
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.33, 
                                                    stratify=y_train_val, random_state=42)

# Label encoding

In [None]:
# Encode string labels to integers (the encoder is fitted on the training labels only)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# Evaluate against the labels that actually occur in this dataset. The list
# defined near the top of the notebook does not come from the loaded data;
# classes_ is sorted, which matches the sorted() convention used there.
class_labels = label_encoder.classes_.tolist()

# Convert labels to one-hot encoding
nb_classes = len(np.unique(y_train))
y_train_one_hot = to_categorical(y_train_encoded, nb_classes)
y_val_one_hot = to_categorical(y_val_encoded, nb_classes)
y_test_one_hot = to_categorical(y_test_encoded, nb_classes)

# MLP

In [None]:
from classifiers.mlp import Classifier_MLP

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/mlp/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

classifier = Classifier_MLP(output_directory, input_shape, nb_classes, verbose=verbose)

# Fit on the training split; the validation split and its integer labels are passed alongside
classifier.fit(X_train, y_train_one_hot, X_val, y_val_one_hot, y_val_encoded)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, mlp_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(mlp_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# FCN

In [None]:
from classifiers.fcn import Classifier_FCN

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/fcn/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

classifier = Classifier_FCN(output_directory, input_shape, nb_classes, verbose=verbose)

# Fit on the training split; the validation split and its integer labels are passed alongside
classifier.fit(X_train, y_train_one_hot, X_val, y_val_one_hot, y_val_encoded)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, fcn_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(fcn_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# RESNET

In [None]:
from classifiers.resnet import Classifier_RESNET

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/resnet/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

classifier = Classifier_RESNET(output_directory, input_shape, nb_classes, verbose=verbose)

# Fit on the training split; the validation split and its integer labels are passed alongside
classifier.fit(X_train, y_train_one_hot, X_val, y_val_one_hot, y_val_encoded)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, resnet_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(resnet_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Encoder

In [None]:
from classifiers.encoder import Classifier_ENCODER

In [None]:
# Directory that receives this model's log, df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/encoder/'

# The log file is opened before the classifier is created, so the directory
# has to exist already.
os.makedirs(output_directory, exist_ok=True)
log_filepath = os.path.join(output_directory, 'training_log.txt')

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

# Mirror the verbose training output into the log file while still showing it
original_stdout = sys.stdout
logger = Logger(log_filepath)
sys.stdout = logger
try:
    classifier = Classifier_ENCODER(output_directory, input_shape, nb_classes, verbose=verbose)

    # Fit on the training split; the validation split and its integer labels are passed alongside
    classifier.fit(X_train, y_train_one_hot, X_val, y_val_one_hot, y_val_encoded)
finally:
    # Always restore stdout and release the log file, even when training fails;
    # otherwise every later cell would keep printing through the logger.
    sys.stdout = original_stdout
    logger.file.close()

print("Training complete")

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, encoder_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(encoder_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# MCNN

In [None]:
from classifiers.mcnn import Classifier_MCNN

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/mcnn/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window; not passed to this classifier
verbose = True

classifier = Classifier_MCNN(output_directory=output_directory, verbose=verbose)

# Train the model
# NOTE(review): the test split is passed to fit() here in addition to the
# validation split — confirm the classifier does not use it for training or
# model selection.
classifier.fit(X_train, y_train_one_hot, X_test, y_test_one_hot, y_test_encoded, X_val, y_val_one_hot)

In [None]:
# Predict on the held-out test split (this classifier also receives the validation split)
df_metrics, mcnn_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot,
                                            X_val, y_val_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(mcnn_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# t-LeNet

In [None]:
from classifiers.tlenet import Classifier_TLENET

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/tlenet/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window; not passed to this classifier
verbose = True

classifier = Classifier_TLENET(output_directory, verbose=verbose)

# Train the model
# NOTE(review): the test split is passed to fit() here in addition to the
# validation split — confirm the classifier does not use it for training or
# model selection.
classifier.fit(X_train, y_train_one_hot, X_test, y_test_one_hot, y_test_encoded, X_val, y_val_one_hot)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, tlenet_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(tlenet_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# MCDCNN

In [None]:
from classifiers.mcdcnn import Classifier_MCDCNN

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/mcdcnn/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

classifier = Classifier_MCDCNN(output_directory, input_shape, nb_classes, verbose=verbose)

# Train the model
# NOTE(review): the test split is passed to fit() here in addition to the
# validation split — confirm the classifier does not use it for training or
# model selection.
classifier.fit(X_train, y_train_one_hot, X_test, y_test_one_hot, y_test_encoded, X_val, y_val_one_hot)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, mcdcnn_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(mcdcnn_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# TWIESEN

In [None]:
from classifiers.twiesn import Classifier_TWIESN

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/twiesen/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window; not passed to this classifier
verbose = True  # not used below: the constructor is given a literal True

classifier = Classifier_TWIESN(output_directory=output_directory, verbose=True)

# Train the model
# NOTE(review): the test split is passed to fit() here in addition to the
# validation split — confirm the classifier does not use it for training or
# model selection.
classifier.fit(X_train, y_train_one_hot, X_test, y_test_one_hot, y_test_encoded, X_val, y_val_one_hot)

In [None]:
# NOTE(review): predictions come from classifier.train() with no arguments
# rather than predict(X_test, ...); presumably it evaluates on the test split
# given to fit() — confirm.
df_metrics, train_acc, twiesen_y_pred = classifier.train()

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(twiesen_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Time-CNN

In [None]:
from classifiers.cnn import Classifier_CNN

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/cnn/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

classifier = Classifier_CNN(output_directory, input_shape, nb_classes, verbose=verbose)

# Fit on the training split; the validation split and its integer labels are passed alongside
classifier.fit(X_train, y_train_one_hot, X_val, y_val_one_hot, y_val_encoded)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, cnn_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(cnn_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Inception

In [None]:
from classifiers.inception import Classifier_INCEPTION

In [None]:
# Directory that receives this model's df_metrics.csv and all_results.pickle
output_directory = 'DL_Results/inception/'

# Create the model
input_shape = X_train.shape[1:]  # shape of one window: (time steps, channels)
verbose = True

classifier = Classifier_INCEPTION(output_directory, input_shape, nb_classes, verbose=verbose)

# Fit on the training split; the validation split and its integer labels are passed alongside
classifier.fit(X_train, y_train_one_hot, X_val, y_val_one_hot, y_val_encoded)

In [None]:
# Predict on the held-out test split and keep the returned metrics table
df_metrics, inception_y_pred = classifier.predict(X_test, y_test_encoded, X_train, y_train_one_hot, y_test_one_hot)

df_metrics.to_csv(output_directory + 'df_metrics.csv')

df_metrics

In [None]:
# Map the integer predictions back to the original string labels
y_pred_original = label_encoder.inverse_transform(inception_y_pred)

class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_test, y_pred_original, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# Stored in the same directory as this model's df_metrics.csv
with open(output_directory + 'all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# ConvTran

In [None]:
# Switch into the ConvTran project so that main.py and its relative paths resolve.
# The path is relative to the current directory, so running this cell twice fails.
os.chdir('../../7_Classification/BehaviourClassification/Transformers/ConvTran-main')

In [None]:
# Set the arguments as if they are coming from the command line.
# sys.argv is replaced for the whole kernel and is not restored afterwards.
# NOTE(review): the CSV exported earlier in this notebook is written under
# Dataset/Segmentation/ActivityRecognition/, not Dataset/Wisdm/ — confirm that
# --data_path points at the intended input.
sys.argv = [
    'main.py',
    '--data_path', 'Dataset/Wisdm/',
    '--output_dir', '../../../../8_Evaluation/Baseline_Evaluation/ConvTran_Results/',
    '--Norm', 'False',
    '--val_ratio', '0.2',
    '--print_interval', '10',
    '--Net_Type', 'C-T',
    '--emb_size', '16',
    '--dim_ff', '256',
    '--num_heads', '8',
    '--Fix_pos_encode', 'tAPE',
    '--Rel_pos_encode', 'eRPE',
    '--epochs', '100',
    '--batch_size', '16',
    '--lr', '1e-3',
    '--dropout', '0.01',
    '--val_interval', '2',
    '--key_metric', 'accuracy',
    '--gpu', '0',
    '--seed', '1234'
]

# Run the script inside this kernel (relative to the current working directory)
%run main.py

In [None]:
# Return to the evaluation directory. This cell is skipped if the run above
# raises, which leaves the kernel inside the ConvTran project.
os.chdir('../../../../8_Evaluation/Baseline_Evaluation')

# ROCKET

## ROCKET feature generation

In [None]:
# Collect the raw (unpadded) windows as [accX, accY, accZ] channel lists.
# The iteration order (user -> label -> window) is the same one preprocess_data
# uses, so row i of X lines up with y_processed[i] in the split below.
X = []
for labels_dict in window_dataset.values():
    X_user, _ = combine_class_data(labels_dict)
    X.extend(X_user)

# One row per window, one column per channel, each cell holding that channel's series
X_df = pd.DataFrame(X)

trf = MiniRocketMultivariateVariable(num_kernels=10000)
trf.fit(X_df)
X_features = trf.transform(X_df)

In [None]:
# Cache the transformed features (np.save appends the .npy extension)
np.save('X_Rocket_features', X_features)

## Classification

In [None]:
# Hold out 33% of the feature rows as the test set, stratified by label.
# No separate validation split is made here; the classifier below is fitted on
# the whole train_val portion.
X_ROCKET_train_val, X_ROCKET_test, y_ROCKET_train_val, y_ROCKET_test = train_test_split(X_features, y_processed, test_size=0.33, 
                                                    stratify=y_processed, random_state=42)

In [None]:
# The hyperparameters below are hard-coded; presumably they come from an earlier
# random search (random_search.best_params_) that is not part of this notebook — TODO confirm.
ridge_clf = RidgeClassifierCV(fit_intercept= False, class_weight= 'balanced', alphas= 131.31400000000002)

ridge_clf.fit(X_ROCKET_train_val, y_ROCKET_train_val)

# Predicted string labels for the held-out test rows
y_pre = ridge_clf.predict(X_ROCKET_test)

print('Balanced Accuracy: ', balanced_accuracy_score(y_ROCKET_test, y_pre))

In [None]:
# The ridge classifier was fitted on string labels, so no inverse transform is needed here
class_wise_metrics, overall_report, additional_metrics, cm = evaluate_model(y_ROCKET_test, y_pre, class_labels)

# Bundle everything evaluate_model returned so it can be reloaded later.
# NOTE(review): the key 'confusion_matirx' is misspelled; left unchanged in case
# code that reads the pickle relies on it.
all_results = {
    'class_wise_metrics' : class_wise_metrics,
    'overall_report' : overall_report,
    'additional_metrics' : additional_metrics,
    'confusion_matirx': cm
}

# The ROCKET_results directory must already exist; open() does not create it
with open('ROCKET_results/all_results.pickle', 'wb') as handle:
    pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)