# Proposed Model Architecture

This notebook tries the proposed model architecture with cross-sensor attention on the PAMAP2 dataset and evaluates its performance.

# Imports

In [None]:
# Standard library imports
import inspect
import logging
import math
import os
import pickle
import random
import shutil
import subprocess
import time

# Third-party imports
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    silhouette_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import wandb
from sklearn.manifold import TSNE

In [None]:
def clear_working_directory():
    """Remove every entry directly inside ``/kaggle/working/``.

    Regular files and symlinks are unlinked; directories are removed
    recursively. A failure on one entry is printed and the sweep continues
    with the next entry.
    """
    directory_to_clear = '/kaggle/working/'

    for entry_name in os.listdir(directory_to_clear):
        entry_path = os.path.join(directory_to_clear, entry_name)
        try:
            if os.path.isfile(entry_path) or os.path.islink(entry_path):
                os.unlink(entry_path)
                continue
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        except Exception as e:
            # Best effort: report the entry that could not be removed.
            print(f"Failed to remove {entry_path}. Reason: {e}")


# Start the run from an empty working directory.
clear_working_directory()

# Hyper-parameters

In [None]:
# Training schedule and input geometry.
EPOCHS = 70
BATCH_SIZE = 128
# 9 IMU prefixes (see imu_prefixes) x 12 columns each (x, y, z plus the
# fft_/fd_/sd_ columns added by imu_feature_extract).
IMU_FEATURE_COUNT = 108
CLASSES = 12
LEARNING_RATE = 0.001
# Number of rows in one window.
SEQUENCE_LENGTH= 120
# Used by the window-generation cell as the step between consecutive window
# starts, so neighbouring windows share SEQUENCE_LENGTH - OVERLAP rows.
OVERLAP = 40


# Running "best so far" trackers (initial values).
BEST_ACCURACY = 0.0
BEST_LOSS = 1000

MAX_SAVED_MODELS = 10

# Loss weights, currently disabled:
# WEIGHT_CLASSIFIER_LOSS  = 1 

# WEIGHT_TRIPLET_LOSS = 0
# NOTE(review): MIN_LR equals LEARNING_RATE; if these feed a
# reduce-on-plateau style scheduler the rate could never drop - confirm.
MIN_LR = 0.001
LR_FACTOR = 0.4
PATIENCE = 5




In [None]:
# Authenticate with Weights & Biases.
# SECURITY: API keys must never be committed to the notebook. The key is
# taken from the WANDB_API_KEY environment variable (e.g. populated from a
# Kaggle secret); when it is unset, ``key=None`` lets wandb fall back to its
# own credential lookup. Any key that was previously hard-coded here should
# be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

In [None]:
# Start the experiment-tracking run and record the hyper-parameters with it.
wandb.init(
    project="PAMAP2 - Cross Sensor Attention - Overall",
    name="Version-08",
    config=dict(
        architecture="Transformer",
        dataset="PAMAP2",
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        imu_feature_count=IMU_FEATURE_COUNT,
        classes=CLASSES,
        learning_rate=LEARNING_RATE,
        Sequence_length=SEQUENCE_LENGTH,
        Overlap=OVERLAP,
    ),
)

# Activities in dataset

In [None]:
# Activity ID -> activity name lookup for every label listed here.
# ID 0 marks transient rows, which dataCleaning removes.
activityIDdict = {
    0: 'transient',
    # Basic postures and locomotion
    1: 'lying',
    2: 'sitting',
    3: 'standing',
    4: 'walking',
    5: 'running',
    6: 'cycling',
    7: 'Nordic_walking',
    # Sedentary / everyday activities
    9: 'watching_TV',
    10: 'computer_work',
    11: 'car driving',
    12: 'ascending_stairs',
    13: 'descending_stairs',
    # Household and sport
    16: 'vacuum_cleaning',
    17: 'ironing',
    18: 'folding_laundry',
    19: 'house_cleaning',
    20: 'playing_soccer',
    24: 'rope_jumping',
}



# 12 classes

In [None]:
# The 12 activity IDs used in this experiment, in class-index order.
all_list = [1, 2, 3, 17, 16, 12, 13, 4, 7, 6, 5, 24]

# Human-readable names, position-aligned with all_list.
activity_names = [
    'lying', 'sitting', 'standing',
    'ironing', 'vacuum_cleaning',
    'ascending_stairs', 'descending_stairs',
    'walking', 'Nordic_walking',
    'cycling', 'running', 'rope_jumping',
]

# Pair each activity ID with its name.
activity_id_mapping = dict(zip(all_list, activity_names))

# Re-derive the ID list from the mapping keys. No shuffling happens in this
# cell, so the order is the one declared above.
all_list = list(activity_id_mapping)

# Output the results (the "Shuffled" label is kept verbatim).
print("Shuffled Activity Names:", activity_names)
print("Activity ID Mapping:", activity_id_mapping)


In [None]:
# Minimum severity shown by the logging helpers below; names that are not
# keys of log_levels fall back to INFO when set_log_level is computed.
LOG_LEVEL = "INFO" # Adjust this to "DEBUG", "INFO", "WARNING" or "ERROR"

In [None]:
# Level name -> numeric logging constant, for the four supported levels.
log_levels = {
    level_name: getattr(logging, level_name)
    for level_name in ("DEBUG", "INFO", "WARNING", "ERROR")
}

# Threshold selected through LOG_LEVEL; unknown names default to INFO.
set_log_level = log_levels.get(LOG_LEVEL, logging.INFO)

# Configure the root logger once with a timestamped format.
logging.basicConfig(
    level=set_log_level,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(message)s",
)

def log_message(level, message, block=None, log_title="", caller_frame=None):
    """
    Log a message at the given level, prefixed with a title and line number.

    The formatted message is printed (when the level passes the configured
    threshold ``set_log_level``) and also sent to the root logger.

    Args:
        level (str): Logging level name ('DEBUG', 'INFO', 'WARNING', 'ERROR'),
            case-insensitive. An unknown name no longer raises ``KeyError``:
            a notice is logged and the message is emitted at INFO instead.
        message: The message to log; formatted with an f-string, so any
            object with a string representation is accepted.
        block (str, optional): Additional block/section name for context.
        log_title (str): Title to specify log type.
        caller_frame (frame, optional): Frame object of the calling function;
            supplies the reported line number ("N/A" when omitted).
    """
    line_number = caller_frame.f_lineno if caller_frame else "N/A"

    # Fixed-width columns keep consecutive log lines aligned.
    formatted_title = log_title.ljust(7)
    formatted_line = f"Line {line_number}".ljust(8)

    log_msg = f"{formatted_title} | {formatted_line} | {message}"
    if block:
        log_msg += f" - Block: {block}"

    numeric_level = log_levels.get(level.upper())
    if numeric_level is None:
        # Previously unreachable: the dict lookup raised KeyError before the
        # fallback branch could run.
        logging.info("Unknown log level specified.")
        numeric_level = logging.INFO

    if numeric_level >= set_log_level:
        print(log_msg)

    # Equivalent to logging.debug/info/warning/error for the matching level.
    logging.log(numeric_level, log_msg)


def print_log(message, block=None):
    """Log ``message`` at INFO level, tagged with the caller's line number."""
    log_message(
        "INFO",
        message,
        block,
        log_title="INFO",
        caller_frame=inspect.currentframe().f_back,
    )

def debug_log(message, block=None):
    """Log ``message`` at DEBUG level, tagged with the caller's line number."""
    log_message(
        "DEBUG",
        message,
        block,
        log_title="DEBUG",
        caller_frame=inspect.currentframe().f_back,
    )

def warn_log(message, block=None):
    """Log ``message`` at WARNING level, tagged with the caller's line number."""
    log_message(
        "WARNING",
        message,
        block,
        log_title="WARNING",
        caller_frame=inspect.currentframe().f_back,
    )

def error_log(message, block=None):
    """Log ``message`` at ERROR level, tagged with the caller's line number."""
    log_message(
        "ERROR",
        message,
        block,
        log_title="ERROR",
        caller_frame=inspect.currentframe().f_back,
    )

In [None]:
# Paths of the nine protocol recordings, subject101.dat .. subject109.dat.
list_of_files = [
    f'/kaggle/input/pampa2/PAMAP2_Dataset/Protocol/subject{subject_number}.dat'
    for subject_number in range(101, 110)
]

In [None]:
# Subject identifiers 1..9, one per protocol file.
subjectID = list(range(1, 10))


In [None]:
# Leading, non-IMU columns of every raw record.
colNames = ["timestamp", "activityID", "heartrate"]

# Each placement (hand, chest, ankle) contributes the same 17 channels, so
# the three name lists are generated from one suffix template.
IMUhand, IMUchest, IMUankle = (
    [
        f"{placement}{suffix}"
        for suffix in (
            'Temperature',
            'Acc16_1', 'Acc16_2', 'Acc16_3',
            'Acc6_1', 'Acc6_2', 'Acc6_3',
            'Gyro_1', 'Gyro_2', 'Gyro_3',
            'Magne_1', 'Magne_2', 'Magne_3',
            'Orientation_1', 'Orientation_2', 'Orientation_3', 'Orientation_4',
        )
    ]
    for placement in ('hand', 'chest', 'ankle')
)

# Full column layout assigned to each loaded file: 3 + 3 * 17 = 54 names.
columns = colNames + IMUhand + IMUchest + IMUankle

len(columns)

# Pre-processing

In [None]:
directory= '/kaggle/input/pampa2/PAMAP2_Dataset/Protocol'

data_dict={}

# Read every recording into its own frame and concatenate once at the end;
# concatenating inside the loop re-copies all previously loaded rows on each
# iteration.
subject_frames = []
# Sorted so the row order of dataCollection is reproducible (os.listdir
# returns entries in arbitrary order).
for filename in sorted(os.listdir(directory)):
    file = os.path.join(directory, filename)
    print(file)
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    procData = pd.read_table(file, header=None, sep=r'\s+')
    procData.columns = columns
    # Subject number is the last character before the 4-character ".dat"
    # suffix, e.g. "subject105.dat" -> 5.
    procData['subject_id'] = int(file[-5])
    subject_frames.append(procData)

# ignore_index=True already yields a fresh 0..n-1 index, so no separate
# reset_index is needed. An empty directory yields an empty frame.
if subject_frames:
    dataCollection = pd.concat(subject_frames, ignore_index=True)
else:
    dataCollection = pd.DataFrame()
dataCollection.head(10)

In [None]:
def dataCleaning(dataCollection):
    """Return a cleaned copy of the raw recordings.

    Steps, in order:
      1. drop the columns that are not used downstream (orientation
         channels, temperatures, the hand/chest Acc6 and ankle Acc16
         accelerometer channels, heart rate and timestamp);
      2. drop every row whose ``activityID`` is 0 (transient activity);
      3. coerce all cells to numeric, turning non-numeric cells into NaN;
      4. fill NaN cells with ``DataFrame.interpolate()`` defaults.

    Args:
        dataCollection: DataFrame holding the raw columns plus ``activityID``.

    Returns:
        The cleaned DataFrame; the original index labels are kept.
    """
    unused_columns = [
        'handOrientation_1', 'handOrientation_2', 'handOrientation_3', 'handOrientation_4',
        'chestOrientation_1', 'chestOrientation_2', 'chestOrientation_3', 'chestOrientation_4',
        'ankleOrientation_1', 'ankleOrientation_2', 'ankleOrientation_3', 'ankleOrientation_4',
        'chestTemperature',
        'chestAcc6_1', 'chestAcc6_2', 'chestAcc6_3',
        'ankleTemperature',
        'ankleAcc16_1', 'ankleAcc16_2', 'ankleAcc16_3',
        'handAcc6_1', 'handAcc6_2', 'handAcc6_3',
        'heartrate', 'handTemperature', 'timestamp',
    ]
    cleaned = dataCollection.drop(columns=unused_columns)

    # Activity 0 is the transient class and is not used.
    transient_index = cleaned[cleaned.activityID == 0].index
    cleaned = cleaned.drop(index=transient_index)

    # Non-numeric cells become NaN and are then interpolated away.
    cleaned = cleaned.apply(pd.to_numeric, errors='coerce')
    return cleaned.interpolate()

In [None]:
# Clean the raw recordings and renumber the surviving rows from 0.
dataCol = dataCleaning(dataCollection).reset_index(drop=True)
dataCol.head(5)

In [None]:
# Notebook display: per-column summary statistics of the cleaned data.
dataCol.describe()

In [None]:
# Count the rows for every (activityID, subject_id) combination; the counts
# land in a column named 'Count'.
activity_subject_counts = dataCol.groupby(['activityID', 'subject_id']).size().reset_index(name='Count')

# Set plot size for better readability
plt.figure(figsize=(14, 8))

# Grouped bar plot: one group per activityID, one bar per subject_id
# (dodge=True places the subject bars side by side).
sns.barplot(data=activity_subject_counts, x='activityID', y='Count', hue='subject_id', dodge=True, palette='viridis')

# Titles, axis labels, and a legend anchored outside the axes on the right.
plt.title('Row Counts for Each Subject within Each Activity')
plt.xlabel('Activity ID')
plt.ylabel('Row Count')
plt.legend(title='Subject ID', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.tight_layout()

# Display the plot
plt.show()

In [None]:
# Distinct activity and subject IDs present after cleaning, in order of
# first appearance.
unique_activity_ids, unique_subject_ids = (
    dataCol[id_column].unique() for id_column in ('activityID', 'subject_id')
)

In [None]:
# Notebook display: activity IDs found in the cleaned data.
unique_activity_ids

In [None]:
# Notebook display: subject IDs found in the cleaned data.
unique_subject_ids

In [None]:
# Training, validation and testing all use the same 12 activity IDs; each
# list is an independent copy.
training_list = [1, 2, 3, 17, 16, 12, 13, 4, 7, 6, 5, 24]
validation_list = list(training_list)
testing_list = list(training_list)
# Kept as in the original cell: re-binds all_list to itself.
all_list = all_list

In [None]:
def imu_feature_extract(imu_type_data):
    """Add FFT-magnitude and first/second-difference columns for x, y, z.

    The columns are written into ``imu_type_data`` itself, in the order
    ``fft_x, fft_y, fft_z, fd_x, fd_y, fd_z, sd_x, sd_y, sd_z``, and the same
    object is returned.

    Args:
        imu_type_data: DataFrame with numeric columns ``x``, ``y``, ``z``.

    Returns:
        The augmented input frame, or an empty DataFrame when fewer than
        three rows are available.
    """
    # np.gradient with edge_order=2 needs at least three samples.
    if imu_type_data.shape[0] < 3:
        print_log("Not enough rows to calculate gradient. Skipping this file.")
        return pd.DataFrame()

    axes = ("x", "y", "z")

    # Magnitude of the FFT taken over the whole column.
    for axis in axes:
        imu_type_data[f"fft_{axis}"] = np.abs(np.fft.fft(imu_type_data[axis].values))

    # First derivative (finite differences).
    for axis in axes:
        imu_type_data[f"fd_{axis}"] = np.gradient(imu_type_data[axis].values, edge_order=2)

    # Second derivative: gradient of the first-derivative column.
    for axis in axes:
        imu_type_data[f"sd_{axis}"] = np.gradient(imu_type_data[f"fd_{axis}"].values, edge_order=2)

    return imu_type_data

In [None]:
# Sensor column prefixes that go through feature extraction, grouped by
# placement. Each prefix expands to <prefix>_1/_2/_3 in the cleaned data.
# A new placement needs its prefixes added here.
# Note: the ankle group lists Gyro before Acc6, unlike hand and chest.
imu_prefixes = [
    'handAcc16', 'handGyro', 'handMagne',
    'chestAcc16', 'chestGyro', 'chestMagne',
    'ankleGyro', 'ankleAcc6', 'ankleMagne',
]

In [None]:
def apply_imu_feature_extraction(dataCol, imu_cols):
    """Replace each raw 3-axis sensor triple with its extracted feature set.

    For every prefix in ``imu_cols`` the columns ``<prefix>_1/_2/_3`` are
    copied, renamed to ``x``/``y``/``z`` and passed to
    ``imu_feature_extract``. The returned columns are added back as
    ``<prefix>_<feature>`` and the raw ``_1/_2/_3`` columns are removed.

    Args:
        dataCol: DataFrame containing ``<prefix>_1``, ``<prefix>_2`` and
            ``<prefix>_3`` for every prefix.
        imu_cols: Sequence of column prefixes to process.

    Returns:
        A new DataFrame; the caller's ``dataCol`` is not modified.
    """
    feature_frames = []
    raw_columns = []

    for col_prefix in imu_cols:
        axis_columns = [f"{col_prefix}_{axis_no}" for axis_no in (1, 2, 3)]
        raw_columns.extend(axis_columns)

        # Work on a copy so feature extraction cannot touch dataCol.
        imu_data = dataCol[axis_columns].copy()
        imu_data.columns = ['x', 'y', 'z']

        extracted_features = imu_feature_extract(imu_data)
        feature_frames.append(extracted_features.add_prefix(f"{col_prefix}_"))

    combined = pd.concat([dataCol, *feature_frames], axis=1)

    # The raw triples are superseded by the <prefix>_x/_y/_z copies.
    return combined.drop(columns=raw_columns)


In [None]:
def scaling(dataframe):
    """Standardise every column with a freshly fitted ``StandardScaler``.

    Args:
        dataframe: Numeric DataFrame to scale.

    Returns:
        A new DataFrame with the same column names and a default 0..n-1
        index, holding the scaled values.
    """
    scaled_values = StandardScaler().fit_transform(dataframe.to_numpy())
    return pd.DataFrame(scaled_values, columns=list(dataframe.columns))

In [None]:
# Root folder for the per-activity / per-subject CSV exports.
base_dir = './activity_data'
# (activity_id, subject_id, row_count) for every CSV that gets written.
activity_counts = []

# Split folders, created up front.
train_dir, test_dir, validation_dir = (
    os.path.join('./', split_name) for split_name in ('train', 'test', 'validation')
)
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)

for activity_id in unique_activity_ids:
    # One sub-folder per activity, named after the activity ID.
    activity_dir = os.path.join(base_dir, f'{activity_id}')
    os.makedirs(activity_dir, exist_ok=True)

    saved_csv_files = []
    is_activity = dataCol['activityID'] == activity_id

    for subject_id in unique_subject_ids:
        df_activity_subject = dataCol[is_activity & (dataCol['subject_id'] == subject_id)]
        row_count = len(df_activity_subject)

        if row_count > 1:
            # Write this subject's rows for the activity to <subject_id>.csv.
            filename = os.path.join(activity_dir, f'{subject_id}.csv')
            df_activity_subject.to_csv(filename, index=False)
            activity_counts.append((activity_id, subject_id, row_count))
            saved_csv_files.append(filename)
            print_log(f'Saved {filename}')
        else:
            # Zero or one row: nothing useful to export.
            print_log(f'Skipped {subject_id} for activity {activity_id} due to insufficient data ({row_count} rows)')

    # Report what is now on disk for this activity.
    existing_csv_files = os.listdir(activity_dir)
    current_csv_count = len(existing_csv_files)
    print_log(existing_csv_files,"existing_csv_files")


In [None]:
# Empty containers for the three splits, plus the activities to process.
train_sequences, validation_sequences, test_sequences = [], [], []
activity_list = all_list

In [None]:
import random
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Configuration parameters
base_path = "/kaggle/working/activity_data"
sequence_length = SEQUENCE_LENGTH   # Length of each IMU sequence
# NOTE: despite its name, this value is used as the step between the starts
# of consecutive windows, so neighbouring windows share
# sequence_length - overlap rows.
overlap = OVERLAP
activity_list_train = []
activity_list_test = []
activity_list_val = []

# Fixed subject hold-out: every window of these users goes to the test set.
# This is a single hold-out split, not leave-one-subject-out
# cross-validation.
test_users = ['5.csv','6.csv']
print(f"Selected users {test_users} as test users for all activities")

# Iterate through each activity
for activity_id in activity_list:
    print(f"Processing activity: {activity_id}")
    activity_dir = os.path.join(base_path, f'{activity_id}')

    user_list_train = []
    user_list_test = []
    user_list_val = []

    # Sorted so the per-user ordering inside each activity is reproducible
    # (os.listdir returns entries in arbitrary order).
    for user_id in sorted(os.listdir(activity_dir)):
        print(f"Processing user: {user_id}")
        file_path = os.path.join(activity_dir, user_id)
        imu_data = pd.read_csv(file_path)

        # Apply IMU feature extraction and drop unnecessary columns
        imu_data = apply_imu_feature_extraction(imu_data, imu_prefixes)
        imu_data = imu_data.drop(["subject_id", "activityID"], axis=1)
        imu_data = scaling(imu_data)

        # Generate fixed-length windows. The range stops at the last start
        # for which a full window still fits, so no bounds check is needed
        # and recordings shorter than one window yield no sequences.
        num_samples = len(imu_data)
        sequence_data = [
            imu_data.iloc[start:start + sequence_length].to_numpy(copy=True)
            for start in range(0, num_samples - sequence_length + 1, overlap)
        ]

        if user_id in test_users:
            # Use all sequences from these users as test data
            print(f"if : {user_id}")
            user_list_test.append(sequence_data)
        elif len(sequence_data) < 2:
            # train_test_split raises ValueError when it cannot fill both
            # sides; keep what exists for training and leave validation empty.
            print(f"Too few sequences ({len(sequence_data)}) for {user_id}; skipping validation split")
            user_list_train.append(sequence_data)
            user_list_val.append([])
        else:
            # Random 80/20 split of this user's windows. Windows overlap, so
            # train and validation windows of one user can share rows.
            train_data, val_data = train_test_split(sequence_data, test_size=0.2, random_state=12345)

            user_list_train.append(train_data)
            user_list_val.append(val_data)

    # Append results to the main lists for each activity
    activity_list_train.append(user_list_train)
    activity_list_test.append(user_list_test)
    activity_list_val.append(user_list_val)

# activity_list_train / activity_list_val / activity_list_test are now nested
# as [activity][user][window], with users 5 and 6 held out for testing.

In [None]:
# Aliases under which the split data is referenced further down.
training_imu_data, testing_imu_data, validation_imu_data = (
    activity_list_train,
    activity_list_test,
    activity_list_val,
)

# STDAT Model

In [None]:
class PositionalEncoding(nn.Module):
    """Learnable positional encoding built from ``k`` Gaussian kernels.

    Every sequence position gets a softmax-normalised weight for each of the
    ``k`` kernels (learnable centre ``mu`` and width ``sigma``); the encoding
    of a position is the weighted sum of ``k`` learnable ``d_model``-sized
    embedding vectors, and it is added to the input.

    Args:
        k: Number of Gaussian kernels / embedding vectors.
        d_model: Size of the last input dimension.
        seq_len: Number of positions; must match ``inputs.size(1)``.
    """

    def __init__(self, k, d_model, seq_len):
        super().__init__()
        self.embedding = nn.Parameter(torch.zeros([k, d_model], dtype=torch.float), requires_grad=True)
        nn.init.xavier_uniform_(self.embedding, gain=1)

        # (seq_len, k) grid whose row i holds the position index i.
        # Registered as a non-persistent buffer so it follows the module
        # across .to()/.cuda() without adding a key to state_dict.
        positions = torch.tensor(list(range(seq_len)), requires_grad=False).unsqueeze(1).repeat(1, k)
        self.register_buffer("positions", positions, persistent=False)

        # Kernel centres start evenly spaced over the sequence. The running
        # sum is kept so the initial values match the earlier construction.
        interval = seq_len / k
        mu_init = []
        s = 0.0
        for _ in range(k):
            mu_init.append(s)
            s = s + interval
        self.mu = nn.Parameter(torch.tensor(mu_init, dtype=torch.float).unsqueeze(0), requires_grad=True)

        # Every kernel starts with the same width of 50.
        self.sigma = nn.Parameter(torch.full((1, k), 50.0, dtype=torch.float))

    def normal_pdf(self, pos, mu, sigma):
        """Return kernel weights per position, softmax-normalised over dim 1."""
        a = pos - mu
        log_p = -1*torch.mul(a, a)/(2*(sigma**2)) - torch.log(sigma)
        return torch.nn.functional.softmax(log_p, dim=1)

    def forward(self, inputs):
        """Add the positional encoding to ``inputs`` (batch, seq_len, d_model)."""
        pdfs = self.normal_pdf(self.positions, self.mu, self.sigma)
        pos_enc = torch.matmul(pdfs, self.embedding)

        # Broadcasting over the batch dimension replaces an explicit repeat.
        return inputs + pos_enc.unsqueeze(0)

class TransformerEncoderLayer(nn.Module):
    """Encoder layer with two self-attentions followed by a small CNN.

    ``attention`` uses ``d_model`` as embedding size and attends over dim 1
    of the input. ``_attention`` works on the transposed input, with
    ``seq_len`` as embedding size, and attends over the last input dim. Both
    results are added to the input and layer-normalised. A three-stage 2-D
    convolution stack is then applied as a second residual branch.

    Args:
        d_model: Size of the last input dimension.
        heads: Number of heads for ``attention``.
        _heads: Number of heads for ``_attention``.
        dropout: Dropout probability inside the CNN stack.
        seq_len: Size of input dimension 1.
    """

    def __init__(self, d_model, heads, _heads, dropout, seq_len):
        super().__init__()

        self.attention = nn.MultiheadAttention(d_model, heads, batch_first=True)
        self._attention = nn.MultiheadAttention(seq_len, _heads, batch_first=True)

        self.attn_norm = nn.LayerNorm(d_model)

        self.cnn_units = 1
        units = self.cnn_units

        def conv_stage(in_channels, out_channels, kernel, padding):
            # Conv -> BatchNorm -> Dropout -> ReLU; padding keeps H x W fixed.
            return [
                nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
                nn.BatchNorm2d(out_channels),
                nn.Dropout(dropout),
                nn.ReLU(),
            ]

        self.cnn = nn.Sequential(
            *conv_stage(1, units, (1, 1), 0),
            *conv_stage(units, units, (3, 3), 1),
            *conv_stage(units, 1, (5, 5), 2),
        )

        self.final_norm = nn.LayerNorm(d_model)

    def forward(self, src, src_mask=None):
        """Run the layer on ``src``; ``src_mask`` is accepted but not used."""
        src_t = src.transpose(-1, -2)

        attn_out = self.attention(src, src, src)[0]
        transposed_attn_out = self._attention(src_t, src_t, src_t)[0].transpose(-1, -2)
        src = self.attn_norm(src + attn_out + transposed_attn_out)

        # Treat the (dim 1, dim 2) plane as a one-channel image for the CNN.
        conv_out = self.cnn(src.unsqueeze(dim=1)).squeeze(dim=1)
        return self.final_norm(src + conv_out)

class TransformerEncoder(nn.Module):
    """Stack of ``num_layer`` ``TransformerEncoderLayer`` modules run in order.

    Args:
        d_model, heads, _heads, seq_len, dropout: Passed to every layer.
        num_layer: Number of stacked layers.
    """

    def __init__(self, d_model, heads, _heads, seq_len, num_layer=2, dropout=0.1):
        super().__init__()

        self.layers = nn.ModuleList(
            TransformerEncoderLayer(d_model, heads, _heads, dropout, seq_len)
            for _ in range(num_layer)
        )

    def forward(self, src):
        """Feed ``src`` through each layer and return the final output."""
        output = src
        for encoder_layer in self.layers:
            output = encoder_layer(output)
        return output

class Transformer(nn.Module):
    """Positional encoding followed by the encoder stack.

    Args:
        num_layer: Number of encoder layers.
        d_model: Size of the last input dimension.
        k: Number of Gaussian kernels in the positional encoding.
        heads, _heads: Head counts of the two attentions in each layer.
        seq_len: Size of input dimension 1.
        trg_len: Accepted but not used by this module.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, num_layer, d_model, k, heads, _heads, seq_len, trg_len, dropout):
        super().__init__()

        self.pos_encoding = PositionalEncoding(k, d_model, seq_len)
        self.encoder = TransformerEncoder(d_model, heads, _heads, seq_len, num_layer, dropout)

    def forward(self, inputs):
        """Return the encoded representation of ``inputs``."""
        return self.encoder(self.pos_encoding(inputs))

class Model(nn.Module):
    """Encode one input stream into a ``trg_len``-sized embedding.

    The input is passed through a 3-layer ``Transformer`` (100 positional
    kernels, 4 + 4 attention heads, dropout 0.1), flattened over dims 1-2,
    projected by a two-layer MLP, then batch-normalised and passed through
    dropout (p=0.5).

    Args:
        feature_count: Size of the last input dimension.
        l: Size of input dimension 1.
        trg_len: Size of the returned embedding.
        num_classes: Accepted but not used by this module.
    """

    def __init__(self, feature_count, l, trg_len, num_classes):
        super().__init__()

        self.imu_transformer = Transformer(3, feature_count, 100, 4, 4, l, trg_len, 0.1)

        flat_dim = feature_count * l
        hidden_dim = flat_dim // 2
        self.linear_imu = nn.Sequential(
            nn.Linear(flat_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, trg_len),
            nn.ReLU(),
        )

        # Normalisation and regularisation applied to the embedding.
        self.batch_norm = nn.BatchNorm1d(trg_len)
        self.dropout = nn.Dropout(0.5)

    def forward(self, inputs):
        """Return the embedding, shape (batch, trg_len)."""
        encoded = self.imu_transformer(inputs)
        flattened = torch.flatten(encoded, start_dim=1, end_dim=2)
        embedding = self.linear_imu(flattened)
        return self.dropout(self.batch_norm(embedding))


# Attention Block

In [None]:
class Attention(nn.Module):
    """Scaled dot-product score between two embedding vectors.

    ``x`` is projected to a query, ``y`` to a key and a value. For 2-D
    inputs of shape (batch, input_dim) the module returns one raw score per
    sample and the projected values; no softmax is applied here (the
    ``softmax`` attribute is created but not used in ``forward``).

    Args:
        input_dim: Size of both input embeddings.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.input_dim = input_dim
        self.query = nn.Linear(input_dim, input_dim)
        self.key = nn.Linear(input_dim, input_dim)
        self.value = nn.Linear(input_dim, input_dim)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, x, y):
        """Return ``(scores, values)``: (batch, 1, 1) and (batch, 1, input_dim)."""
        # Insert a length-1 axis at dim 1 so bmm sees (batch, 1, input_dim).
        query_vec = self.query(x).unsqueeze(1)
        key_vec = self.key(y).unsqueeze(1)
        value_vec = self.value(y).unsqueeze(1)

        scale = self.input_dim ** 0.5
        scores = torch.bmm(query_vec, key_vec.transpose(1, 2)) / scale

        return scores, value_vec

# Cross-Sensor Attention

In [None]:
class CrossSensorAttention(nn.Module):
    """Fuse three embeddings with pairwise attention and classify the result.

    One shared ``Attention`` module scores all six ordered pairs of the
    three embeddings. The six scores are softmax-normalised against each
    other per sample, each weight scales the value projection of its pair,
    and the six weighted values are added to the sum of the three input
    embeddings. A linear layer maps the fused embedding to class scores.

    Args:
        input_dim: Size of every input embedding.
        num_classes: Number of output classes.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.input_dim = input_dim
        self.softmax = nn.Softmax(dim=1)
        self.attention_cal = Attention(input_dim)
        self.classifier = nn.Linear(input_dim, num_classes)

    def forward(self, emb_1, emb_2, emb_3):
        """Return ``(class_scores, final_embedding, attention_matrix)``.

        ``attention_matrix`` holds the normalised pair weights with the pair
        axis first, in the order (1,2), (1,3), (2,1), (2,3), (3,1), (3,2).
        """
        ordered_pairs = (
            (emb_1, emb_2), (emb_1, emb_3),
            (emb_2, emb_1), (emb_2, emb_3),
            (emb_3, emb_1), (emb_3, emb_2),
        )
        pair_scores = []
        pair_values = []
        for query_emb, context_emb in ordered_pairs:
            score, value = self.attention_cal(query_emb, context_emb)
            pair_scores.append(score.squeeze(1))
            pair_values.append(value)

        # Softmax over dim 1, the axis of the six pairs.
        attention = self.softmax(torch.stack(pair_scores, dim=1))

        # Move the pair axis to the front; each slice is then ready for bmm.
        attention_matrix = attention.unsqueeze(2).transpose(0, 1)

        final_embedding = emb_1 + emb_2 + emb_3
        for pair_weight, pair_value in zip(attention_matrix, pair_values):
            final_embedding += torch.bmm(pair_weight, pair_value).squeeze(dim=1)

        class_scores = self.classifier(final_embedding)

        return class_scores, final_embedding, attention_matrix
        

# Overall Model

In [None]:
class MultiSensorModel(nn.Module):
    """Full network: a shared per-sensor encoder plus cross-sensor attention.

    A single ``Model`` instance (``sensor_model_1``) encodes all three
    sensor inputs, so the three streams share weights. The three embeddings
    are fused and classified by ``CrossSensorAttention``.

    Args:
        feature_count, l, trg_len, num_classes: Passed to ``Model``.
        feature_dim: Embedding size expected by the attention block; the
            encoder outputs ``trg_len`` features, so the two must agree.
    """

    def __init__(self, feature_count, l, trg_len, num_classes, feature_dim):
        super().__init__()

        # One encoder, reused for every sensor placement.
        self.sensor_model_1 = Model(feature_count, l, trg_len, num_classes)
        # Fusion and classification over the three embeddings.
        self.cross_sensor_attention = CrossSensorAttention(feature_dim, num_classes)

    def forward(self, sensor_data1, sensor_data2, sensor_data3):
        """Return class scores, fused embedding, the three embeddings and the attention weights."""
        # Encode the streams one after another, in argument order.
        embedding_1, embedding_2, embedding_3 = (
            self.sensor_model_1(stream)
            for stream in (sensor_data1, sensor_data2, sensor_data3)
        )

        fused = self.cross_sensor_attention(embedding_1, embedding_2, embedding_3)
        class_scores, feature_embedding, attention_matrix = fused

        return class_scores, feature_embedding, embedding_1, embedding_2, embedding_3, attention_matrix


# GPU Specification

In [None]:
# Choose the process-wide default tensor type: CUDA float tensors when a GPU
# is visible, CPU float tensors otherwise.
if not torch.cuda.is_available():
    torch.set_default_tensor_type('torch.FloatTensor')
    print_log("CUDA not available. Using CPU as default tensor type.")
else:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print_log("CUDA is available. Using GPU as default tensor type.")

In [None]:
# Lower-case aliases of the hyper-parameters defined at the top of the notebook.
batch_size=BATCH_SIZE
# epoch_batch_count=EPOCH_BATCH_COUNT
imu_l=SEQUENCE_LENGTH # TODO: expose as a hyper-parameter
imu_feature_count=IMU_FEATURE_COUNT
# Embedding size produced by Model (output width of linear_imu and batch_norm).
trg_len=512 # TODO: expose as a hyper-parameter
classes=CLASSES

In [None]:
# Output folders for the best models and for periodic checkpoints.
best_model_save_path = '/kaggle/working/best_models'
checkpoint_save_path = '/kaggle/working/checkpoints'

# Create the folders in-process instead of shelling out to `mkdir`:
# no shell string is built, it is portable, and exist_ok=True makes
# re-running the cell a no-op when the folders already exist.
os.makedirs(best_model_save_path, exist_ok=True)
os.makedirs(checkpoint_save_path, exist_ok=True)

# Train dataset

In [None]:
from math import floor, ceil
class TrainDataset(Dataset):
    """Random triplet sampler over a three-level nested list.

    ``training_data`` is indexed as ``[group][session][sequence]``. The
    variable names call the outer level "user"; presumably it is the
    activity class when built from ``training_imu_data`` - confirm at the
    call site. Each item is a random (anchor, positive, negative) triplet:
    anchor and positive come from two different sessions of one group, the
    negative from a different group. ``idx`` is ignored.

    Args:
        training_data: Nested list ``[group][session][sequence]``.
        batch_size: Batch size, used only to derive the epoch length.
    """

    # Upper bound on re-draws per item, so degenerate data raises an error
    # instead of looping forever.
    MAX_SAMPLING_ATTEMPTS = 1000

    def __init__(self, training_data, batch_size):
        self.training_data = training_data
        self.batch_size = batch_size
        # Total number of sequences across all groups and sessions.
        flattened_length_train = sum(
            len(item) if isinstance(item, list) else 1
            for sublist in training_data
            for item in (sublist if isinstance(sublist, list) else [sublist])
        )
        self.epoch_batch_count = ceil(flattened_length_train / batch_size)
        print(self.epoch_batch_count,"epoch_batch_count")

    def __len__(self):
        """Number of triplets per epoch: a whole number of batches."""
        return self.batch_size * self.epoch_batch_count

    @staticmethod
    def _two_distinct(count):
        """Draw two different indices from ``range(count)``; needs count >= 2."""
        first = int(np.random.randint(0, count))
        # A non-zero offset modulo count is uniform over the other indices.
        second = int((first + np.random.randint(1, count)) % count)
        return first, second

    def __getitem__(self, idx):
        """Return ``(anchor, positive, negative, genuine_idx, imposter_idx)``.

        Raises:
            ValueError: If there are fewer than two top-level groups.
            RuntimeError: If no valid triplet is found within
                ``MAX_SAMPLING_ATTEMPTS`` draws.
        """
        group_count = len(self.training_data)
        if group_count < 2:
            # Raised outside the retry loop: a negative can never be drawn.
            raise ValueError("TrainDataset needs at least two top-level groups to draw a negative sample.")

        for _ in range(self.MAX_SAMPLING_ATTEMPTS):
            try:
                genuine_user_idx, imposter_user_idx = self._two_distinct(group_count)
                genuine_sessions = self.training_data[genuine_user_idx]
                imposter_sessions = self.training_data[imposter_user_idx]

                if len(genuine_sessions) == 0 or len(imposter_sessions) == 0:
                    raise ValueError("Empty user data detected.")
                if len(genuine_sessions) < 2:
                    raise ValueError("Fewer than two sessions available for the genuine user.")

                genuine_sess_1, genuine_sess_2 = self._two_distinct(len(genuine_sessions))
                anchor_pool = genuine_sessions[genuine_sess_1]
                positive_pool = genuine_sessions[genuine_sess_2]
                if len(anchor_pool) == 0 or len(positive_pool) == 0:
                    raise ValueError("Empty session data detected.")

                negative_pool = imposter_sessions[np.random.randint(0, len(imposter_sessions))]
                if len(negative_pool) == 0:
                    raise ValueError("Empty imposter session data detected.")

                anchor = anchor_pool[np.random.randint(0, len(anchor_pool))]
                positive = positive_pool[np.random.randint(0, len(positive_pool))]
                negative = negative_pool[np.random.randint(0, len(negative_pool))]

                return anchor, positive, negative, genuine_user_idx, imposter_user_idx

            except ValueError as e:
                print_log(f"Encountered ValueError: {str(e)}. Retrying with new indices.")

        raise RuntimeError(
            f"Could not sample a valid triplet after {self.MAX_SAMPLING_ATTEMPTS} attempts."
        )

# Test dataset

In [None]:
class TestDataset(Dataset):
    """Flat, index-addressable view over nested evaluation data.

    ``eval_data`` is indexed as ``eval_data[user_idx][session_idx][seq_idx]``.
    Items are enumerated user by user and session by session; each item is
    returned together with its ``user_idx``, which the evaluation loops in
    this notebook unpack as the class label.
    """

    def __init__(self, eval_data):
        self.eval_data = eval_data
        # Number of sessions held for each user.
        self.num_sessions = [len(user_sessions) for user_sessions in self.eval_data]
        # Number of sequences in each session, flattened across all users.
        self.num_seqs = [len(session) for user_sessions in self.eval_data for session in user_sessions]

    def __len__(self):
        """Return the total number of sequences across all users and sessions."""
        return sum(len(session) for user_sessions in self.eval_data for session in user_sessions)

    def __getitem__(self, idx):
        """Return ``(sequence, user_idx)`` for the flat index ``idx``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        # Without this normalization a negative idx satisfied the very first
        # "cumulative + length > idx" test below and silently returned an
        # element of user 0's first session.
        if idx < 0:
            idx += len(self)
            if idx < 0:
                raise IndexError("Index out of bounds for dataset.")

        # Walk the sessions in order until the running total passes idx.
        cumulative_length = 0
        for user_idx, user_sessions in enumerate(self.eval_data):
            for session_idx, session in enumerate(user_sessions):
                session_length = len(session)
                if cumulative_length + session_length > idx:
                    seq_idx = idx - cumulative_length
                    data = session[seq_idx]

                    # Debugging statements
                    debug_log(f"Index: {idx}, User Index: {user_idx}, Session Index: {session_idx}, Sequence Index: {seq_idx}")

                    # A None entry is reported but still returned to the caller.
                    if data is None:
                        error_log(f"Returned data is None for index: {idx} in testdata")
                    return data, user_idx

                cumulative_length += session_length

        # If we get here, idx is out of bounds
        raise IndexError("Index out of bounds for dataset.")

In [None]:
# Wrap the test split (testing_imu_data, defined earlier in the notebook) in a
# dataset and a loader for evaluation.
testing_data=testing_imu_data
test_dataset = TestDataset(testing_data)
# shuffle is left at its default (off), so batches follow the dataset order.
test_dataloader = DataLoader(test_dataset, batch_size=8)

# Loss Function - Triplet Loss

In [None]:
class TripletLoss(nn.Module):
    """Margin-based triplet loss on Euclidean distances.

    For each (anchor, positive, negative) row the loss is
    ``relu(d(anchor, positive) - d(anchor, negative) + margin)``; the batch
    mean is returned.

    Args:
        margin: required gap between the negative and positive distances.
        eps: lower bound applied before ``sqrt`` / division. It keeps the
            gradient finite when two embeddings coincide (the derivative of
            ``sqrt`` at 0 is infinite and would otherwise produce NaN
            gradients) and avoids dividing by zero in ``calc_cosine``.
    """

    def __init__(self, margin=1.0, eps=1e-12):
        super().__init__()
        self.margin = margin
        self.eps = eps

    def calc_euclidean(self, x1, x2):
        """Row-wise Euclidean distance between ``x1`` and ``x2`` (reduces dim 1)."""
        # Clamp the squared distance so sqrt never sees an exact zero.
        return (x1 - x2).pow(2).sum(dim=1).clamp_min(self.eps).sqrt()

    def calc_cosine(self, x1, x2):
        """Row-wise cosine similarity (not a distance) between ``x1`` and ``x2``."""
        dot_product_sum = (x1 * x2).sum(dim=1)
        norm_multiply = x1.pow(2).sum(dim=1).sqrt() * x2.pow(2).sum(dim=1).sqrt()
        # A zero vector yields similarity 0 instead of NaN.
        return dot_product_sum / norm_multiply.clamp_min(self.eps)

    def calc_manhattan(self, x1, x2):
        """Row-wise L1 distance between ``x1`` and ``x2`` (reduces dim 1)."""
        return (x1 - x2).abs().sum(dim=1)

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss over the batch."""
        distance_positive = self.calc_euclidean(anchor, positive)
        distance_negative = self.calc_euclidean(anchor, negative)
        losses = torch.relu(distance_positive - distance_negative + self.margin)

        return losses.mean()


In [None]:
# Build the training dataset/loader and the model.
# NOTE(review): `batch_size`, `imu_l` and `trg_len` are not defined in this cell —
# presumably set earlier in the notebook; confirm.
dataset = TrainDataset(training_imu_data, batch_size)
dataloader = DataLoader(dataset, batch_size=batch_size)
# NOTE(review): the literal 36 matches `feature_length = 36` defined further down —
# presumably the per-sensor feature width; confirm against MultiSensorModel's signature.
model = MultiSensorModel(36, imu_l, trg_len, CLASSES, trg_len)

In [None]:
# Triplet loss with its default margin (1.0).
loss_fn = TripletLoss()
# Adam over all model parameters; the initial rate comes from LEARNING_RATE and
# weight_decay adds an L2 penalty of 1e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,weight_decay=1e-4) # change the learning rate

# Scheduler on LR based on validation loss

In [None]:
# Custom Scheduler Class
class CustomLRScheduler:
    """Reduce the learning rate when the validation loss stops improving.

    The scheduler remembers the lowest validation loss seen so far. Each call
    to ``step`` that does not beat it increments a counter; once the counter
    reaches ``patience`` every parameter group's rate is multiplied by
    ``factor`` (never dropping below ``min_lr``) and the counter restarts.
    """

    def __init__(self, optimizer, patience=5, factor=0.3, min_lr=1e-4):
        """
        Parameters:
        - optimizer: PyTorch optimizer whose param_groups are adjusted
        - patience: Number of non-improving epochs tolerated before reducing
        - factor: Multiplicative factor applied to the LR on each reduction
        - min_lr: Lower bound for the learning rate
        """
        self.optimizer = optimizer
        self.patience, self.factor, self.min_lr = patience, factor, min_lr
        self.best_loss = float('inf')
        self.wait = 0

    def step(self, val_loss):
        """
        Record this epoch's validation loss and reduce the LR if it has
        stalled for ``patience`` consecutive calls.
        Parameters:
        - val_loss: Validation loss of the current epoch
        """
        if val_loss < self.best_loss:
            # New best: remember it and restart the stall counter.
            self.best_loss = val_loss
            self.wait = 0
            return

        self.wait += 1
        if self.wait >= self.patience:
            self._reduce_lr()
            self.wait = 0

    def _reduce_lr(self):
        """Scale every parameter group's LR by ``factor``, floored at ``min_lr``."""
        print_log("No improvement in the validation loss")
        for group in self.optimizer.param_groups:
            current_lr = group['lr']
            if current_lr > self.min_lr:
                lowered_lr = max(current_lr * self.factor, self.min_lr)
                group['lr'] = lowered_lr
                print(f"Reducing learning rate from {current_lr:.6f} to {lowered_lr:.6f}")
            else:
                print(f"Learning rate is already at the minimum value {self.min_lr:.6f}")

In [None]:
# Initialize the scheduler
# NOTE(review): MIN_LR (0.001) equals LEARNING_RATE (0.001) in the hyper-parameter
# cell, and CustomLRScheduler only lowers a rate that is strictly above min_lr, so
# with these settings the learning rate is never reduced. Lower MIN_LR if decay is intended.
scheduler = CustomLRScheduler(optimizer, patience=PATIENCE, factor=LR_FACTOR, min_lr= MIN_LR)

In [None]:
# NOTE(review): g_eer is initialised here but not read by the training or
# evaluation cells visible in this part of the notebook.
g_eer = math.inf
# The training loop runs over range(init_epoch, epochs).
init_epoch = 0
epochs=EPOCHS 

In [None]:
# Thresholds a new epoch has to beat before a checkpoint is written.
best_accuracy = BEST_ACCURACY
best_loss = BEST_LOSS
# Per-epoch histories filled by the training loop.
training_losses = []
validation_losses = []
# NOTE(review): overall_losses is only referenced from commented-out lines in the training loop.
overall_losses =[]
# NOTE(review): feature_embeddings_train is not used by the cells visible here.
feature_embeddings_train=[]
# One dict per checkpoint on disk: {'path', 'loss', 'accuracy'}.
saved_models = []
max_saved_models = MAX_SAVED_MODELS  
# Width of each per-sensor slice: the loops below feed the model the input
# columns [:36], [36:72] and [72:].
feature_length=36
# Learning rate in effect at the end of each epoch.
lr_history = []

In [None]:
def find_worst_loss_and_acc_in_existing_model(saved_models):
    """Return ``(highest loss, lowest accuracy)`` over the saved checkpoints.

    Each entry of ``saved_models`` is a dict with ``'loss'`` and
    ``'accuracy'`` keys. The two values are tracked independently and may
    come from different entries. An empty list yields ``(-inf, inf)``.
    """
    highest_loss, lowest_accuracy = float('-inf'), float('inf')
    for entry in saved_models:
        entry_loss, entry_accuracy = entry['loss'], entry['accuracy']
        if entry_loss > highest_loss:
            highest_loss = entry_loss
        if entry_accuracy < lowest_accuracy:
            lowest_accuracy = entry_accuracy
    return highest_loss, lowest_accuracy

def memory_is_low():
    """Return True when less than 100 MB of system memory is available.

    Placeholder heuristic based on ``psutil.virtual_memory().available``;
    replace with a stricter check if needed.
    """
    import psutil

    low_water_mark_bytes = 100 * 1024 * 1024  # 100 MB
    available_bytes = psutil.virtual_memory().available
    return available_bytes < low_water_mark_bytes

def find_worst_model_by_accuracy(saved_models):
    """Delete the saved checkpoint with the lowest accuracy.

    Despite the name this does more than find: the entry with the smallest
    ``'accuracy'`` (first one on ties) has its file removed from disk if it
    exists and is then dropped from ``saved_models`` in place. Nothing
    happens, and ``None`` is returned, when no entry qualifies.
    """
    worst_model = None
    lowest_accuracy = float('inf')
    for entry in saved_models:
        if entry['accuracy'] < lowest_accuracy:
            lowest_accuracy, worst_model = entry['accuracy'], entry

    if not worst_model:
        return

    print_log(f"Worst model - Path: {worst_model['path']}, Accuracy: {worst_model['accuracy']:.6f}, Loss: {worst_model['loss']:.6f}")
    if os.path.exists(worst_model['path']):
        os.remove(worst_model['path'])
        print_log(f"Deleted worst model by less accuracy: {worst_model['path']} with Accuracy: {worst_model['accuracy']:.6f}, Loss: {worst_model['loss']:.6f}")
    # The bookkeeping entry is dropped even if the file was already gone.
    saved_models.remove(worst_model)

def find_worst_model_by_loss(saved_models):
    """Delete the saved checkpoint with the highest loss.

    The entry with the largest ``'loss'`` (first one on ties) has its file
    removed from disk if it exists and is then dropped from ``saved_models``
    in place. Nothing happens, and ``None`` is returned, when no entry
    qualifies.
    """
    worst_model = None
    highest_loss = float('-inf')
    for entry in saved_models:
        if entry['loss'] > highest_loss:
            highest_loss, worst_model = entry['loss'], entry

    if not worst_model:
        return

    print_log(f"Worst model - Path: {worst_model['path']}, Accuracy: {worst_model['accuracy']:.6f}, Loss: {worst_model['loss']:.6f}")
    if os.path.exists(worst_model['path']):
        os.remove(worst_model['path'])
        print_log(f"Deleted worst model by higher loss: {worst_model['path']} with Accuracy: {worst_model['accuracy']:.6f}, Loss: {worst_model['loss']:.6f}")
    # The bookkeeping entry is dropped even if the file was already gone.
    saved_models.remove(worst_model)

def find_worst_model_by_combined_metric(saved_models, weight_loss=0.5, weight_accuracy=0.5):
    """Delete the saved checkpoint with the lowest combined score.

    The score is ``weight_accuracy * accuracy/100 - weight_loss * loss/100``
    (higher is better). It is also written back into every entry under the
    ``"combined_metric"`` key. The lowest-scoring entry (first one on ties)
    has its file removed if it exists and is dropped from ``saved_models``.

    NOTE(review): both quantities are scaled by a fixed 0-100 range. The
    training loop stores ``accuracy_score`` output, which lies in [0, 1] —
    confirm the accuracy scale is what is intended here.
    """
    loss_floor, loss_ceiling = 0, 100
    accuracy_floor, accuracy_ceiling = 0, 100

    worst_model = None
    worst_combined_metric = float('inf')
    combined_matrix_values = []
    for entry in saved_models:
        scaled_loss = (entry['loss'] - loss_floor) / (loss_ceiling - loss_floor)
        scaled_accuracy = (entry['accuracy'] - accuracy_floor) / (accuracy_ceiling - accuracy_floor)
        # High accuracy raises the score, high loss lowers it.
        score = (weight_accuracy * scaled_accuracy) - (weight_loss * scaled_loss)
        combined_matrix_values.append(score)
        entry["combined_metric"] = score

        if score < worst_combined_metric:
            worst_combined_metric, worst_model = score, entry

    if not worst_model:
        return

    print_log(f"Worst model : Path: {worst_model['path']}, Accuracy: {worst_model['accuracy']:.6f}, Loss: {worst_model['loss']:.6f}")
    print_log(f"Because this is have minimum combined matrix = {worst_combined_metric}, All combined matric {combined_matrix_values}")
    if os.path.exists(worst_model['path']):
        os.remove(worst_model['path'])
        print_log(f"Deleted worst model by less accuracy and higher loss: {worst_model['path']} with Accuracy: {worst_model['accuracy']:.6f}, Loss: {worst_model['loss']:.6f}")
    saved_models.remove(worst_model)

# Per-epoch histories appended by the training loop: training loss and
# validation accuracy respectively.
total_losses = []
total_accuracy = []

In [None]:
# Report how many leaf items each split holds, assuming each split is a nested list.
def _flattened_length(nested):
    """Count the items two levels down in ``nested``.

    An inner ``list`` contributes its length; anything else (including a
    non-list first-level element) counts as a single item.
    """
    total = 0
    for sublist in nested:
        members = sublist if isinstance(sublist, list) else [sublist]
        for item in members:
            total += len(item) if isinstance(item, list) else 1
    return total


flattened_length_train = _flattened_length(training_imu_data)
print("Length of flattened list:", flattened_length_train)

flattened_length_test = _flattened_length(testing_imu_data)
print("Length of flattened list:", flattened_length_test)

flattened_length_val = _flattened_length(validation_imu_data)
print("Length of flattened list:", flattened_length_val)


# Training

In [None]:
# One iteration per epoch: triplet + classification training, validation,
# LR scheduling, metric logging and checkpoint housekeeping.
for i in range(init_epoch, epochs):
    # Set to 1 once a checkpoint has been written this epoch, so the loss- and
    # accuracy-triggered saves below never both write the same epoch.
    model_saved = 0
    print_log(f"Epoch - {i+1} is started")

    t_loss = 0.0
    o_loss =0.0
    correct_predictions = 0
    total_predictions = 0
    
    start = time.time()
    model.train(True)
    
    # Training phase
    for batch_idx, item in enumerate(dataloader):
        # Each batch is a triplet plus the class labels of the anchor and the negative.
        anchor, positive, negative, anchor_class,negative_class = item
        optimizer.zero_grad()
        # Forward pass for triplet loss
        # The input's last axis is split into three feature_length-wide slices, one per
        # model input; the model returns (class scores, combined features, three
        # embeddings, attention matrix).
#         class_scores,concatenated_embedding,embedding_1, embedding_2
        anchor_class_scores,anchor_features,anch_emb1,anch_emb2,anch_emb3,attention_matrix_anch= model(anchor.float()[:, :, :feature_length],anchor.float()[:, :, feature_length:2*feature_length],anchor.float()[:, :, 2*feature_length:])
        positive_class_scores,positive_features,posi_emb1,posi_emb2,posi_emb3,attention_matrix_posi = model(positive.float()[:, :, :feature_length],positive.float()[:, :, feature_length:2*feature_length],positive.float()[:, :, 2*feature_length:])
        negative_class_scores,negative_features,nega_emb1,nega_emb2,nega_emb3,attention_matrix_nega = model(negative.float()[:, :, :feature_length],negative.float()[:, :, feature_length:2*feature_length],negative.float()[:, :, 2*feature_length:])

        # One triplet loss per embedding, summed.
        triplet_loss_place_1 = loss_fn(anch_emb1,posi_emb1,nega_emb1)
        triplet_loss_place_2 = loss_fn(anch_emb2,posi_emb2,nega_emb2)
        triplet_loss_place_3 = loss_fn(anch_emb3,posi_emb3,nega_emb3)
        triplet_loss_tot = triplet_loss_place_1+triplet_loss_place_2+ triplet_loss_place_3
        # overall_loss =loss_fn(anchor_features,positive_features,negative_features)
        # print(triplet_loss_tot,overall_loss)
        # Forward pass for classification loss

        # Classify anchor, positive and negative together; the positive shares the
        # anchor's class label.
        all_class_scores = torch.cat([anchor_class_scores, positive_class_scores, negative_class_scores], dim=0)
        all_labels_train = torch.cat([anchor_class, anchor_class, negative_class], dim=0)  # labels in the same order as all_class_scores
        class_loss = nn.CrossEntropyLoss()(all_class_scores, all_labels_train)

        # Cross-entropy plus 0.1 x the mean of the three triplet losses.
        total_loss =  class_loss + (0.1* triplet_loss_tot/3)
        # total_loss= triplet_loss_tot

        total_loss.backward()
        optimizer.step()
#         optimizer.zero_grad()
        
        t_loss += total_loss.item()
        
        _, predicted_labels = torch.max(all_class_scores, dim=1)
        correct_predictions += (predicted_labels == all_labels_train).sum().item()
        total_predictions += all_labels_train.size(0)
        # o_loss += overall_loss.item()
        
    
    # Mean training loss per batch and training accuracy in percent.
    t_loss /= len(dataloader)
    # o_loss /= len(dataloader)
    training_losses.append(t_loss)
    training_accuracy = correct_predictions / total_predictions * 100
    print_log(f"Epoch {i+1}: Training Loss = {t_loss:.4f}, Training Accuracy = {training_accuracy:.2f}%")
    # overall_losses.append(o_loss)
    


    
    # Validation phase
    # Only the cross-entropy term is evaluated here; no triplet loss.
    model.eval()
    v_loss = 0.0
    all_preds = []
    all_labels = []
    # NOTE(review): the dataset and loader are rebuilt every epoch although
    # validation_imu_data is not modified inside this loop.
    t_dataset = TestDataset(validation_imu_data)
    
    t_dataloader = DataLoader(t_dataset, batch_size=batch_size, shuffle=False)
    tot = 0
    print_log(f"The lenth of t_dataloader : {len(t_dataloader)}")
    for batch_idx_t, item_t in enumerate(t_dataloader):
        with torch.no_grad():
            # NOTE(review): `val` is never read and 992 is an unexplained constant.
            val = tot // 992
            tot += 1
            
            item_t_in,class_label=item_t
            item_out = model(item_t_in.float()[:, :, :feature_length],item_t_in.float()[:, :, feature_length:feature_length*2],item_t_in.float()[:,:,feature_length*2:])
            class_scores = item_out[0]
            # Kept so the cells after the loop can inspect the last batch's attention output.
            attention_matrix_val=item_out[5]
            true_labels = class_label
            predicted_classes = torch.argmax(class_scores, dim=1)
            # if true_labels!=predicted_classes:
            # print(true_labels,predicted_classes)
            # NOTE(review): reuses the name of the training counter above, now as a boolean tensor.
            correct_predictions = (predicted_classes == true_labels)
            
            # NOTE(review): this per-batch value is overwritten by accuracy_score after the loop.
            accuracy = correct_predictions.sum().item() / len(true_labels)
            
            class_loss_val = nn.CrossEntropyLoss()(class_scores, true_labels)
            v_loss += class_loss_val.item()
            all_preds.extend(predicted_classes.tolist())
            all_labels.extend(true_labels.tolist())

    # Validation metrics over every validation sample; accuracy is a fraction in
    # [0, 1], whereas training_accuracy above is a percentage.
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    # Mean of the per-batch losses.
    v_loss /= len(t_dataloader)
    
    print_log(f"loop done  {tot}")
    # Step the scheduler based on the validation loss
    scheduler.step(v_loss)
    curr_lr = optimizer.param_groups[0]['lr']
    print(f"Epoch {i+1}/{epochs}, Validation Loss: {v_loss:.6f}, Current LR: {curr_lr:.6f}")
    # "loss" is the training loss; "accuracy"/"precision"/"recall"/"f1"/"v_loss" are validation metrics.
    wandb.log({"accuracy": accuracy, "loss": t_loss, "precision": precision, "recall": recall, "v_loss":v_loss, 'f1':f1, 'lr' : curr_lr,"training_accuracy":training_accuracy})
    # wandb.log({"accuracy": accuracy, "loss": t_loss, "precision": precision, "recall": recall, "v_loss":v_loss, 'f1':f1})
    end = time.time()
    
    total_losses.append(t_loss)
    validation_losses.append(v_loss)
    total_accuracy.append(accuracy)
    lr_history.append(curr_lr)
    
    print_log(f"------> Epoch No: {i+1} => Loss: {t_loss:.6f} >> Accuracy: {accuracy:.6f} >> Precision: {precision:.6f} >> Recall: {recall:.6f} >> F1: {f1:.6f} >> Time: {end-start:.2f}")
    
    # Check memory status
    if memory_is_low():
        print_log(f"Memory is running low, attempting to free up space by deleting the worst model.")
        # find_worst_model_by_loss(saved_models)
        # Deletes the lowest-accuracy checkpoint file and drops its entry from saved_models.
        find_worst_model_by_accuracy(saved_models)
        # find_worst_model_by_combined_metric(saved_models)
        # assign last best loss and accuracy in existing models
        # NOTE(review): the thresholds are only refreshed here if the list is still over the limit after the deletion.
        if len(saved_models) > max_saved_models:
            best_loss, best_accuracy = find_worst_loss_and_acc_in_existing_model(saved_models)

    # Save a checkpoint when this epoch's training loss (t_loss) beats best_loss and
    # nothing has been saved yet this epoch.
    # NOTE(review): the original comment said "validation loss", but the comparison
    # uses t_loss — confirm which is intended.
    if t_loss < best_loss:
        print_log(f"Loss improved from {best_loss:.6f} to {t_loss:.6f}. ")
        if model_saved:
            print_log("Already model saved. Skip this model by loss.")
        else:
            print_log("Saving model.................")
            model_path = f"{best_model_save_path}/epoch_{i+1}_accuracy_{accuracy:.6f}_loss_{t_loss:.6f}.pt"
            # Saves the whole model object, not just a state_dict.
            torch.save(model, model_path)
            print_log(f"Best model saved at (by loss): {model_path}")
            saved_models.append({'path': model_path, 'loss': t_loss, 'accuracy' : accuracy})
            # Model saved
            model_saved = 1
    
            if len(saved_models) > max_saved_models:
                # find_worst_model_by_loss(saved_models)
                find_worst_model_by_accuracy(saved_models)
                # find_worst_model_by_combined_metric(saved_models)
            
                # After pruning, the thresholds become the worst loss / worst accuracy
                # among the checkpoints that remain.
                best_loss, best_accuracy = find_worst_loss_and_acc_in_existing_model(saved_models)


    # Save model if validation accuracy improves
    if accuracy > best_accuracy :
        print_log(f"Accuracy improved from {best_accuracy:.6f} to {accuracy:.6f}.")
        if model_saved:
            print_log("But, Already model saved. Skip this model by accuracy.")
        else:
            print_log("Saving model.................")
            model_path = f"{best_model_save_path}/epoch_{i+1}_accuracy_{accuracy:.6f}_loss_{t_loss:.6f}.pt"
            torch.save(model, model_path)
            print_log(f"Best model saved at (by accuracy): {model_path}")
            saved_models.append({'path': model_path, 'loss' : t_loss, 'accuracy': accuracy})
            # Model saved
            model_saved = 1
    
            if len(saved_models) > max_saved_models:
                # find_worst_model_by_loss(saved_models)
                find_worst_model_by_accuracy(saved_models)
#                 find_worst_model_by_combined_metric(saved_models)
            
                # After pruning, the thresholds become the worst loss / worst accuracy
                # among the checkpoints that remain.
                best_loss, best_accuracy = find_worst_loss_and_acc_in_existing_model(saved_models)
    
    


In [None]:
# Shape of the model's sixth output for the last validation batch of the final epoch.
attention_matrix_val.shape

In [None]:
# Rearrange the attention tensor for inspection: swap the first two axes, then
# squeeze axis 2 twice.
# NOTE(review): squeeze(dim=2) is a no-op unless that axis has size 1 — presumably
# the tensor is 4-D with two trailing singleton axes; confirm.
# NOTE(review): the variable is reassigned in place, so re-running this cell
# transposes it again.
attention_matrix_val=attention_matrix_val.transpose(0,1)
attention_matrix_val=attention_matrix_val.squeeze(dim=2)
attention_matrix_val=attention_matrix_val.squeeze(dim=2)
attention_matrix_val.shape


In [None]:
# Display the reshaped attention tensor.
attention_matrix_val

In [None]:
# Sum over axis 1 — presumably a sanity check that the attention weights are
# normalised along that axis; confirm.
attention_matrix_val.sum(dim=1)

# Plot loss

In [None]:
# Plotting training and validation loss
plt.figure(figsize=(10, 5))
plt.plot(training_losses, label='Training Loss')
plt.plot(validation_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
# The figure shows both curves, so the title names both.
plt.title('Training and Validation Loss Over Epochs')
# Without a legend the two line labels above are never drawn.
plt.legend()
plt.show()

# Plot learning rate

In [None]:
# Plot the learning rate recorded at the end of each completed epoch.
plt.figure(figsize=(10, 5))
# Size the x-axis from lr_history itself: the history is shorter than `epochs`
# when training was interrupted or started from a non-zero init_epoch, and a
# length mismatch would make plt.plot raise.
plt.plot(range(1, len(lr_history) + 1), lr_history, marker='o', label="Learning Rate")
plt.xlabel("Epochs")
plt.ylabel("Learning Rate")
plt.title("Epochs vs. Learning Rate")
plt.grid(True)
plt.legend()
plt.show()

# Testing

In [None]:
class TestDataset(Dataset):
    """Flat view over ``eval_data[user_idx][session_idx][seq_idx]``.

    A lookup table of ``(user_idx, session_idx, seq_idx)`` triples is built
    once at construction, so item access is a direct table lookup. Each item
    is returned together with its ``user_idx``.
    """

    def __init__(self, eval_data):
        self.eval_data = eval_data

        # One (user, session, sequence) triple per sample, in nesting order.
        self.index_map = [
            (user_idx, session_idx, seq_idx)
            for user_idx, user_sessions in enumerate(eval_data)
            for session_idx, session_sequences in enumerate(user_sessions)
            for seq_idx in range(len(session_sequences))
        ]

    def __len__(self):
        # One entry in the table per sample.
        return len(self.index_map)

    def __getitem__(self, idx):
        # Translate the flat index into the nested position, then fetch the sample.
        user_idx, session_idx, seq_idx = self.index_map[idx]
        sample = self.eval_data[user_idx][session_idx][seq_idx]
        return sample, user_idx


In [None]:
# Rebuild the test dataset and loader so they use the index-map TestDataset
# defined in the cell above.
testing_data=testing_imu_data
test_dataset = TestDataset(testing_data)
# shuffle is left at its default (off), so batches follow the dataset order.
test_dataloader = DataLoader(test_dataset, batch_size=8)

In [None]:
# Evaluate every saved checkpoint on the test set and remember the most accurate one.
# NOTE(review): choosing the checkpoint by test accuracy leaks the test split into
# model selection; consider selecting on the validation split instead.
# Get a list of files in the BEST_MODEL_PATH directory (sorted so that ties are
# resolved the same way on every run)
BEST_MODEL_PATH='/kaggle/working/best_models/'
existing_models = sorted(os.listdir(BEST_MODEL_PATH))
# Stays None when no checkpoint scores above zero, so a value left over from an
# earlier run of this cell is never reused by mistake.
final_best_model = None
best_accuracy_test=0
best_precision_test=0
best_recall_test=0
best_f1_test=0

# Loop through the files in the directory
for model_file in existing_models:
    # Full path of the model
    model_path = os.path.join(BEST_MODEL_PATH, model_file)

    # Skip anything that is not a regular file (e.g. sub-directories)
    if not os.path.isfile(model_path):
        continue

    feature_embeddings = []
    all_preds_test = []
    all_labels_test = []
    print(f"Processing model: {model_file}")
    # torch.load unpickles a full model object: only load checkpoints written by this
    # notebook. On PyTorch >= 2.6 the default weights_only=True rejects such files and
    # weights_only=False has to be passed explicitly.
    test_model = torch.load(model_path)
    test_model.eval()  # equivalent to train(False)
    with torch.no_grad():
        for batch_idx_t, item_t in enumerate(test_dataloader):
            item_t_in,class_label=item_t

            true_labels=class_label

            item_out = test_model(item_t_in.float()[:, :, :feature_length],item_t_in.float()[:, :, feature_length:feature_length*2],item_t_in.float()[:, :, 2*feature_length:])
            class_scores=item_out[0]
            feature_embeddings.append(item_out[1])
            predicted_classes = torch.argmax(class_scores, dim=1)
            all_preds_test.extend(predicted_classes.tolist())
            all_labels_test.extend(true_labels.tolist())
    accuracy_test = accuracy_score(all_labels_test, all_preds_test)
    precision_test = precision_score(all_labels_test, all_preds_test, average='macro')
    recall_test = recall_score(all_labels_test, all_preds_test, average='macro')
    f1_test = f1_score(all_labels_test, all_preds_test, average='macro')


    print_log(f"Accuracy: {accuracy_test:.6f} - Precision: {precision_test:.6f} - Recall: {recall_test:.6f} - F1: {f1_test:.6f}")
    # Keep all four metrics from the same (most accurate) checkpoint, so the values
    # logged below describe one real model rather than a mix of per-metric maxima.
    if accuracy_test>best_accuracy_test:
        final_best_model=model_path
        best_accuracy_test = accuracy_test
        best_precision_test = precision_test
        best_recall_test = recall_test
        best_f1_test = f1_test

if final_best_model is None:
    raise RuntimeError(f"No checkpoint in {BEST_MODEL_PATH} achieved a test accuracy above zero; nothing to select.")

wandb.log({"Test Accuracy": best_accuracy_test, "Test Precision": best_precision_test, "Test Recall": best_recall_test, 'Test f1':best_f1_test})
print_log(final_best_model)

In [None]:
# Reload the checkpoint selected above and reset the result accumulators.
feature_embeddings = []
all_preds_test = []
all_labels_test = []
# NOTE(review): torch.load unpickles a full model object — only load checkpoints
# this notebook wrote.
test_model = torch.load(final_best_model)
test_model.eval()
# Redundant with eval() above; both switch the model to inference mode.
test_model.train(False)

In [None]:

# Fresh accumulators for the inference pass in the next cell (the same lists
# were already reset in the previous cell).
feature_embeddings = []
tot=0
all_preds_test = []
all_labels_test = []

In [None]:

# Run the selected checkpoint over the test loader, collecting the predicted
# class, the true label and the feature embedding (second model output) of
# every batch. Gradients are not needed for inference.
with torch.no_grad():
    for batch_idx_t, item_t in enumerate(test_dataloader):
        item_t_in, class_label = item_t
        true_labels = class_label

        # Cast once, then hand the model the three feature_length-wide slices.
        imu_batch = item_t_in.float()
        item_out = test_model(
            imu_batch[:, :, :feature_length],
            imu_batch[:, :, feature_length:feature_length*2],
            imu_batch[:, :, 2*feature_length:],
        )

        class_scores = item_out[0]
        feature_embeddings.append(item_out[1])
        predicted_classes = class_scores.argmax(dim=1)
        all_preds_test.extend(predicted_classes.tolist())
        all_labels_test.extend(true_labels.tolist())

In [None]:
# Test-set metrics of the selected checkpoint; precision/recall/F1 are
# macro-averaged over classes.
accuracy_test = accuracy_score(all_labels_test, all_preds_test)
precision_test = precision_score(all_labels_test, all_preds_test, average='macro')
recall_test = recall_score(all_labels_test, all_preds_test, average='macro')
f1_test = f1_score(all_labels_test, all_preds_test, average='macro')

print_log(f"Accuracy: {accuracy_test:.6f} - Precision: {precision_test:.6f} - Recall: {recall_test:.6f} - F1: {f1_test:.6f}")
# NOTE(review): these wandb keys are the same ones the checkpoint-selection cell logs to.
wandb.log({"Test Accuracy": accuracy_test, "Test Precision": precision_test, "Test Recall": recall_test, 'Test f1':f1_test})

# Validation Accuracy (last epoch)

In [None]:
# all_labels / all_preds hold the validation predictions collected during the
# last training epoch, so this is the validation confusion matrix.
conf_matrix = confusion_matrix(all_labels, all_preds)

# Calculate the accuracy for each class (diagonal over row total). Classes with
# no true samples get 0 instead of a NaN from 0/0.
row_totals = conf_matrix.sum(axis=1)
class_accuracies = np.divide(
    conf_matrix.diagonal(),
    row_totals,
    out=np.zeros(len(row_totals), dtype=float),
    where=row_totals > 0,
)

# Print the accuracy for each class
# NOTE(review): `i` is the row position; it equals the class id only when every
# class id from 0 upwards occurs in the labels or predictions.
for i, class_accuracy in enumerate(class_accuracies):
    print_log(f"Accuracy for class {i}: {class_accuracy:.6f}")


plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
# Separate file name so the test confusion matrix saved later does not overwrite it.
image_path = 'confusion_matrix_val.png'
plt.savefig(image_path)
wandb.log({
    "confusion_matrix": wandb.Image(image_path)  # Log the confusion-matrix image
})
plt.show()
print_log("Confusion matrix is create sucessfully using validation")
# Logged under a validation key: this matrix is not computed from the test set.
wandb.log({"confusion_matrix_val": wandb.Image(image_path)})

# Testing Accuracy

In [None]:
# Confusion matrix of the selected checkpoint on the test set.
conf_matrix = confusion_matrix(all_labels_test, all_preds_test)

# Calculate the accuracy for each class (diagonal over row total). Classes with
# no true samples get 0 instead of a NaN from 0/0.
row_totals = conf_matrix.sum(axis=1)
class_accuracies = np.divide(
    conf_matrix.diagonal(),
    row_totals,
    out=np.zeros(len(row_totals), dtype=float),
    where=row_totals > 0,
)

# Print the accuracy for each class
# NOTE(review): `i` is the row position; it equals the class id only when every
# class id from 0 upwards occurs in the labels or predictions.
for i, class_accuracy in enumerate(class_accuracies):
    print_log(f"Accuracy for class {i}: {class_accuracy:.6f}")


plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
image_path = 'confusion_matrix.png'
plt.savefig(image_path)
wandb.log({
    "confusion_matrix": wandb.Image(image_path)  # Log the confusion-matrix image
})
plt.show()
print_log("Confusion matrix is create sucessfully using test")
wandb.log({"confusion_matrix_test": wandb.Image(image_path)})

In [None]:

# Move each tensor to CPU, detach from computation graph, and convert to NumPy array
# (one array per test batch, in loader order).
feature_embeddings_cpu = [emb.cpu().detach().numpy() for emb in feature_embeddings]

# Check for NaNs in individual arrays; they are only reported, not removed.
for i, emb_np in enumerate(feature_embeddings_cpu):
    if np.isnan(emb_np).any():
        print_log(f"NaN detected in feature_embeddings_cpu at index {i}")

# Concatenate the per-batch arrays along axis 0 into one array with a row per sample.
feature_embeddings_np = np.concatenate(feature_embeddings_cpu, axis=0)



In [None]:
# Number the activities: keys run from 0 to len(activity_names) - 1, in list order.
activity_id_mapping = dict(enumerate(activity_names))

# Output the results
# print("Shuffled Activity Names:", activity_names)
print("Activity ID Mapping:", activity_id_mapping)

In [None]:
# Calculate metrics for each class
# Build the confusion matrix here, with one row/column per entry of
# activity_id_mapping, instead of relying on whichever global `conf_matrix` was
# computed last (validation or test). Passing `labels` also keeps a row for
# classes that never occur, so the indexing below cannot go out of range.
class_ids = list(activity_id_mapping)
per_class_conf_matrix = confusion_matrix(all_labels_test, all_preds_test, labels=class_ids)

accuracy_list = []
class_names = []
precision_list = []
recall_list = []
f1_list = []
for row_idx, (class_id, class_name) in enumerate(activity_id_mapping.items()):
    # Get the true positive (diagonal) for the class and sum of the row (total samples for that class)
    true_positive = per_class_conf_matrix[row_idx, row_idx]
    total_samples = per_class_conf_matrix[row_idx].sum()

    # Accuracy per class (True positives / Total samples in class)
    accuracy = true_positive / total_samples if total_samples > 0 else 0

    # Calculate precision, recall, and F1 score for the current class
    # (restricting `labels` to one class yields that class's own score).
    precision = precision_score(all_labels_test, all_preds_test, labels=[class_id], average='weighted', zero_division=0)
    recall = recall_score(all_labels_test, all_preds_test, labels=[class_id], average='weighted', zero_division=0)
    f1 = f1_score(all_labels_test, all_preds_test, labels=[class_id], average='weighted', zero_division=0)

    # Append results to respective lists
    class_names.append(class_name)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

# Calculate the average across all classes (macro average)
average_accuracy = sum(accuracy_list) / len(accuracy_list)
average_precision = sum(precision_list) / len(precision_list)
average_recall = sum(recall_list) / len(recall_list)
average_f1 = sum(f1_list) / len(f1_list)

# Append the average values to the lists
class_names.append('Average')
accuracy_list.append(average_accuracy)
precision_list.append(average_precision)
recall_list.append(average_recall)
f1_list.append(average_f1)

# Create a DataFrame to store the results
results_df = pd.DataFrame({
    'Class': class_names,
    'Accuracy': accuracy_list,
    'Precision': precision_list,
    'Recall': recall_list,
    'F1-score': f1_list
})

# Save the results to CSV
results_df.to_csv('class_precision_recall_f1.csv', index=False)

# Display the final table
results_df

In [None]:


# Project the test-set feature embeddings to 2-D with t-SNE and colour each
# point by its true class label.
embeddings = feature_embeddings_np  # one row per test sample
labels =np.stack(all_labels_test, axis=0) # true labels; must have one entry per row of `embeddings`

# Apply t-SNE (fixed random_state)
tsne = TSNE(n_components=2, random_state=42)
embeddings_2d = tsne.fit_transform(embeddings)

# Plot t-SNE result
plt.figure(figsize=(10, 3))
scatter = plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=labels, cmap='viridis', alpha=0.7)
plt.colorbar(scatter, label='Class Label')
plt.title('t-SNE Visualization of Feature Embeddings')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
image_path = 'tsne_plot.png'
plt.savefig(image_path)

# Log the image to wandb
wandb.log({
    "tsne_plot": wandb.Image(image_path)  # Log the t-SNE plot image
})
plt.show()



In [None]:
from collections import defaultdict

# Initialize dictionary to collect feature embeddings per true label
embedding_dict = defaultdict(list)

# Start the prediction/label lists afresh: the earlier inference cell already
# filled them for the same model and data, so extending them again would
# duplicate every entry and distort any metric computed afterwards.
all_preds_test = []
all_labels_test = []

with torch.no_grad():
    for batch_idx_t, item_t in enumerate(test_dataloader):
        item_t_in, class_label = item_t
        true_labels = class_label

        # Forward pass through the model
        item_out = test_model(item_t_in.float()[:, :, :feature_length],item_t_in.float()[:, :, feature_length:feature_length*2],item_t_in.float()[:, :, 2*feature_length:])

        class_scores = item_out[0]
        feature_embedding = item_out[1]  # one embedding per sample in the batch

        # Append predictions and labels for evaluation
        predicted_classes = torch.argmax(class_scores, dim=1)
        all_preds_test.extend(predicted_classes.tolist())
        all_labels_test.extend(true_labels.tolist())

        # Store embeddings per true label
        for emb, label in zip(feature_embedding, true_labels):
            embedding_dict[label.item()].append(emb.cpu())


In [None]:
import numpy as np

# Turn every label's list of embedding tensors into one stacked NumPy array
# (one row per sample of that label).
embedding_dict_np = {
    label_key: np.stack([tensor.numpy() for tensor in tensors])
    for label_key, tensors in embedding_dict.items()
}

# Persist the dictionary; np.save stores a dict as a pickled object, so it has
# to be read back with allow_pickle=True.
np.save("feature_embeddings.npy", embedding_dict_np)


In [None]:
import numpy as np

# Load the per-class embedding dictionary saved as feature_embeddings.npy.
file_path = "/kaggle/working/feature_embeddings.npy"
# NOTE(security): allow_pickle=True unpickles the file contents; only load files
# produced by this notebook or another trusted source.
imu_data = np.load(file_path, allow_pickle=True)

# np.load returns an array wrapping the saved object; .item() unwraps it.
imu_data = imu_data.item()

# Check what keys it has
print(f"Type: {type(imu_data)}")
print(f"Keys: {imu_data.keys()}")

# Example: Access embeddings and labels if present.
# With the file written by the saving cell the keys are class labels, so this branch
# is only expected to run for files that use an 'embeddings'/'labels' layout.
if 'embeddings' in imu_data and 'labels' in imu_data:
    embeddings = imu_data['embeddings']
    labels = imu_data['labels']

    print(f"Embeddings shape: {np.array(embeddings).shape}")
    print(f"Labels: {labels[:5]}")

# Check the type and size of one class's embeddings.
# Prefer class 0, but fall back to the first stored key so the cell does not raise
# KeyError when class 0 has no samples in the test set.
sample_key = 0 if 0 in imu_data else next(iter(imu_data), None)

if sample_key is None:
    print("No embeddings stored in the loaded file.")
else:
    sample_value = imu_data[sample_key]

    print(f"Type of imu_data[{sample_key}]: {type(sample_value)}")

    # If it's a list or array, show the shape
    if isinstance(sample_value, (list, np.ndarray)):
        sample_array = np.array(sample_value)
        print(f"Shape of imu_data[{sample_key}]: {sample_array.shape}")
    else:
        print(f"Unexpected format: {sample_value}")



# Small-scale test for visualization

In [None]:
# Every ordered pair of distinct placement codes H, C and A, in row-major order:
# HC, HA, CH, CA, AH, AC.
cross_sensor = [first + second for first in "HCA" for second in "HCA" if first != second]

In [None]:
# Trimmed copy of the test data, keyed as 'batch_<i>' for entry i of testing_data.
testing_data_batches = {}

for i in range(len(testing_data)):
    # Keep only the first two rows of this entry ...
    leading_rows = testing_data[i][:2]
    # ... and only the first four elements of each of those rows.
    subset_samples = [row[:4] for row in leading_rows]
    testing_data_batches[f'batch_{i}'] = subset_samples
    print(f"Subset samples: {np.array(subset_samples).shape}, {i, 'i'}")  # Check the shape


In [None]:
# Rebuild testing_data as a plain list of the trimmed batches, in insertion order.
testing_data = []
for batch_key in testing_data_batches:
    batch_data = testing_data_batches[batch_key]
    batch_shape = np.array(batch_data).shape
    print(f"Processing {batch_key}: Shape of batch data {batch_shape}")
    testing_data.append(batch_data)

In [None]:
# Wrap the trimmed test data in the project's dataset class and serve it in batches of 8.
test_dataset = TestDataset(testing_data)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=8)

In [None]:
# Fresh accumulators for the evaluation pass that follows.
feature_embeddings = []
all_preds_test = []
all_labels_test = []

# NOTE(security): torch.load unpickles the checkpoint; only load files produced by
# this notebook or another trusted source.
test_model = torch.load(final_best_model)

# Switch to inference mode. eval() is equivalent to train(False), so a single call
# is sufficient.
test_model.eval()

In [None]:
# One attention output per test batch, in dataloader order.
overall_test = []
for batch_idx_t, item_t in enumerate(test_dataloader):
    with torch.no_grad():
        item_t_in, class_label = item_t
        true_labels = class_label

        # Split the last axis into three consecutive groups: the first two are
        # `feature_length` wide, the third takes the remainder.
        inputs = item_t_in.float()
        first_group = inputs[:, :, :feature_length]
        second_group = inputs[:, :, feature_length:feature_length*2]
        third_group = inputs[:, :, 2*feature_length:]

        item_out = test_model(first_group, second_group, third_group)

        # Model outputs used here: [0] class scores, [1] feature embedding,
        # [5] attention output kept for the heatmaps.
        class_scores = item_out[0]
        feature_embeddings.append(item_out[1])
        attention_matrix_test = item_out[5]
        overall_test.append(attention_matrix_test)

        # Predicted class = index of the highest score along dim 1.
        predicted_classes = class_scores.argmax(dim=1)
        all_preds_test.extend(predicted_classes.tolist())
        all_labels_test.extend(true_labels.tolist())

In [None]:
# for batch in overall_test:
#     print(batch.shape)
#     batch=batch.transpose(0,1)
#     print(batch.shape)
#     for i in range(8):
#         print(batch[i].shape)

In [None]:
# import matplotlib.pyplot as plt

# for batch_idx, batch in enumerate(overall_test):
#     batch = batch.transpose(0, 1)
    
#     for i in range(8):
#         values = batch[i].cpu().numpy()  # Move to CPU and convert to numpy array
#         values = values.squeeze()  # Ensure it's 1D
        
#         plt.figure(figsize=(8, 4))
#         plt.bar(range(len(values)), values)  # Use bar plot
#         plt.title(f"Batch {batch_idx}, Index {i}")
#         plt.xlabel("Index")
#         plt.ylabel("Value")
#         plt.show()


In [None]:
# Ordered pairs of distinct placement codes (H, C, A), row-major:
# HC, HA, CH, CA, AH, AC - the same order in which the heatmap cells are filled.
cross_sensor = [src + dst for src in "HCA" for dst in "HCA" if src != dst]

In [None]:
import matplotlib.pyplot as plt

# (row, col) of the off-diagonal cell that receives each of the six attention values,
# in order. With H=0, C=1, A=2 this is HC, HA, CH, CA, AH, AC.
pair_cells = [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

for batch_idx, batch in enumerate(overall_test):
    batch = batch.transpose(0, 1)
    # NOTE(review): the batch index is used directly as the activity id, which assumes
    # each batch holds samples of exactly one class - confirm against the dataloader.
    class_name = activity_id_mapping[batch_idx]

    # Iterate over the entries actually present instead of a hard-coded 8, so a
    # shorter final batch does not raise IndexError.
    for i in range(batch.shape[0]):
        values = batch[i].squeeze()  # Ensure it's 1D
        print(values)

        # Scatter the six values into a 3x3 matrix; the diagonal stays 0.
        matrix = torch.zeros(3, 3)
        for k, (row, col) in enumerate(pair_cells):
            matrix[row, col] = values[k]
        print("3x3 Matrix:\n", matrix)

        # Visualize the matrix as a heatmap
        plt.figure(figsize=(6, 6))
        sns.heatmap(
            matrix.cpu().numpy(),
            annot=True,
            fmt=".2f",
            cmap="viridis",
            square=True,
            xticklabels=["H", "C", "A"],
            yticklabels=["H", "C", "A"],
        )
        plt.title(f"Cross Sensor Attention - {class_name}")
        plt.xlabel("Placement - Query")
        plt.ylabel("Placement - Key")
        plt.show()



In [None]:
from matplotlib.colors import ListedColormap
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import numpy as np

# Viridis colormap whose "bad" (NaN / masked) cells are drawn in white.
cmap = sns.color_palette("viridis", as_cmap=True)
cmap.set_bad(color="#FFFFFF")  # Set the diagonal (masked) values to white

# (row, col) of the off-diagonal cell that receives each of the six attention values,
# in order. With H=0, C=1, A=2 this is HC, HA, CH, CA, AH, AC.
pair_cells = [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

# Boolean mask selecting the diagonal; it is the same for every plot, so build it once.
mask_np = np.eye(3, dtype=bool)

for batch_idx, batch in enumerate(overall_test):
    batch = batch.transpose(0, 1)
    # NOTE(review): the batch index is used directly as the activity id, which assumes
    # each batch holds samples of exactly one class - confirm against the dataloader.
    class_name = activity_id_mapping[batch_idx]

    # Iterate over the entries actually present instead of a hard-coded 8, so a
    # shorter final batch does not raise IndexError.
    for i in range(batch.shape[0]):
        values = batch[i].squeeze()  # Ensure it's 1D
        print(values)

        # Create 3x3 matrix on the same device as the values; the diagonal stays 0
        # until it is masked below.
        matrix = torch.zeros(3, 3, device=values.device)
        for k, (row, col) in enumerate(pair_cells):
            matrix[row, col] = values[k]

        print("3x3 Matrix:\n", matrix)

        # Mask the diagonal: a placement paired with itself has no value to show.
        matrix_np = matrix.cpu().numpy()  # Convert matrix to NumPy (move to CPU)
        matrix_np[mask_np] = np.nan  # Set diagonal values to NaN

        # Visualize the heatmap
        plt.figure(figsize=(10, 3))
        ax = sns.heatmap(
            matrix_np,
            annot=True,
            fmt=".2f",
            cmap=cmap,  # Use custom colormap
            square=True,
            xticklabels=["H", "C", "A"],
            yticklabels=["H", "C", "A"],
            mask=mask_np,  # Use the mask
            cbar_kws={"label": "Attention Weight"},  # Add color bar label
            linewidths=0.5,  # Add gridlines for the cells
            linecolor="grey",  # Set gridline color to grey
        )

        # Grey background for the axes and for the figure as a whole
        ax.set_facecolor("#D3D3D3")
        ax.figure.patch.set_facecolor("#D3D3D3")
        plt.title(f"Cross Sensor Attention - {class_name}")
        plt.xlabel("Placement - Query")
        plt.ylabel("Placement - Key")
        plt.show()


In [None]:
# End the Weights & Biases run; nothing further is logged after this cell.
wandb.finish()