In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
from collections import Counter

input_file_segments = '/content/drive/MyDrive/Project/ExtractedSegments_1s_set4.npy'
input_file_labels = '/content/drive/MyDrive/Project/ExtractedLabelas_1s_set4.npy'

loaded_segments = np.load(input_file_segments, allow_pickle=True)
loaded_labels = np.load(input_file_labels, allow_pickle=True)

print(f'Total segments loaded: {len(loaded_segments)}')
print(f'Total labels loaded: {len(loaded_labels)}')
print(f'Sample segment shape: {loaded_segments[0].shape}')
print(f'Sample label: {loaded_labels[0]}')

label_counts = Counter(loaded_labels)
print(f'Label counts: {label_counts}')

max_count = max(label_counts.values())

def add_gaussian_noise(data, mean=0, std_dev=0.05):
    noise = np.random.normal(mean, std_dev, data.shape)
    return data + noise

augmented_segments = []
augmented_labels = []

for segment, label in zip(loaded_segments, loaded_labels):
    if segment.shape == (125, 16):
        augmented_segments.append(segment)
        augmented_labels.append(label)
    else:
        print(f"Skipping segment with invalid shape: {segment.shape}")

for label, count in label_counts.items():
    if count < max_count:
        label_segments = [seg for seg, lbl in zip(loaded_segments, loaded_labels) if lbl == label]

        num_samples_needed = max_count - count

        for _ in range(num_samples_needed):
            segment = label_segments[np.random.randint(len(label_segments))]
            noisy_segment = add_gaussian_noise(segment)

            if noisy_segment.shape == (125, 16):
                augmented_segments.append(noisy_segment)
                augmented_labels.append(label)
            else:
                print(f"Generated segment with invalid shape: {noisy_segment.shape}")

augmented_segments = np.array(augmented_segments)
augmented_labels = np.array(augmented_labels)

print(f'Augmented data shape: {augmented_segments.shape}')
print(f'Augmented labels shape: {augmented_labels.shape}')
print(f'New label counts: {Counter(augmented_labels)}')

Total segments loaded: 937
Total labels loaded: 937
Sample segment shape: (125, 16)
Sample label: Walking
Label counts: Counter({'Walking': 290, 'Aha': 290, 'Doing Other Task': 290, 'Impasse': 42, 'Re-evaluation': 25})
Augmented data shape: (1450, 125, 16)
Augmented labels shape: (1450,)
New label counts: Counter({'Walking': 290, 'Aha': 290, 'Doing Other Task': 290, 'Impasse': 290, 'Re-evaluation': 290})


In [None]:
data = augmented_segments
final_labels = augmented_labels

In [None]:
import numpy as np

def compute_mean(data):
    return np.mean(data, axis=0)

mean_features = np.array([compute_mean(subject_data) for subject_data in data])

def compute_variance(data):
    return np.var(data, axis=0)

variance_features = np.array([compute_variance(subject_data) for subject_data in data])

def compute_first_diff(data):
    return np.mean(np.abs(np.diff(data, axis=0)), axis=0)

first_diff_features = np.array([compute_first_diff(subject_data) for subject_data in data])

def compute_second_diff(data):
    return np.mean(np.abs(np.diff(data, n=2, axis=0)), axis=0)

second_diff_features = np.array([compute_second_diff(subject_data) for subject_data in data])

combined_time_features = np.concatenate(
    [mean_features, variance_features, first_diff_features, second_diff_features], axis=1)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import numpy as np
import pandas as pd

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(final_labels)

concatenated_array_features = np.array(combined_time_features , dtype=np.float32)

X_train, X_test, y_train, y_test = train_test_split(concatenated_array_features, encoded_labels, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

classifiers = {
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(),
    "Naive Bayes": GaussianNB()
}

results = {}

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    results[name] = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    }

    print(f"Results for {name}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("")

results_df = pd.DataFrame(results).T
print("\nSummary of Classifier Performance:")
print(results_df)

Results for SVM:
Accuracy: 0.3276
Precision: 0.4190
Recall: 0.3276
F1 Score: 0.2659

Results for Random Forest:
Accuracy: 0.5448
Precision: 0.5331
Recall: 0.5448
F1 Score: 0.5375



Parameters: { "use_label_encoder" } are not used.



Results for XGBoost:
Accuracy: 0.5069
Precision: 0.4898
Recall: 0.5069
F1 Score: 0.4973

Results for Gradient Boosting:
Accuracy: 0.5069
Precision: 0.4976
Recall: 0.5069
F1 Score: 0.5004





Results for AdaBoost:
Accuracy: 0.3759
Precision: 0.3895
Recall: 0.3759
F1 Score: 0.3759

Results for K-Nearest Neighbors:
Accuracy: 0.4690
Precision: 0.4004
Recall: 0.4690
F1 Score: 0.4126

Results for Logistic Regression:
Accuracy: 0.3690
Precision: 0.3866
Recall: 0.3690
F1 Score: 0.3253

Results for Naive Bayes:
Accuracy: 0.2069
Precision: 0.3259
Recall: 0.2069
F1 Score: 0.1558


Summary of Classifier Performance:
                     Accuracy  Precision    Recall  F1 Score
SVM                  0.327586   0.418963  0.327586  0.265877
Random Forest        0.544828   0.533142  0.544828  0.537486
XGBoost              0.506897   0.489777  0.506897  0.497305
Gradient Boosting    0.506897   0.497636  0.506897  0.500430
AdaBoost             0.375862   0.389461  0.375862  0.375865
K-Nearest Neighbors  0.468966   0.400386  0.468966  0.412603
Logistic Regression  0.368966   0.386636  0.368966  0.325339
Naive Bayes          0.206897   0.325899  0.206897  0.155795


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
