In [159]:
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt, resample
import matplotlib.pyplot as plt

In [160]:
df = pd.read_csv("D:\shared_git\MaestriaThesis\FeaturesTabs\pp01_t3.csv")

new_column_names = ['channels', 'ID', 'Type', 'Epoch']
df.rename(columns=dict(zip(df.columns[:4], new_column_names)), inplace=True)

filtered_class_labels = df[df['channels'] == 5]
filtered_class_labels = df[df['Type'].isin([1,2,3,4,5,6,7,8,9,10,11,12])]
filtered_class_labels = filtered_class_labels.drop(columns=['ID'])
filtered_class_labels = filtered_class_labels.drop(columns=['channels', 'Epoch'])
filtered_class_labels = filtered_class_labels.reset_index(drop=True)

data_array = filtered_class_labels.iloc[:, 4:].values
print(data_array)

[[0.0418493  0.0506756  0.12915846 ... 1.10669469 4.32195351 3.77053649]
 [0.09349911 0.16133139 0.24302722 ... 1.70000567 5.09080783 4.41652664]
 [0.11789428 0.10897665 0.09611266 ... 0.13616219 0.36092414 0.27227442]
 ...
 [0.28932336 0.25342643 0.20882229 ... 0.48709229 2.02649375 4.09828131]
 [0.64274099 0.43413531 0.24110945 ... 0.30086708 0.78178367 1.19009372]
 [0.23931071 0.27934634 0.26201754 ... 0.57087999 1.96838703 3.53504059]]


In [161]:
class_labels = filtered_class_labels.iloc[:,0]
mods = class_labels.copy() 
mod = ['Air', 'Vib', 'Car']
mods[np.isin(class_labels,[1, 2, 3, 4])] = mod[0]
mods[np.isin(class_labels, [5, 6, 7, 8])] = mod[1]
mods[np.isin(class_labels, [9, 10, 11, 12])] = mod[2]
print(mods.head(7000)) 
class_labels = (class_labels - 1) % 4 + 1 

0       Air
1       Air
2       Air
3       Air
4       Air
       ... 
6995    Car
6996    Car
6997    Car
6998    Car
6999    Car
Name: Type, Length: 7000, dtype: object


In [176]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = data_array
y = mods.values

y = y.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

knn_model = KNeighborsClassifier(n_neighbors=13)

knn_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 45.17%


In [163]:
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

mutual_info_scores = mutual_info_classif(X_train, y_train)

for feature, score in zip(range(X_train.shape[1]), mutual_info_scores):
    print(f"Feature index: {feature}, Mutual Information Score: {score}")

threshold = 0.0001  

selected_features = np.where(mutual_info_scores > threshold)[0]

X_train_selected = X_train[:, selected_features]
X_test_selected = X_test[:, selected_features]

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_selected, y_train)

y_pred = model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with selected features: {accuracy * 100:.2f}%")

Feature index: 0, Mutual Information Score: 0.0
Feature index: 1, Mutual Information Score: 0.01583827880454125
Feature index: 2, Mutual Information Score: 0.0
Feature index: 3, Mutual Information Score: 0.0
Feature index: 4, Mutual Information Score: 0.010581728343493246
Feature index: 5, Mutual Information Score: 0.0
Feature index: 6, Mutual Information Score: 0.011824932677851674
Feature index: 7, Mutual Information Score: 0.000286994574032029
Feature index: 8, Mutual Information Score: 0.0
Feature index: 9, Mutual Information Score: 0.0
Feature index: 10, Mutual Information Score: 0.0010869260077090992
Feature index: 11, Mutual Information Score: 0.0140393192083903
Feature index: 12, Mutual Information Score: 0.0
Feature index: 13, Mutual Information Score: 0.002557261020339041
Feature index: 14, Mutual Information Score: 0.0
Feature index: 15, Mutual Information Score: 0.003057897964903855
Feature index: 16, Mutual Information Score: 0.0007519003310978434
Feature index: 17, Mutual

In [164]:
print(selected_features)

[  1   4   6   7  10  11  13  15  16  17  19  20  22  24  25  26  27  28
  30  33  34  35  36  39  40  41  42  43  44  48  50  52  53  55  56  57
  58  62  63  64  65  66  68  69  70  71  72  73  74  75  76  77  78  79
  81  84  85  86  88  89  90  91  94  95  96  98 100 103 104 105 106 109
 110]


In [177]:
knn_model = KNeighborsClassifier(n_neighbors=13)

knn_model.fit(X_train_selected, y_train)

y_pred = knn_model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 43.97%


In [175]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

param_grid = {'n_neighbors': list(range(1, 31))}
knn_model = KNeighborsClassifier()

grid_search = GridSearchCV(knn_model, param_grid, cv=5, scoring='accuracy')

grid_search.fit(X_train_selected, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy: {:.2f}%".format(grid_search.best_score_ * 100))

y_pred = grid_search.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
print(f"Test set accuracy: {accuracy * 100:.2f}%")


Best parameters: {'n_neighbors': 13}
Best cross-validation accuracy: 44.26%
Test set accuracy: 43.97%


In [178]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_neighbors': list(range(1, 31)),
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [20, 30, 40],
    'p': [1, 2]
}

knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_selected, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy: {:.2f}%".format(grid_search.best_score_ * 100))


Best parameters: {'algorithm': 'auto', 'leaf_size': 20, 'n_neighbors': 20, 'p': 1, 'weights': 'distance'}
Best cross-validation accuracy: 46.65%


In [182]:
knn_model = KNeighborsClassifier(n_neighbors=20,p=1,weights='distance',leaf_size=20,algorithm='auto')

knn_model.fit(X_train_selected, y_train)

y_pred = knn_model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 45.03%


In [181]:
knn_model = KNeighborsClassifier(n_neighbors=12)

knn_model.fit(X_train_selected, y_train)

y_pred = knn_model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 44.04%


In [166]:
from sklearn.ensemble import GradientBoostingClassifier

gbm_model = GradientBoostingClassifier()

gbm_model.fit(X_train_selected, y_train)

y_pred = gbm_model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
print(f"GBM Accuracy: {accuracy * 100:.2f}%")

KeyboardInterrupt: 

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Initialize the model
log_reg_model = LogisticRegression()

# Fit the model on the training data
log_reg_model.fit(X_train_selected, y_train)

# Predict the labels for the test set
y_pred = log_reg_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Logistic Regression Accuracy: {accuracy * 100:.2f}%")

Logistic Regression Accuracy: 41.06%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
