In [3]:
import numpy as np
import pandas as pd
import pywt
import glob
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.feature_selection import SelectKBest, f_classif

# Load all CSV files.
# glob.glob() returns matches in arbitrary, filesystem-dependent order; sorting
# keeps the "signal index" of each recording stable from run to run.
csv_files = sorted(glob.glob("/content/*.csv"))
dfs = []
for file in csv_files:
    # read_csv runs with default settings, so the first row of every file is
    # consumed as a header; it is then replaced by the positional names below.
    temp_df = pd.read_csv(file)
    # Name the columns "1", "2", ... so files with different headers line up.
    temp_df.columns = [str(position) for position in range(1, temp_df.shape[1] + 1)]
    dfs.append(temp_df)

# Stack all recordings into one frame with a fresh 0..N-1 row index.
# NOTE: pd.concat raises ValueError when no CSV file was found.
comb_df = pd.concat(dfs, ignore_index=True)

# Define markers: every class is bracketed by a "start <image>" row and an
# "end <image>" row, where <image> is the file name of the picture shown.
_marker_images = [
    "zero.png", "one.png", "two.jpg", "three.jpg", "four.jpg", "five.jpg",
    "six.jpg", "seven.jpg", "eight.jpg", "nine.jpg", "ten.jpg",
]
start_markers = ["start " + image for image in _marker_images]
end_markers = ["end " + image for image in _marker_images]

# Initialize start and end index arrays.
# s[f][c] / e[f][c] hold the comb_df row positions of the start / end marker
# of class c in the f-th recording; entries that are never found stay 0.
num_signals = len(csv_files)
num_classes = len(start_markers)
s = np.zeros((num_signals, num_classes), dtype=int)
e = np.zeros((num_signals, num_classes), dtype=int)

# Pull the marker column out once: a scalar .iloc lookup per row and per class
# inside the nested loop is far slower than walking a plain array.
marker_column = comb_df['1'].to_numpy()

# Find start and end indices for each signal and class.
for j in range(num_classes):
    # Index of the recording whose markers are currently being searched; it
    # advances each time an end marker of this class is seen.
    fil = 0
    for i, cell in enumerate(marker_column):
        if cell == start_markers[j]:
            s[fil][j] = i
            continue
        if cell == end_markers[j]:
            e[fil][j] = i
            fil += 1
        # Every recording has been matched for this class; stop scanning.
        if fil == num_signals:
            break

# Extract signals from both channels separately.
# Segments are appended file-major: all classes of recording 0, then all
# classes of recording 1, and so on.
signals_channel_1, signals_channel_2 = [], []
for i, j in np.ndindex(num_signals, num_classes):
    # Rows strictly between the start marker and the end marker.
    segment = slice(s[i][j] + 1, e[i][j])
    signal_channel_1 = comb_df['1'].iloc[segment].to_numpy()
    signal_channel_2 = comb_df['2'].iloc[segment].to_numpy()
    # Non-numeric cells are coerced to NaN and then replaced by 0.
    signals_channel_1.append(
        np.nan_to_num(pd.to_numeric(signal_channel_1, errors='coerce'))
    )
    signals_channel_2.append(
        np.nan_to_num(pd.to_numeric(signal_channel_2, errors='coerce'))
    )

# Time slicing function for data augmentation
def time_slicing(signal, slice_size=100, overlap=50):
    """Cut ``signal`` into fixed-length windows that share ``overlap`` samples.

    Args:
        signal: Any sliceable sequence supporting ``len()`` (list, ndarray, ...).
        slice_size: Number of samples in every window.
        overlap: Number of samples shared by two consecutive windows. Must
            satisfy ``0 <= overlap < slice_size``.

    Returns:
        A list of windows, each of length ``slice_size``. Trailing samples
        that do not fill a whole window are dropped, and a signal shorter
        than ``slice_size`` yields an empty list.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than
            ``slice_size`` (the windows would never advance).
    """
    if not 0 <= overlap < slice_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < slice_size, "
            f"got overlap={overlap}, slice_size={slice_size}"
        )
    # The stride between window starts is what is left after the shared part;
    # with the defaults (100, 50) this is 50.
    step = slice_size - overlap
    last_start = len(signal) - slice_size
    return [
        signal[start:start + slice_size]
        for start in range(0, last_start + 1, step)
    ]

# Apply time slicing to generate augmented signals for both channels
def augment_signals(signals, num_classes):
    """Slice every signal into windows and label each window with its class.

    ``signals`` is expected to be ordered file-major: ``num_classes``
    consecutive entries per recording, one per class, in class order. The
    class of the entry at position ``p`` is therefore ``p % num_classes``
    (``p // num_classes`` would be the recording index, not the class).

    Args:
        signals: Sequence of 1-D signals in the order described above.
        num_classes: Number of classes recorded per file.

    Returns:
        A tuple ``(augmented_signals, augmented_labels)`` of two equally long
        lists: the windows produced by ``time_slicing`` with its default
        settings, and the integer class label of each window.
    """
    augmented_signals = []
    augmented_labels = []
    for position, signal in enumerate(signals):
        sliced_signals = time_slicing(signal)
        class_label = position % num_classes
        augmented_signals.extend(sliced_signals)
        augmented_labels.extend([class_label] * len(sliced_signals))
    return augmented_signals, augmented_labels

# Slice every extracted segment into windows, one pass per channel.
augmented_signals_channel_1, augmented_labels_channel_1 = augment_signals(
    signals=signals_channel_1,
    num_classes=num_classes,
)
augmented_signals_channel_2, augmented_labels_channel_2 = augment_signals(
    signals=signals_channel_2,
    num_classes=num_classes,
)

# Extract wavelet features from augmented signals for both channels
def extract_wavelet_features(signal):
    """Return the absolute wavelet coefficients of ``signal`` as one flat vector.

    The signal is decomposed with ``pywt.wavedec`` using the 'db1' wavelet at
    level 5; the coefficient arrays are flattened and joined in the order
    ``wavedec`` returns them, and the magnitude of every coefficient is taken.
    """
    decomposition = pywt.wavedec(signal, 'db1', level=5)
    flattened_bands = [np.ravel(band) for band in decomposition]
    return np.abs(np.concatenate(flattened_bands))

# One wavelet feature vector per augmented window, per channel.
wavelet_features_channel_1 = list(map(extract_wavelet_features, augmented_signals_channel_1))
wavelet_features_channel_2 = list(map(extract_wavelet_features, augmented_signals_channel_2))

# Prepare DataFrame for both channels
def prepare_wavelet_df(wavelet_features, augmented_labels):
    """Build a feature table with a trailing 'label' column.

    Each entry of ``wavelet_features`` becomes one row; ``augmented_labels``
    is attached as the 'label' column. Any missing value in the resulting
    table is replaced by 0 before it is returned.
    """
    labelled = pd.DataFrame(wavelet_features).assign(label=augmented_labels)
    return labelled.fillna(0)

# Labelled feature table for each channel.
wavelet_df_channel_1 = prepare_wavelet_df(
    wavelet_features=wavelet_features_channel_1,
    augmented_labels=augmented_labels_channel_1,
)
wavelet_df_channel_2 = prepare_wavelet_df(
    wavelet_features=wavelet_features_channel_2,
    augmented_labels=augmented_labels_channel_2,
)

def process_data(wavelet_df):
    """Split one channel's feature table and preprocess the three splits.

    The table is split (stratified on 'label', ``random_state=42``) into a
    test set and a train+validation set, and the latter is split again with
    20% going to validation. A StandardScaler, a Normalizer and a SelectKBest
    (ANOVA F-score) selector are then fitted on the training split only and
    applied to all three splits.

    Args:
        wavelet_df: DataFrame of feature columns plus a 'label' column.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)`` where the X
        values are the selector outputs and the y values are the label
        subsets produced by ``train_test_split``.
    """
    X = wavelet_df.drop('label', axis=1)
    y = wavelet_df['label']

    # A stratified split needs at least one test sample per class, so the
    # absolute test size must never drop below the number of classes; the
    # previous fixed floor of 10 was too small for 11 classes.
    n_classes = y.nunique()
    test_size = max(10, n_classes, len(y) // 10)
    validation_size = 0.2

    # Split the data into train+validation and test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=y
    )

    # Further split the train+validation set into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val,
        y_train_val,
        test_size=validation_size,
        random_state=42,
        stratify=y_train_val,
    )

    # Apply Standard Scaling and Normalization (fitted on training data only
    # so no information leaks from the validation/test splits).
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    normalizer = Normalizer()
    X_train_normalized = normalizer.fit_transform(X_train_scaled)
    X_val_normalized = normalizer.transform(X_val_scaled)
    X_test_normalized = normalizer.transform(X_test_scaled)

    # Feature selection: keep the 50 best features, or all of them when the
    # table has fewer than 50 (SelectKBest rejects k > n_features).
    n_selected = min(50, X_train_normalized.shape[1])
    selector = SelectKBest(f_classif, k=n_selected)
    X_train_selected = selector.fit_transform(X_train_normalized, y_train)
    X_val_selected = selector.transform(X_val_normalized)
    X_test_selected = selector.transform(X_test_normalized)

    return X_train_selected, X_val_selected, X_test_selected, y_train, y_val, y_test

# Split, scale and select features independently for each channel.
(
    X_train_selected_1, X_val_selected_1, X_test_selected_1,
    y_train_1, y_val_1, y_test_1,
) = process_data(wavelet_df_channel_1)
(
    X_train_selected_2, X_val_selected_2, X_test_selected_2,
    y_train_2, y_val_2, y_test_2,
) = process_data(wavelet_df_channel_2)

# Initialize classifiers
svm, knn, nb = SVC(), KNeighborsClassifier(weights='distance'), GaussianNB()

# Both grid searches share one 2-fold stratified cross-validation splitter.
strat_kfold = StratifiedKFold(n_splits=2)

# Hyperparameter tuning for SVM (RBF kernel, 4 x 4 grid over C and gamma)
param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf'],
}
grid_svm = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_svm,
    cv=strat_kfold,
    refit=True,
    verbose=2,
)

# Hyperparameter tuning for KNN with distance weights
param_grid_knn = {'n_neighbors': [3, 5, 7, 9]}
grid_knn = GridSearchCV(
    estimator=KNeighborsClassifier(weights='distance'),
    param_grid=param_grid_knn,
    cv=strat_kfold,
    refit=True,
    verbose=2,
)

def train_and_evaluate(models, X_train_selected, X_val_selected, X_test_selected, y_train, y_val, y_test, channel_name):
    """Fit every model on the training split and print its accuracies.

    For the entries named 'SVM' and 'KNN' the estimator stored in ``models``
    is not used: the module-level ``grid_svm`` / ``grid_knn`` searches are
    fitted instead and their ``best_estimator_`` is evaluated. Any other
    entry is fitted directly. Validation and test accuracy are printed for
    each model; nothing is returned.

    Args:
        models: Mapping of display name to estimator.
        X_train_selected, X_val_selected, X_test_selected: Feature matrices.
        y_train, y_val, y_test: Matching label collections.
        channel_name: Label printed in the results header.
    """
    print(f"Results for {channel_name}")
    for name, model in models.items():
        # Pick the grid search that tunes this model, if there is one.
        search = None
        if name == 'SVM':
            search = grid_svm
        elif name == 'KNN':
            search = grid_knn

        if search is None:
            model.fit(X_train_selected, y_train)
            fitted = model
        else:
            search.fit(X_train_selected, y_train)
            fitted = search.best_estimator_

        val_predictions = fitted.predict(X_val_selected)
        test_predictions = fitted.predict(X_test_selected)
        val_accuracy = accuracy_score(y_val, val_predictions)
        test_accuracy = accuracy_score(y_test, test_predictions)
        print(f"{name} Validation Accuracy: {val_accuracy}")
        print(f"{name} Test Accuracy: {test_accuracy}")

# Display name -> estimator handed to train_and_evaluate.
models = {'SVM': svm, 'KNN': knn, 'Naive Bayes': nb}

# Train and evaluate each channel in turn, Channel 1 first.
channel_splits = (
    ("Channel 1", (X_train_selected_1, X_val_selected_1, X_test_selected_1,
                   y_train_1, y_val_1, y_test_1)),
    ("Channel 2", (X_train_selected_2, X_val_selected_2, X_test_selected_2,
                   y_train_2, y_val_2, y_test_2)),
)
for channel_name, split in channel_splits:
    train_and_evaluate(models, *split, channel_name)


  f = msb / msw
  f = msb / msw


Results for Channel 1
Fitting 2 folds for each of 16 candidates, totalling 32 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.5s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.5s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.6s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.6s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.6s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.6s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.6s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.6s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.4s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.4s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=   0.7s
[CV] END .................