In [1]:
import openpyxl
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import pywt

In [2]:
# Load the workbook that holds the recorded signals.
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_data.xlsx")

# The 'File_Data' sheet is consumed column by column by the preprocessing loop.
H_file_ws = H_file_wb['File_Data']

# Materialise the columns once so they can be indexed repeatedly.
cols = list(H_file_ws.columns)

# Count the columns that were actually materialised rather than trusting
# `max_column`: the loop indexes `cols[i]`, so its bound must never exceed
# len(cols) (e.g. for an empty sheet, where `cols` is an empty list).
num_cols = len(cols)

In [3]:
# Butterworth band-pass filter parameters
lowcut = 0.5  # Low cutoff frequency (Hz)
highcut = 20  # High cutoff frequency (Hz)
order = 4  # Filter order
sampling_rate = 1000  # Sampling frequency (Hz) used for the filter design
# NOTE(review): 1000 Hz was hard-coded in the filter call; confirm it matches
# the actual sampling rate of the recordings.

# Set to True to display the three-panel diagnostic figure for every signal.
# When False no figure is created at all (previously each figure was built
# and closed again without ever being shown).
show_plots = False

# The filter coefficients depend only on the constants above, so design the
# filter once instead of once per column.
b, a = signal.butter(order, [lowcut, highcut], btype='band', fs=sampling_rate)

# Per-signal statistical features
means = []
variances = []
std_devs = []

# Filtered, mean-removed signals (one entry per worksheet column)
preprocessed_signals = []

for i in range(num_cols):
    rows_range = cols[i]

    # Skip the first cell of the column (header row) and ignore empty cells.
    # Built in a single pass; the list used to be rebuilt once per cell.
    signal_data = [float(cell.value) for cell in rows_range[1:] if cell.value is not None]

    # Apply the Butterworth band-pass filter forwards and backwards
    # (the signal is one-dimensional, so axis 0 is its only axis).
    filtered_signal = signal.filtfilt(b, a, signal_data, axis=0)

    # Remove DC offset
    dc_offset_removed_signal = filtered_signal - np.mean(filtered_signal)

    if show_plots:
        # One panel per processing stage, top to bottom.
        stages = [
            ('Original EOG Signal', signal_data),
            ('Filtered EOG Signal', filtered_signal),
            ('Filtered and DC Offset Removed EOG Signal', dc_offset_removed_signal),
        ]
        fig = plt.figure(figsize=(12, 6))
        for panel, (title, trace) in enumerate(stages, start=1):
            plt.subplot(3, 1, panel)
            plt.plot(np.arange(0, len(trace)), trace)
            plt.xlabel('Time')
            plt.ylabel('Amplitude')
            plt.title(title)
        plt.tight_layout()
        plt.show()
        # Close explicitly so figures do not accumulate across iterations.
        plt.close(fig)

    preprocessed_signals.append(dc_offset_removed_signal)

    # First feature extraction: summary statistics of the preprocessed signal.
    # NOTE(review): the mean is taken after the mean has just been subtracted,
    # so this feature is ~0 up to floating-point error for every signal.
    # Consider a more informative statistic.
    mean_value = np.mean(dc_offset_removed_signal)
    variance_value = np.var(dc_offset_removed_signal)
    std_dev_value = np.std(dc_offset_removed_signal)

    # Append the statistical features to the respective lists
    means.append(mean_value)
    variances.append(variance_value)
    std_devs.append(std_dev_value)
    

In [4]:
# Sanity check: how many preprocessed signals were collected.
signal_count = len(preprocessed_signals)
print(signal_count)

201


In [5]:
# Decision Tree
# Features: mean, variance and standard deviation of each preprocessed signal
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Class labels live in the 'File_Class' sheet of the label workbook
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_class.xlsx")
H_file_ws = H_file_wb['File_Class']

# Take the second column of every row after the header row
# (assumes the class name is stored in that column)
labels = [row[1] for row in H_file_ws.iter_rows(min_row=2, values_only=True)]

# Feature matrix: one row per signal, one column per statistic
X = np.column_stack((means, variances, std_devs))
y = labels

tree_clf = DecisionTreeClassifier(random_state=42)

# 5-fold cross-validation over the complete data set
cv_scores = cross_val_score(tree_clf, X, labels, cv=5)
print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", np.mean(cv_scores))

# Hold-out evaluation: 80 % training / 20 % test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
tree_clf.fit(X_train, y_train)
y_pred = tree_clf.predict(X_test)

# Share of correctly classified test samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Cross-Validation Scores: [0.19512195 0.325      0.325      0.225      0.175     ]
Mean Cross-Validation Score: 0.2490243902439025
Accuracy: 0.36585365853658536


In [6]:
# Random Forest
# Features: mean, variance and standard deviation of each preprocessed signal

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Class labels live in the 'File_Class' sheet of the label workbook
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_class.xlsx")
H_file_ws = H_file_wb['File_Class']

# Take the second column of every row after the header row
# (assumes the class name is stored in that column)
labels = [row[1] for row in H_file_ws.iter_rows(min_row=2, values_only=True)]

# Feature matrix: one row per signal, one column per statistic
X = np.column_stack((means, variances, std_devs))
y = labels

rf_clf = RandomForestClassifier(random_state=42)

# 8-fold cross-validation over the complete data set
cv_scores = cross_val_score(rf_clf, X, labels, cv=8)
print("Cross-Validation Scores:", cv_scores)
print("Mean Accuracy:", np.mean(cv_scores))

# Hold-out evaluation: 80 % training / 20 % test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf_clf.fit(X_train, y_train)
y_pred = rf_clf.predict(X_test)

# Share of correctly classified test samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Cross-Validation Scores: [0.23076923 0.24       0.4        0.24       0.28       0.28
 0.24       0.16      ]
Mean Accuracy: 0.25884615384615384
Accuracy: 0.36585365853658536


In [23]:
# SVM (linear kernel)
# Features: mean, variance and standard deviation of each preprocessed signal

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import openpyxl

# Class labels live in the 'File_Class' sheet of the label workbook
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_class.xlsx")
H_file_ws = H_file_wb['File_Class']

# Take the second column of every row after the header row
# (assumes the class name is stored in that column)
labels = [row[1] for row in H_file_ws.iter_rows(min_row=2, values_only=True)]

# Feature matrix: one row per signal, one column per statistic
X = np.column_stack((means, variances, std_devs))

# The kernel can be swapped here if another type is wanted
svm_clf = SVC(kernel='linear', random_state=42)

# 10-fold cross-validation over the complete data set
cv_scores = cross_val_score(svm_clf, X, labels, cv=10)
print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", np.mean(cv_scores))

# Hold-out evaluation: 80 % training / 20 % test
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)
svm_clf.fit(X_train, y_train)
y_pred = svm_clf.predict(X_test)

# Share of correctly classified test samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Cross-Validation Scores: [0.38095238 0.3        0.3        0.45       0.35       0.3
 0.25       0.4        0.4        0.2       ]
Mean Cross-Validation Score: 0.3330952380952381
Accuracy: 0.3170731707317073


# **wavelet**

In [None]:
# Wavelet parameters
wavelet = 'db1'  # Daubechies wavelet db1
level = 1  # Wavelet decomposition level

# Decompose every filtered, mean-removed signal. Each entry is the list of
# coefficient arrays returned by pywt.wavedec for one signal.
wavelet_coeffs_list = [
    pywt.wavedec(preprocessed, wavelet, level=level)
    for preprocessed in preprocessed_signals
]

# Print a compact summary instead of every coefficient of every signal,
# which would flood the cell output.
print("Number of decomposed signals:", len(wavelet_coeffs_list))
if wavelet_coeffs_list:
    print(
        "Coefficient array lengths (first signal):",
        [len(band) for band in wavelet_coeffs_list[0]],
    )


In [17]:
# Decision Tree
# Features: the flattened wavelet coefficients of each preprocessed signal
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import openpyxl

# Class labels live in the 'File_Class' sheet of the label workbook
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_class.xlsx")
H_file_ws = H_file_wb['File_Class']

# Take the second column of every row after the header row
# (assumes the class name is stored in that column)
labels = [row[1] for row in H_file_ws.iter_rows(min_row=2, values_only=True)]

# Stack the coefficient lists into one array, then flatten everything
# belonging to a signal into a single feature row
X = np.array(wavelet_coeffs_list)
X = X.reshape(len(X), -1)

tree_clf = DecisionTreeClassifier(random_state=42)

# 10-fold cross-validation over the complete data set
cv_scores = cross_val_score(tree_clf, X, labels, cv=10)
print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", np.mean(cv_scores))

# Hold-out evaluation: 80 % training / 20 % test
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)
tree_clf.fit(X_train, y_train)
y_pred = tree_clf.predict(X_test)

# Share of correctly classified test samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Cross-Validation Scores: [0.57142857 0.55       0.4        0.55       0.6        0.2
 0.2        0.55       0.35       0.25      ]
Mean Cross-Validation Score: 0.42214285714285715
Accuracy: 0.2926829268292683


In [19]:
# Random Forest
# Features: the flattened wavelet coefficients of each preprocessed signal

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Class labels live in the 'File_Class' sheet of the label workbook
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_class.xlsx")
H_file_ws = H_file_wb['File_Class']

# Take the second column of every row after the header row
# (assumes the class name is stored in that column)
labels = [row[1] for row in H_file_ws.iter_rows(min_row=2, values_only=True)]

# Stack the coefficient lists into one array, then flatten everything
# belonging to a signal into a single feature row
X = np.array(wavelet_coeffs_list)
X = X.reshape(len(X), -1)

rf_clf = RandomForestClassifier(random_state=42)

# 8-fold cross-validation over the complete data set
cv_scores = cross_val_score(rf_clf, X, labels, cv=8)
print("Cross-Validation Scores:", cv_scores)
print("Mean Accuracy:", np.mean(cv_scores))

# Hold-out evaluation: 80 % training / 20 % test
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)
rf_clf.fit(X_train, y_train)
y_pred = rf_clf.predict(X_test)

# Share of correctly classified test samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Cross-Validation Scores: [0.57692308 0.6        0.76       0.72       0.64       0.44
 0.56       0.64      ]
Mean Accuracy: 0.6171153846153846
Accuracy: 0.6585365853658537


In [21]:
# SVM (linear kernel)
# Features: the flattened wavelet coefficients of each preprocessed signal

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import openpyxl

# Class labels live in the 'File_Class' sheet of the label workbook
H_file_wb = openpyxl.load_workbook("/kaggle/input/hci-data/output_class.xlsx")
H_file_ws = H_file_wb['File_Class']

# Take the second column of every row after the header row
# (assumes the class name is stored in that column)
labels = [row[1] for row in H_file_ws.iter_rows(min_row=2, values_only=True)]

# Stack the coefficient lists into one array, then flatten everything
# belonging to a signal into a single feature row
X = np.array(wavelet_coeffs_list)
X = X.reshape(len(X), -1)

# The kernel can be swapped here if another type is wanted
svm_clf = SVC(kernel='linear', random_state=42)

# 5-fold cross-validation over the complete data set
cv_scores = cross_val_score(svm_clf, X, labels, cv=5)
print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", np.mean(cv_scores))

# Hold-out evaluation: 80 % training / 20 % test
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)
svm_clf.fit(X_train, y_train)
y_pred = svm_clf.predict(X_test)

# Share of correctly classified test samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Cross-Validation Scores: [0.58536585 0.65       0.575      0.5        0.5       ]
Mean Cross-Validation Score: 0.5620731707317074
Accuracy: 0.5365853658536586
