# TM10007 Assignment template -- ECG data

## Data loading and cleaning

Below are functions to load the dataset of your choice. After that, it is all up to you to create and evaluate a classification method. Beware, there may be missing values in these datasets. Good luck!

In [28]:
# Run this to use from Colab environment
!git clone https://github.com/jveenland/tm10007_ml.git

import zipfile
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Extract dataset
with zipfile.ZipFile('/content/tm10007_ml/ecg/ecg_data.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/tm10007_ml/ecg')

data = pd.read_csv('/content/tm10007_ml/ecg/ecg_data.csv', index_col=0)

print(f'The number of samples: {len(data.index)}')
print(f'The number of features: {len(data.columns) - 1}')  # Excluding label column

# Extract features and labels
X = data.iloc[:, :-1].values  # All columns except the last one
y = data.iloc[:, -1].values   # Last column as labels

# Variance calculation
X_variances = X.var(axis=0)  # Calculate variance for each feature
sorted_columns = pd.Series(X_variances).sort_values(ascending=False)  # Sort features by variance

# Select the first 50% of the columns
num_columns_to_select = round(len(sorted_columns) * 0.4)
selected_columns = sorted_columns[:num_columns_to_select]

# Reorder the columns in X based on the selected columns' indices
X_selected = data.iloc[:, selected_columns.index].values

# Scale the features
scaler = RobustScaler(quantile_range=(30,70))
X_scaled = scaler.fit_transform(X_selected)

# Split into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define SVM hyperparameters
degrees = [1, 3]
coef0s = [0.01, 0.3, 0.5]
slacks = [0.01, 0.3, 0.5]

# Train and evaluate SVM models with different hyperparameters
best_model = None
best_test_acc = 0

print("\nEvaluating SVM models with different hyperparameters:")
print("-" * 60)
for degree in degrees:
    for coef0 in coef0s:
        for slack in slacks:
            clf = SVC(kernel='poly', degree=degree, coef0=coef0, C=slack, gamma='scale')
            clf.fit(X_train, y_train)

            y_train_pred = clf.predict(X_train)
            y_test_pred = clf.predict(X_test)

            train_acc = accuracy_score(y_train, y_train_pred)
            test_acc = accuracy_score(y_test, y_test_pred)

            print(f"Degree: {degree}, Coef0: {coef0}, C: {slack}")
            print(f"  Training Accuracy: {train_acc:.4f}")
            print(f"  Testing Accuracy: {test_acc:.4f}")
            print("-" * 60)

            # Track best model
            if test_acc > best_test_acc:
                best_test_acc = test_acc
                best_model = clf

# Display the best performing model
print("\nBest Model:")
print(f"Degree: {best_model.degree}, Coef0: {best_model.coef0}, C: {best_model.C}")
print(f"Best Testing Accuracy: {best_test_acc:.4f}")


fatal: destination path 'tm10007_ml' already exists and is not an empty directory.
The number of samples: 827
The number of features: 9000

Evaluating SVM models with different hyperparameters:
------------------------------------------------------------
Degree: 1, Coef0: 0.01, C: 0.01
  Training Accuracy: 0.8215
  Testing Accuracy: 0.8313
------------------------------------------------------------
Degree: 1, Coef0: 0.01, C: 0.3
  Training Accuracy: 0.8260
  Testing Accuracy: 0.8193
------------------------------------------------------------
Degree: 1, Coef0: 0.01, C: 0.5
  Training Accuracy: 0.8336
  Testing Accuracy: 0.8373
------------------------------------------------------------
Degree: 1, Coef0: 0.3, C: 0.01
  Training Accuracy: 0.8215
  Testing Accuracy: 0.8313
------------------------------------------------------------
Degree: 1, Coef0: 0.3, C: 0.3
  Training Accuracy: 0.8260
  Testing Accuracy: 0.8193
------------------------------------------------------------
Degree: 1,