In [14]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score
from sklearn.utils import shuffle

In [15]:
# Step 1: Load Data
train_data = pd.read_csv('C:/Users/User/Desktop/Hackathon/Impulse/output/standardized_features.csv')
val_data = pd.read_csv('C:/Users/User/Desktop/Hackathon/Impulse/output/standardized_features_validation.csv')
test_data = pd.read_csv('C:/Users/User/Desktop/Hackathon/Impulse/output/standardized_features_test.csv')


In [16]:
# Step 2: Map Class Labels to Integers
class_mapping = {
    'Complex_Partial_Seizures': 0,
    'Electrographic_Seizures': 1,
    'Normal': 2,
    'Video_detected_Seizures_with_no_visual_change_over_EEG': 3
}

# Apply the mapping to train and validation data
train_data['class'] = train_data['class'].map(class_mapping)
val_data['class'] = val_data['class'].map(class_mapping)

In [17]:
# Step 3: Preprocessing
# Select relevant features
selected_features = [
    'zcr', 'fft_mean', 'fft_std', 'mean_freq', 'median_freq', 
    'psd', 'approx_mean', 'approx_std', 'detail1_mean', 'detail1_std'
]

# Split features (X) and labels (y) for training and validation datasets
X_train = train_data[selected_features]
y_train = train_data['class']

X_val = val_data[selected_features]
y_val = val_data['class']

# Test data: Exclude `file` and `channel` columns, use only features
X_test = test_data[selected_features]  # Test data features

# Normalize/scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [18]:
# Step 4: Define Model
xgb_model = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss'
)


In [19]:
# Step 5: Train the Model
xgb_model.fit(X_train_scaled, y_train)

Parameters: { "use_label_encoder" } are not used.



In [20]:
# Step 6: Evaluate on Validation Data
y_val_pred = xgb_model.predict(X_val_scaled)
y_val_prob = xgb_model.predict_proba(X_val_scaled)

balanced_acc = balanced_accuracy_score(y_val, y_val_pred)
roc_auc = roc_auc_score(pd.get_dummies(y_val), y_val_prob, multi_class='ovr')
classification_rep = classification_report(y_val, y_val_pred)

print("Validation Metrics:")
print("Balanced Accuracy:", balanced_acc)
print("ROC AUC Score:", roc_auc)
print("Classification Report:\n", classification_rep)


Validation Metrics:
Balanced Accuracy: 0.7245850944582617
ROC AUC Score: 0.9457026319681355
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.74      0.79     10431
           1       0.76      0.35      0.48      2603
           2       0.76      0.92      0.83     13224
           3       0.99      0.89      0.93       399

    accuracy                           0.79     26657
   macro avg       0.84      0.72      0.76     26657
weighted avg       0.80      0.79      0.78     26657



In [21]:
# Step 7: Predict on Test Data
# Since test data doesn't have labels, we'll predict the class and probabilities
y_test_pred = xgb_model.predict(X_test_scaled)
y_test_prob = xgb_model.predict_proba(X_test_scaled)

# Save predictions to CSV
test_predictions = pd.DataFrame({
    'File': test_data['file'],
    'Channel': test_data['channel'],
    'Predicted Label': y_test_pred,
    'Probability': np.max(y_test_prob, axis=1)  # Max probability for the predicted class
})
test_predictions.to_csv("test_predictions.csv", index=False)

print("Predictions on test data saved to 'test_predictions.csv'.")


Predictions on test data saved to 'test_predictions.csv'.
