# Sleep Disorder Classification — Training with Checkpoints & Confusion Matrices

**Loads preprocessed `.npy` files from Google Drive** (creates them from CSV if they don't exist yet).

- Separate confusion matrices for **Train / Validation / Test** per model
- Versioned **checkpoints** saved to Drive for iterative improvement

> Set runtime to **T4 GPU**: Runtime → Change runtime type

In [None]:
# 1. Setup
!pip install -q optuna imbalanced-learn seaborn

import torch
# Report which accelerator (if any) the runtime exposes before doing anything expensive.
if not torch.cuda.is_available():
    print('WARNING: No GPU! Runtime -> Change runtime type -> T4 GPU')
else:
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is reported in bytes; show it in decimal gigabytes.
    gpu_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'GPU: {gpu_name} ({gpu_mem_gb:.1f} GB)')

In [None]:
# 2. Mount Drive
from google.colab import drive
import numpy as np
import joblib
import os

# Project folder on Drive: preprocessed arrays live at the top level,
# versioned model checkpoints in a `checkpoints` sub-folder.
DATA_DIR = '/content/drive/MyDrive/Sleep_Disorder_Project'
CKPT_DIR = os.path.join(DATA_DIR, 'checkpoints')

# Drive must be mounted before the checkpoint folder can be created on it.
drive.mount('/content/drive')
os.makedirs(CKPT_DIR, exist_ok=True)

In [None]:
# 3. Load or Create Preprocessed Data
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

# Every artifact listed here is loaded unconditionally on the cached path, so the
# cache is only considered usable when ALL of them exist (not just X_train_full.npy).
artifact_paths = {
    fname: os.path.join(DATA_DIR, fname)
    for fname in ('X_train_full.npy', 'y_train_full.npy', 'X_test.npy', 'y_test.npy',
                  'label_encoder.joblib')
}
npy_check = artifact_paths['X_train_full.npy']

if all(os.path.exists(p) for p in artifact_paths.values()):
    # NOTE: cached arrays are loaded as-is. Files written by an earlier version of this
    # cell (which fitted the scaler/encoder on the full dataset) keep that preprocessing;
    # delete them from Drive to regenerate.
    print('Found existing .npy files. Loading...')
    X_train_full = np.load(artifact_paths['X_train_full.npy'])
    y_train_full = np.load(artifact_paths['y_train_full.npy'])
    X_test = np.load(artifact_paths['X_test.npy'])
    y_test = np.load(artifact_paths['y_test.npy'])
    le = joblib.load(artifact_paths['label_encoder.joblib'])
else:
    print('.npy files not found. Preprocessing from CSV...')
    # Upload CSV if not present
    csv_path = 'sleep_dataset.csv'
    if not os.path.exists(csv_path):
        from google.colab import files
        print('Upload sleep_dataset.csv:')
        uploaded = files.upload()
        if csv_path not in uploaded:
            if not uploaded:
                raise FileNotFoundError(f'No file uploaded; expected {csv_path}')
            # Rename only the first upload so that several uploaded files
            # cannot overwrite each other at csv_path.
            os.rename(next(iter(uploaded)), csv_path)

    df = pd.read_csv(csv_path)
    if 'Person ID' in df.columns:
        df = df.drop(columns=['Person ID'])
    # Missing target values are treated as their own 'None' class.
    df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')
    print('Class distribution:')
    print(df['Sleep Disorder'].value_counts())

    if 'Blood Pressure' in df.columns:
        # "120/80" -> two numeric columns.
        df[['Systolic_BP', 'Diastolic_BP']] = df['Blood Pressure'].str.split('/', expand=True).astype(float)
        df = df.drop(columns=['Blood Pressure'])
    if 'BMI Category' in df.columns:
        # Merge the two spellings of the same category.
        df['BMI Category'] = df['BMI Category'].replace({'Normal Weight': 'Normal'})

    target_col = 'Sleep Disorder'
    cat_cols = ['Gender', 'Occupation', 'BMI Category']
    num_cols = [c for c in df.columns if c not in cat_cols + [target_col]]

    preprocessor = ColumnTransformer([
        ('num', Pipeline([('scaler', StandardScaler())]), num_cols),
        ('cat', Pipeline([('ohe', OneHotEncoder(handle_unknown='ignore', sparse_output=False))]), cat_cols)
    ])

    X = df.drop(columns=[target_col])
    le = LabelEncoder()
    y = le.fit_transform(df[target_col])

    # Split the raw rows FIRST, then fit the scaler/encoder on the training rows only.
    # Fitting on the full dataset lets test-set statistics leak into the training features.
    X_train_df, X_test_df, y_train_raw, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y)
    X_train_raw = preprocessor.fit_transform(X_train_df)
    # Categories unseen in training become all-zero one-hot rows (handle_unknown='ignore').
    X_test = preprocessor.transform(X_test_df)

    # Oversample the training split only; the test split keeps its original class balance.
    print('Applying SMOTE...')
    smote = SMOTE(random_state=42)
    X_train_full, y_train_full = smote.fit_resample(X_train_raw, y_train_raw)

    # Save to Drive
    np.save(artifact_paths['X_train_full.npy'], X_train_full)
    np.save(artifact_paths['y_train_full.npy'], y_train_full)
    np.save(artifact_paths['X_test.npy'], X_test)
    np.save(artifact_paths['y_test.npy'], y_test)
    joblib.dump(le, artifact_paths['label_encoder.joblib'])
    print(f'Saved .npy files to {DATA_DIR}')

print(f'Train: {X_train_full.shape}, Test: {X_test.shape}')
print(f'Classes: {list(le.classes_)}')

In [None]:
# 4. Train/Val Split
from sklearn.model_selection import train_test_split

# Carve a stratified 15% validation set out of the training pool.
# NOTE(review): when X_train_full was built by the preprocessing cell it has already been
# oversampled with SMOTE, so validation rows can be synthetic samples derived from
# training rows -- validation scores may be optimistic; compare against Test.
split_kwargs = dict(test_size=0.15, random_state=42, stratify=y_train_full)
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, **split_kwargs)
print(f'Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}')

In [None]:
# 5. Confusion Matrix Plotting
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score

def plot_confusion_matrices(model_name, y_sets, pred_sets, class_names, save_dir=None):
    """Plot Train/Val/Test confusion matrices side by side and print per-split reports.

    Args:
        model_name: Label used in the figure title, the printed headers and the
            saved file name (``<model_name>_confusion.png``).
        y_sets: Mapping with keys 'Train', 'Val', 'Test' -> true labels.
        pred_sets: Mapping with the same keys -> predicted labels.
        class_names: Tick labels for the heatmaps and ``target_names`` for the reports.
        save_dir: If truthy, the figure is also written to this directory as a PNG.
    """
    splits = ('Train', 'Val', 'Test')

    fig, axes = plt.subplots(1, len(splits), figsize=(20, 5))
    fig.suptitle(f'{model_name} — Confusion Matrices', fontsize=16, fontweight='bold')

    for col, split in enumerate(splits):
        ax = axes[col]
        y_true, y_pred = y_sets[split], pred_sets[split]
        acc = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='weighted')
        sns.heatmap(
            confusion_matrix(y_true, y_pred),
            annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax,
        )
        ax.set_title(f'{split}\nAcc: {acc:.4f} | F1: {f1:.4f}')
        ax.set_ylabel('True')
        ax.set_xlabel('Predicted')

    plt.tight_layout()
    if save_dir:
        path = os.path.join(save_dir, f'{model_name}_confusion.png')
        # Save before show(): the figure is still guaranteed to be live here.
        plt.savefig(path, dpi=150, bbox_inches='tight')
        print(f'Saved: {path}')
    plt.show()

    for split in splits:
        print(f'\n--- {model_name} {split} ---')
        report = classification_report(y_sets[split], pred_sets[split], target_names=class_names)
        print(report)

In [None]:
# 6. Checkpoint & PyTorch Utilities
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
import glob

# Mixed precision is enabled exactly when CUDA is available; the device and the
# batch size are chosen by the same condition.
USE_AMP = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_AMP else 'cpu')
BATCH_SIZE = 512 if USE_AMP else 64

class SleepDataset(Dataset):
    """In-memory dataset pairing float32 feature rows with int64 class labels."""

    def __init__(self, X, y):
        # Convert once up front so __getitem__ is a plain tensor index.
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Number of samples (length of the feature tensor's first axis)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

def get_ckpt_ver(name, d):
    """Return the next unused checkpoint version number for ``name`` in directory ``d``.

    Checkpoint files are expected to be named ``<name>_v<N>.<ext>``. Files that start
    with ``<name>_v`` but do not carry an integer version (e.g. ``KNN_vfinal.pth``)
    are ignored.

    Args:
        name: Model name used as the file-name prefix.
        d: Directory that holds the checkpoints.

    Returns:
        ``max(existing versions) + 1``, or ``1`` when no versioned file exists.
    """
    import re  # local import: only this helper needs it

    # Anchor on the full "<name>_v" prefix instead of splitting on the first "_v":
    # a model name that itself contains "_v" (e.g. "my_val") would otherwise be
    # mis-parsed, the version would reset to 1 and overwrite an existing checkpoint.
    version_re = re.compile(re.escape(name) + r'_v(\d+)\.')

    # Escape glob metacharacters so names/dirs containing [, ], * or ? match literally.
    pattern = os.path.join(glob.escape(d), f'{glob.escape(name)}_v*.*')

    nums = []
    for path in glob.glob(pattern):
        match = version_re.match(os.path.basename(path))
        if match:
            nums.append(int(match.group(1)))
    return max(nums) + 1 if nums else 1

def save_ckpt(model, name, ckpt_dir, pytorch=False, metrics=None):
    """Write ``model`` to ``ckpt_dir`` as the next version of checkpoint ``name``.

    Args:
        model: Object to persist. With ``pytorch=True`` only its ``state_dict()``
            is stored; otherwise the whole object is dumped with joblib.
        name: Checkpoint name; the file is ``<name>_v<N>.pth`` or ``<name>_v<N>.joblib``.
        ckpt_dir: Target directory.
        pytorch: Select the torch (``.pth``) format instead of joblib.
        metrics: Optional metrics dict stored alongside the model (``{}`` if omitted).
    """
    ver = get_ckpt_ver(name, ckpt_dir)
    meta = {'v': ver, 'metrics': metrics or {}}
    extension = 'pth' if pytorch else 'joblib'
    path = os.path.join(ckpt_dir, f'{name}_v{ver}.{extension}')

    if pytorch:
        torch.save({'state_dict': model.state_dict(), **meta}, path)
    else:
        joblib.dump({'model': model, **meta}, path)
    print(f'Checkpoint: {path} (v{ver})')

def make_loader(X, y):
    """Build a DataLoader over ``(X, y)`` with the notebook-wide batch size.

    No ``shuffle`` argument is passed, so batches follow the input row order.
    """
    dataset = SleepDataset(X, y)
    loader_kwargs = dict(
        batch_size=BATCH_SIZE,
        pin_memory=torch.cuda.is_available(),
        num_workers=2,
    )
    return DataLoader(dataset, **loader_kwargs)

def predict_torch(model, dl):
    """Return the predicted class index for every sample yielded by ``dl``.

    Puts ``model`` into eval mode (and leaves it there), runs without gradients and
    under autocast when ``USE_AMP`` is set. Labels in the loader are ignored.

    Returns:
        1-D NumPy array of predicted class indices, in loader order.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        with autocast(enabled=USE_AMP):
            for batch_x, _labels in dl:
                logits = model(batch_x.to(DEVICE, non_blocking=True))
                # torch.max over dim 1 returns (values, indices); keep the indices.
                _values, top_idx = torch.max(logits, 1)
                preds.extend(top_idx.cpu().numpy())
    return np.array(preds)

# Echo the settings chosen above so the cell output records which device this run used.
print(f'Device: {DEVICE} | Batch: {BATCH_SIZE}')

In [None]:
# 7. Model Definitions
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

class ANN(nn.Module):
    """Fully connected classifier: repeated Linear -> BatchNorm -> ReLU -> Dropout blocks.

    Args:
        dim: Number of input features.
        layers: Number of hidden blocks; at least one block is always built.
        units: Width of every hidden layer.
        drop: Dropout probability applied after each hidden block.
        nc: Number of output classes (size of the final Linear layer).
    """

    def __init__(self, dim, layers=2, units=128, drop=0.3, nc=3):
        super().__init__()
        modules = []
        in_features = dim
        # The first block maps dim -> units, every later block maps units -> units.
        for _ in range(max(layers, 1)):
            modules.extend([
                nn.Linear(in_features, units),
                nn.BatchNorm1d(units),
                nn.ReLU(),
                nn.Dropout(drop),
            ])
            in_features = units
        modules.append(nn.Linear(units, nc))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Return raw class logits for a batch of feature rows."""
        return self.net(x)

class CNN(nn.Module):
    """1-D convolutional classifier that treats each feature row as a one-channel sequence.

    Args:
        dim: Number of input features (sequence length seen by the convolution).
        filt: Number of convolution filters.
        ks: Convolution kernel size.
        drop: Dropout probability applied before the final Linear layer.
        nc: Number of output classes.
    """

    def __init__(self, dim, filt=32, ks=2, drop=0.3, nc=3):
        super().__init__()
        self.conv = nn.Conv1d(1, filt, ks)
        self.bn = nn.BatchNorm1d(filt)

        conv_len = dim - ks + 1      # output length of the convolution (no padding)
        pooled_len = conv_len // 2   # output length after MaxPool1d(2)
        if pooled_len > 0:
            self.pool = nn.MaxPool1d(2)
            flat_len = pooled_len
        else:
            # Too short to pool: skip pooling and feed the conv output straight through.
            self.pool = nn.Identity()
            flat_len = conv_len

        self.drop = nn.Dropout(drop)
        self.fc = nn.Linear(filt * flat_len, nc)

    def forward(self, x):
        """Return raw class logits for a batch of feature rows."""
        x = x.unsqueeze(1)  # insert the channel axis expected by Conv1d
        x = torch.relu(self.bn(self.conv(x)))
        x = self.drop(self.pool(x))
        return self.fc(x.flatten(1))

# Network sizes are derived from the data: one input per feature column of X_train
# and one output per class known to the label encoder.
INPUT_DIM = X_train.shape[1]
NC = len(le.classes_)
print(f'Input: {INPUT_DIM}, Classes: {NC}')

In [None]:
# 8. PyTorch Training Function
def train_model(model, name, X_tr, y_tr, X_v, y_v, epochs=20, lr=0.001):
    """Train ``model`` with Adam + cross-entropy and report validation F1 as it goes.

    Args:
        model: ``nn.Module`` to train; moved to ``DEVICE`` in place.
        name: Model label. Currently unused inside the function; kept so existing
            callers keep working.
        X_tr, y_tr: Training features and integer class labels.
        X_v, y_v: Validation features and labels, used only for the progress F1.
        epochs: Number of passes over the training data.
        lr: Adam learning rate.

    Returns:
        The same ``model`` object after the final epoch.
    """
    model.to(DEVICE)
    crit = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=lr)
    scaler = GradScaler(enabled=USE_AMP)

    # BatchNorm cannot normalise a single-sample batch in train mode and raises.
    # If the data size would leave a trailing batch of exactly one sample, drop it
    # (with shuffle=True a different sample is left out each epoch).
    n_train = len(X_tr)
    drop_tail = n_train > BATCH_SIZE and n_train % BATCH_SIZE == 1
    dl = DataLoader(SleepDataset(X_tr, y_tr), batch_size=BATCH_SIZE, shuffle=True,
                    drop_last=drop_tail,
                    pin_memory=torch.cuda.is_available(), num_workers=2)

    # The validation loader does not change between epochs; build it once instead of
    # re-creating the dataset (and its worker processes) on every epoch.
    val_dl = make_loader(X_v, y_v)

    for ep in range(epochs):
        model.train()  # predict_torch() below switches to eval mode each epoch
        loss_sum = 0
        for Xb, yb in dl:
            Xb, yb = Xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
            opt.zero_grad(set_to_none=True)
            with autocast(enabled=USE_AMP):
                loss = crit(model(Xb), yb)
            # Scaled backward/step/update: the GradScaler sequence for mixed precision.
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            loss_sum += loss.item()
        vp = predict_torch(model, val_dl)
        vf1 = f1_score(y_v, vp, average='weighted')
        # Log the first epoch and then every fifth one.
        if (ep+1) % 5 == 0 or ep == 0:
            print(f'  Ep {ep+1}/{epochs} | Loss: {loss_sum/len(dl):.4f} | Val F1: {vf1:.4f}')
    return model

---
## Train & Evaluate All Models

In [None]:
# 9a. KNN
print('='*50, '\nKNN\n', '='*50)
# Distance-weighted 3-NN on the preprocessed features.
knn = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='euclidean', n_jobs=-1)
knn.fit(X_train, y_train)

# True labels per split; later model cells reuse `ys`.
ys = {'Train': y_train, 'Val': y_val, 'Test': y_test}
kp = {
    split: knn.predict(features)
    for split, features in (('Train', X_train), ('Val', X_val), ('Test', X_test))
}

plot_confusion_matrices('KNN', ys, kp, le.classes_, CKPT_DIR)
knn_metrics = {'test_f1': f1_score(y_test, kp['Test'], average='weighted')}
save_ckpt(knn, 'KNN', CKPT_DIR, metrics=knn_metrics)

In [None]:
# 9b. SVM
print('='*50, '\nSVM\n', '='*50)
# Cap the SVM training set at 20k rows (kernel SVM training cost grows quickly with
# the number of samples).
n = min(len(X_train), 20000)
# Seeded generator so the subsample -- and therefore the fitted SVM -- is reproducible,
# matching the random_state=42 used everywhere else in this notebook. Sorting keeps the
# original row order, so when no subsampling is needed idx is simply 0..len-1.
rng = np.random.default_rng(42)
idx = np.sort(rng.choice(len(X_train), size=n, replace=False))
svm = SVC(C=10.0, kernel='rbf', gamma='scale')
svm.fit(X_train[idx], y_train[idx])
# Predictions are made on the full splits, not just the subsample.
sp = {'Train': svm.predict(X_train), 'Val': svm.predict(X_val), 'Test': svm.predict(X_test)}
plot_confusion_matrices('SVM', ys, sp, le.classes_, CKPT_DIR)
save_ckpt(svm, 'SVM', CKPT_DIR, metrics={'test_f1': f1_score(y_test, sp['Test'], average='weighted')})

In [None]:
# 9c. Random Forest
print('='*50, '\nRandom Forest\n', '='*50)
# 200 trees, depth-capped at 30, seeded for repeatable results.
rf = RandomForestClassifier(n_estimators=200, max_depth=30, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)

rp = {
    split: rf.predict(features)
    for split, features in (('Train', X_train), ('Val', X_val), ('Test', X_test))
}

plot_confusion_matrices('RF', ys, rp, le.classes_, CKPT_DIR)
rf_metrics = {'test_f1': f1_score(y_test, rp['Test'], average='weighted')}
save_ckpt(rf, 'RF', CKPT_DIR, metrics=rf_metrics)

In [None]:
# 9d. ANN
print('='*50, '\nANN\n', '='*50)
ann = train_model(
    ANN(INPUT_DIM, layers=2, units=128, drop=0.3, nc=NC),
    'ANN', X_train, y_train, X_val, y_val, epochs=20, lr=0.001,
)

# Evaluation loaders for the three splits; the CNN cell reuses tdl / vdl / sdl.
tdl = make_loader(X_train, y_train)
vdl = make_loader(X_val, y_val)
sdl = make_loader(X_test, y_test)

ap = {
    split: predict_torch(ann, loader)
    for split, loader in (('Train', tdl), ('Val', vdl), ('Test', sdl))
}

plot_confusion_matrices('ANN', ys, ap, le.classes_, CKPT_DIR)
ann_metrics = {'test_f1': f1_score(y_test, ap['Test'], average='weighted')}
save_ckpt(ann, 'ANN', CKPT_DIR, pytorch=True, metrics=ann_metrics)

In [None]:
# 9e. CNN
print('='*50, '\nCNN\n', '='*50)
cnn = train_model(
    CNN(INPUT_DIM, filt=32, ks=2, drop=0.3, nc=NC),
    'CNN', X_train, y_train, X_val, y_val, epochs=20, lr=0.001,
)

# tdl / vdl / sdl are the evaluation loaders created in the ANN cell.
cp = {
    split: predict_torch(cnn, loader)
    for split, loader in (('Train', tdl), ('Val', vdl), ('Test', sdl))
}

plot_confusion_matrices('CNN', ys, cp, le.classes_, CKPT_DIR)
cnn_metrics = {'test_f1': f1_score(y_test, cp['Test'], average='weighted')}
save_ckpt(cnn, 'CNN', CKPT_DIR, pytorch=True, metrics=cnn_metrics)

In [None]:
# 10. Summary + Overfitting Check
import pandas as pd

# Per-model prediction dicts produced by the training cells above.
all_p = {'KNN': kp, 'SVM': sp, 'RF': rp, 'ANN': ap, 'CNN': cp}

# One row per (model, split) holding accuracy and weighted F1.
rows = {
    f'{m}_{s}': {
        'Accuracy': accuracy_score(ys[s], preds[s]),
        'F1': f1_score(ys[s], preds[s], average='weighted'),
    }
    for m, preds in all_p.items()
    for s in ['Train', 'Val', 'Test']
}

print('\n'+'='*60+'\nFULL RESULTS\n'+'='*60)
print(pd.DataFrame(rows).T.to_string(float_format='%.4f'))

print('\n--- Overfitting Check ---')
for m in all_p:
    tr = rows[f'{m}_Train']['F1']
    te = rows[f'{m}_Test']['F1']
    g = tr - te
    # Train-minus-test F1 gap: below 0.05 is fine, above 0.1 is flagged as overfitting.
    if g < 0.05:
        st = 'OK'
    elif g > 0.1:
        st = 'OVERFIT'
    else:
        st = 'MODERATE'
    print(f'  {m:4s}: Train={tr:.4f} Test={te:.4f} Gap={g:.4f} [{st}]')

In [None]:
# 11. List Checkpoints
print('Saved checkpoints:')
bytes_per_mb = 1024*1024
# Alphabetical listing of everything in the checkpoint folder with its size.
for fname in sorted(os.listdir(CKPT_DIR)):
    size_mb = os.path.getsize(os.path.join(CKPT_DIR, fname)) / bytes_per_mb
    print(f'  {fname} ({size_mb:.1f} MB)')