In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load preprocessed ML-ready CSV
df = pd.read_csv("tess_toi_ml_ready.csv")

# Features (X) and target (y)
X = df.drop(columns=["tfopwg_disp"])
y = df["tfopwg_disp"]

X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.1,
    stratify=y,   # ensures proportional class distribution
    random_state=42
)

# Save splits (optional)
# X_train.to_csv("X_train.csv", index=False)
# X_val.to_csv("X_val.csv", index=False)
# y_train.to_csv("y_train.csv", index=False)
# y_val.to_csv("y_val.csv", index=False)

print("✅ Train-Validation split complete")
print("Train shape:", X_train.shape, " Validation shape:", X_val.shape)
print("\nClass distribution in train set:\n", y_train.value_counts(normalize=True))
print("\nClass distribution in val set:\n", y_val.value_counts(normalize=True))


✅ Train-Validation split complete
Train shape: (6932, 31)  Validation shape: (771, 31)

Class distribution in train set:
 tfopwg_disp
5    0.607473
3    0.155366
1    0.088719
4    0.075736
0    0.060012
2    0.012695
Name: proportion, dtype: float64

Class distribution in val set:
 tfopwg_disp
5    0.607004
3    0.155642
1    0.089494
4    0.075227
0    0.059663
2    0.012970
Name: proportion, dtype: float64


In [2]:
# ========================
# Logistic Regression GPU/CPU Training Pipeline
# ========================

import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

# Try GPU (cuML), fallback to CPU (scikit-learn)
try:
    from cuml.linear_model import LogisticRegression as cuLogisticRegression
    from cuml.preprocessing.model_selection import train_test_split as cu_train_test_split
    print("[INFO] cuML found - Using GPU Logistic Regression")
    USE_GPU = True
except ImportError:
    from sklearn.linear_model import LogisticRegression
    print("[INFO] cuML not found - Falling back to CPU Logistic Regression")
    USE_GPU = False

# ------------------------
# Load dataset
# ------------------------
print(f"[INFO] Dataset loaded. Shape: {df.shape}")

# ------------------------
# Prepare features and target
# ------------------------
X = df.drop("tfopwg_disp", axis=1)
y = df["tfopwg_disp"]

# Encode target
le = LabelEncoder()
y_encoded = le.fit_transform(y)
print(f"[INFO] Target classes: {list(le.classes_)}")

# ------------------------
# Train-test split (stratified)
# ------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
print(f"[INFO] Train shape: {X_train.shape}, Validation shape: {X_val.shape}")

# ------------------------
# Scale features
# ------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
print("[INFO] Features standardized")

# ------------------------
# Train Logistic Regression
# ------------------------
if USE_GPU:
    model = cuLogisticRegression(max_iter=1000, verbose=0)
else:
    model = LogisticRegression(max_iter=1000, verbose=0, n_jobs=-1)

print("[INFO] Training Logistic Regression...")
model.fit(X_train_scaled, y_train)
print("[INFO] Training completed!")

# ------------------------
# Predictions
# ------------------------
y_pred = model.predict(X_val_scaled)

# ------------------------
# Confusion Matrix & Metrics
# ------------------------
cm = confusion_matrix(y_val, y_pred)
print("\n===== CONFUSION MATRIX =====")
print(cm)

# Calculate TP, TN, FP, FN for multi-class
# Here, we'll calculate **per-class** metrics
classes = le.classes_
for idx, cls in enumerate(classes):
    TP = cm[idx, idx]
    FP = cm[:, idx].sum() - TP
    FN = cm[idx, :].sum() - TP
    TN = cm.sum() - (TP + FP + FN)
    
    print(f"\nClass: {cls}")
    print(f"TP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}")
    
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    Error_Rate = 1 - Accuracy
    Precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    Recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    F1 = 2 * Precision * Recall / (Precision + Recall) if (Precision + Recall) != 0 else 0
    Specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    FPR = FP / (FP + TN) if (FP + TN) != 0 else 0
    FNR = FN / (FN + TP) if (FN + TP) != 0 else 0
    
    print(f"Accuracy: {Accuracy:.4f}")
    print(f"Error Rate: {Error_Rate:.4f}")
    print(f"Precision: {Precision:.4f}")
    print(f"Recall / TPR: {Recall:.4f}")
    print(f"F1 Score: {F1:.4f}")
    print(f"Specificity / TNR: {Specificity:.4f}")
    print(f"False Positive Rate (FPR): {FPR:.4f}")
    print(f"False Negative Rate (FNR): {FNR:.4f}")

print("\n[INFO] Logistic Regression evaluation completed!")


[INFO] cuML not found - Falling back to CPU Logistic Regression
[INFO] Dataset loaded. Shape: (7703, 32)
[INFO] Target classes: [0, 1, 2, 3, 4, 5]
[INFO] Train shape: (6162, 31), Validation shape: (1541, 31)
[INFO] Features standardized
[INFO] Training Logistic Regression...
[INFO] Training completed!

===== CONFUSION MATRIX =====
[[  6   3   0  10   5  68]
 [  4  50   1   1   1  80]
 [  0   0   0   6   0  14]
 [  5   3   0  64   3 164]
 [  1   4   0   3  19  90]
 [  6  27   1  27   6 869]]

Class: 0
TP: 6, TN: 1433, FP: 16, FN: 86
Accuracy: 0.9338
Error Rate: 0.0662
Precision: 0.2727
Recall / TPR: 0.0652
F1 Score: 0.1053
Specificity / TNR: 0.9890
False Positive Rate (FPR): 0.0110
False Negative Rate (FNR): 0.9348

Class: 1
TP: 50, TN: 1367, FP: 37, FN: 87
Accuracy: 0.9195
Error Rate: 0.0805
Precision: 0.5747
Recall / TPR: 0.3650
F1 Score: 0.4464
Specificity / TNR: 0.9736
False Positive Rate (FPR): 0.0264
False Negative Rate (FNR): 0.6350

Class: 2
TP: 0, TN: 1519, FP: 2, FN: 20
Accura

In [3]:
# ========================
# Random Forest GPU/CPU Training Pipeline
# ========================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

# Try GPU (cuML), fallback to CPU (scikit-learn)
try:
    from cuml.ensemble import RandomForestClassifier as cuRF
    print("[INFO] cuML found - Using GPU Random Forest")
    USE_GPU = True
except ImportError:
    from sklearn.ensemble import RandomForestClassifier
    print("[INFO] cuML not found - Falling back to CPU Random Forest")
    USE_GPU = False

# ------------------------
# Load dataset
# ------------------------
print(f"[INFO] Dataset loaded. Shape: {df.shape}")

# ------------------------
# Prepare features and target
# ------------------------
X = df.drop("tfopwg_disp", axis=1)
y = df["tfopwg_disp"]

# Encode target
le = LabelEncoder()
y_encoded = le.fit_transform(y)
print(f"[INFO] Target classes: {list(le.classes_)}")

# ------------------------
# Train-test split (stratified)
# ------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
print(f"[INFO] Train shape: {X_train.shape}, Validation shape: {X_val.shape}")

# ------------------------
# Scaling optional for RF, but keep for consistency
# ------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
print("[INFO] Features standardized")

# ------------------------
# Train Random Forest
# ------------------------
if USE_GPU:
    model = cuRF(n_estimators=100, max_depth=12, verbose=0)
else:
    model = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1, verbose=0)

print("[INFO] Training Random Forest...")
model.fit(X_train_scaled, y_train)
print("[INFO] Training completed!")

# ------------------------
# Predictions
# ------------------------
y_pred = model.predict(X_val_scaled)

# ------------------------
# Confusion Matrix & Metrics
# ------------------------
cm = confusion_matrix(y_val, y_pred)
print("\n===== CONFUSION MATRIX =====")
print(cm)

# Multi-class metrics per class
classes = le.classes_
for idx, cls in enumerate(classes):
    TP = cm[idx, idx]
    FP = cm[:, idx].sum() - TP
    FN = cm[idx, :].sum() - TP
    TN = cm.sum() - (TP + FP + FN)
    
    print(f"\nClass: {cls}")
    print(f"TP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}")
    
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    Error_Rate = 1 - Accuracy
    Precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    Recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    F1 = 2 * Precision * Recall / (Precision + Recall) if (Precision + Recall) != 0 else 0
    Specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    FPR = FP / (FP + TN) if (FP + TN) != 0 else 0
    FNR = FN / (FN + TP) if (FN + TP) != 0 else 0
    
    print(f"Accuracy: {Accuracy:.4f}")
    print(f"Error Rate: {Error_Rate:.4f}")
    print(f"Precision: {Precision:.4f}")
    print(f"Recall / TPR: {Recall:.4f}")
    print(f"F1 Score: {F1:.4f}")
    print(f"Specificity / TNR: {Specificity:.4f}")
    print(f"False Positive Rate (FPR): {FPR:.4f}")
    print(f"False Negative Rate (FNR): {FNR:.4f}")

print("\n[INFO] Random Forest evaluation completed!")


[INFO] cuML not found - Falling back to CPU Random Forest
[INFO] Dataset loaded. Shape: (7703, 32)
[INFO] Target classes: [0, 1, 2, 3, 4, 5]
[INFO] Train shape: (6162, 31), Validation shape: (1541, 31)
[INFO] Features standardized
[INFO] Training Random Forest...
[INFO] Training completed!

===== CONFUSION MATRIX =====
[[  4   1   0  17   1  69]
 [  0  51   0   3   7  76]
 [  0   0   0   5   0  15]
 [  0   1   0  88   2 148]
 [  0   3   0   2  38  74]
 [  2  14   1  26  10 883]]

Class: 0
TP: 4, TN: 1447, FP: 2, FN: 88
Accuracy: 0.9416
Error Rate: 0.0584
Precision: 0.6667
Recall / TPR: 0.0435
F1 Score: 0.0816
Specificity / TNR: 0.9986
False Positive Rate (FPR): 0.0014
False Negative Rate (FNR): 0.9565

Class: 1
TP: 51, TN: 1385, FP: 19, FN: 86
Accuracy: 0.9319
Error Rate: 0.0681
Precision: 0.7286
Recall / TPR: 0.3723
F1 Score: 0.4928
Specificity / TNR: 0.9865
False Positive Rate (FPR): 0.0135
False Negative Rate (FNR): 0.6277

Class: 2
TP: 0, TN: 1520, FP: 1, FN: 20
Accuracy: 0.9864
Er

In [4]:
# ========================
# XGBoost GPU/CPU Training Pipeline
# ========================

import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

# Try GPU-enabled XGBoost
try:
    import xgboost as xgb
    print("[INFO] XGBoost imported. Will attempt GPU usage if available.")
    USE_GPU = True
except ImportError:
    raise ImportError("XGBoost is not installed. Install it via pip install xgboost.")

# ------------------------
# Load dataset
# ------------------------

print(f"[INFO] Dataset loaded. Shape: {df.shape}")

# ------------------------
# Prepare features and target
# ------------------------
X = df.drop("tfopwg_disp", axis=1)
y = df["tfopwg_disp"]

# Encode target
le = LabelEncoder()
y_encoded = le.fit_transform(y)
print(f"[INFO] Target classes: {list(le.classes_)}")

# ------------------------
# Train-test split (stratified)
# ------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
print(f"[INFO] Train shape: {X_train.shape}, Validation shape: {X_val.shape}")

# ------------------------
# Scaling optional for XGBoost (not required)
# ------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
print("[INFO] Features standardized")

# ------------------------
# Initialize XGBoost Classifier
# ------------------------
params = {
    "objective": "multi:softmax",
    "num_class": len(le.classes_),
    "max_depth": 6,
    "learning_rate": 0.1,
    "n_estimators": 200,
    "verbosity": 1,
    "tree_method": "gpu_hist" if USE_GPU else "hist"
}

print(f"[INFO] Training XGBoost with {'GPU' if USE_GPU else 'CPU'}...")
model = xgb.XGBClassifier(**params)
model.fit(X_train_scaled, y_train, eval_set=[(X_val_scaled, y_val)], verbose=True)
print("[INFO] Training completed!")

# ------------------------
# Predictions
# ------------------------
y_pred = model.predict(X_val_scaled)

# ------------------------
# Confusion Matrix & Metrics
# ------------------------
cm = confusion_matrix(y_val, y_pred)
print("\n===== CONFUSION MATRIX =====")
print(cm)

# Multi-class metrics per class
classes = le.classes_
for idx, cls in enumerate(classes):
    TP = cm[idx, idx]
    FP = cm[:, idx].sum() - TP
    FN = cm[idx, :].sum() - TP
    TN = cm.sum() - (TP + FP + FN)
    
    print(f"\nClass: {cls}")
    print(f"TP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}")
    
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    Error_Rate = 1 - Accuracy
    Precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    Recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    F1 = 2 * Precision * Recall / (Precision + Recall) if (Precision + Recall) != 0 else 0
    Specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    FPR = FP / (FP + TN) if (FP + TN) != 0 else 0
    FNR = FN / (FN + TP) if (FN + TP) != 0 else 0
    
    print(f"Accuracy: {Accuracy:.4f}")
    print(f"Error Rate: {Error_Rate:.4f}")
    print(f"Precision: {Precision:.4f}")
    print(f"Recall / TPR: {Recall:.4f}")
    print(f"F1 Score: {F1:.4f}")
    print(f"Specificity / TNR: {Specificity:.4f}")
    print(f"False Positive Rate (FPR): {FPR:.4f}")
    print(f"False Negative Rate (FNR): {FNR:.4f}")

print("\n[INFO] XGBoost evaluation completed!")


[INFO] XGBoost imported. Will attempt GPU usage if available.
[INFO] Dataset loaded. Shape: (7703, 32)
[INFO] Target classes: [0, 1, 2, 3, 4, 5]
[INFO] Train shape: (6162, 31), Validation shape: (1541, 31)
[INFO] Features standardized
[INFO] Training XGBoost with GPU...
[0]	validation_0-mlogloss:1.66368
[1]	validation_0-mlogloss:1.56392
[2]	validation_0-mlogloss:1.48213
[3]	validation_0-mlogloss:1.41318
[4]	validation_0-mlogloss:1.35468
[5]	validation_0-mlogloss:1.30403



    E.g. tree_method = "hist", device = "cuda"



[6]	validation_0-mlogloss:1.25963
[7]	validation_0-mlogloss:1.22068
[8]	validation_0-mlogloss:1.18652
[9]	validation_0-mlogloss:1.15599
[10]	validation_0-mlogloss:1.12854
[11]	validation_0-mlogloss:1.10335
[12]	validation_0-mlogloss:1.08064
[13]	validation_0-mlogloss:1.06160
[14]	validation_0-mlogloss:1.04422
[15]	validation_0-mlogloss:1.02825
[16]	validation_0-mlogloss:1.01376
[17]	validation_0-mlogloss:0.99975
[18]	validation_0-mlogloss:0.98689
[19]	validation_0-mlogloss:0.97623
[20]	validation_0-mlogloss:0.96678
[21]	validation_0-mlogloss:0.95746
[22]	validation_0-mlogloss:0.94934
[23]	validation_0-mlogloss:0.94197
[24]	validation_0-mlogloss:0.93521
[25]	validation_0-mlogloss:0.92836
[26]	validation_0-mlogloss:0.92338
[27]	validation_0-mlogloss:0.91832
[28]	validation_0-mlogloss:0.91266
[29]	validation_0-mlogloss:0.90839
[30]	validation_0-mlogloss:0.90428
[31]	validation_0-mlogloss:0.90047
[32]	validation_0-mlogloss:0.89752
[33]	validation_0-mlogloss:0.89470
[34]	validation_0-mloglo


    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [6]:
# ========================
# CatBoost GPU/CPU Training Pipeline
# ========================

import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

# Try GPU-enabled CatBoost
try:
    from catboost import CatBoostClassifier, Pool
    print("[INFO] CatBoost imported. Will attempt GPU usage if available.")
    USE_GPU = True
except ImportError:
    raise ImportError("CatBoost is not installed. Install it via pip install catboost.")

# ------------------------
# Load dataset
# ------------------------
df = pd.read_csv("tess_toi_ml_ready.csv")
print(f"[INFO] Dataset loaded. Shape: {df.shape}")

# ------------------------
# Prepare features and target
# ------------------------
X = df.drop("tfopwg_disp", axis=1)
y = df["tfopwg_disp"]

# Encode target
le = LabelEncoder()
y_encoded = le.fit_transform(y)
print(f"[INFO] Target classes: {list(le.classes_)}")

# ------------------------
# Train-test split (stratified)
# ------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
print(f"[INFO] Train shape: {X_train.shape}, Validation shape: {X_val.shape}")

# ------------------------
# Scaling optional for CatBoost
# ------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
print("[INFO] Features standardized")

# ------------------------
# Initialize CatBoost Classifier
# ------------------------
catboost_params = {
    "iterations": 500,
    "learning_rate": 0.1,
    "depth": 6,
    "loss_function": "MultiClass",
    "verbose": 50,
    "task_type": "GPU" if USE_GPU else "CPU"
}

print(f"[INFO] Training CatBoost with {'GPU' if USE_GPU else 'CPU'}...")
model = CatBoostClassifier(**catboost_params)
model.fit(X_train_scaled, y_train, eval_set=(X_val_scaled, y_val))
print("[INFO] Training completed!")

# ------------------------
# Predictions
# ------------------------
y_pred = model.predict(X_val_scaled)
y_pred = y_pred.flatten().astype(int)

# ------------------------
# Confusion Matrix & Metrics
# ------------------------
cm = confusion_matrix(y_val, y_pred)
print("\n===== CONFUSION MATRIX =====")
print(cm)

# Multi-class metrics per class
classes = le.classes_
for idx, cls in enumerate(classes):
    TP = cm[idx, idx]
    FP = cm[:, idx].sum() - TP
    FN = cm[idx, :].sum() - TP
    TN = cm.sum() - (TP + FP + FN)
    
    print(f"\nClass: {cls}")
    print(f"TP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}")
    
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    Error_Rate = 1 - Accuracy
    Precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    Recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    F1 = 2 * Precision * Recall / (Precision + Recall) if (Precision + Recall) != 0 else 0
    Specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    FPR = FP / (FP + TN) if (FP + TN) != 0 else 0
    FNR = FN / (FN + TP) if (FN + TP) != 0 else 0
    
    print(f"Accuracy: {Accuracy:.4f}")
    print(f"Error Rate: {Error_Rate:.4f}")
    print(f"Precision: {Precision:.4f}")
    print(f"Recall / TPR: {Recall:.4f}")
    print(f"F1 Score: {F1:.4f}")
    print(f"Specificity / TNR: {Specificity:.4f}")
    print(f"False Positive Rate (FPR): {FPR:.4f}")
    print(f"False Negative Rate (FNR): {FNR:.4f}")

print("\n[INFO] CatBoost evaluation completed!")


[INFO] CatBoost imported. Will attempt GPU usage if available.
[INFO] Dataset loaded. Shape: (7703, 32)
[INFO] Target classes: [0, 1, 2, 3, 4, 5]
[INFO] Train shape: (6162, 31), Validation shape: (1541, 31)
[INFO] Features standardized
[INFO] Training CatBoost with GPU...
0:	learn: 1.6152467	test: 1.6184722	best: 1.6184722 (0)	total: 20.2ms	remaining: 10.1s
50:	learn: 0.8175552	test: 0.8975059	best: 0.8975059 (50)	total: 321ms	remaining: 2.82s
100:	learn: 0.7193936	test: 0.8587612	best: 0.8587612 (100)	total: 783ms	remaining: 3.09s
150:	learn: 0.6517895	test: 0.8454848	best: 0.8454223 (148)	total: 1.27s	remaining: 2.93s
200:	learn: 0.5982811	test: 0.8391417	best: 0.8385934 (198)	total: 1.71s	remaining: 2.54s
250:	learn: 0.5513472	test: 0.8343559	best: 0.8339948 (235)	total: 2.17s	remaining: 2.15s
300:	learn: 0.5117353	test: 0.8292642	best: 0.8291505 (290)	total: 2.63s	remaining: 1.74s
350:	learn: 0.4754314	test: 0.8307571	best: 0.8289083 (310)	total: 3.09s	remaining: 1.31s
400:	learn: 

In [3]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import cupy as cp  # We'll use cp arrays if needed for compatibility

# Assuming X_train, X_val, y_train, y_val are ready (CPU)
print("Starting SVM training...")

# Feature Scaling is important for SVM
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

print("Training SVM classifier...")
svm_model = SVC(kernel='rbf', probability=True, random_state=42)
svm_model.fit(X_train_scaled, y_train)
print("Training completed.")

print("Making predictions...")
y_pred = svm_model.predict(X_val_scaled)
print("Predictions done.")

# Metrics calculation
cm = confusion_matrix(y_val, y_pred)
TP = sum([cm[i,i] for i in range(cm.shape[0])])
FP = sum([cm[:,i].sum() - cm[i,i] for i in range(cm.shape[0])])
FN = sum([cm[i,:].sum() - cm[i,i] for i in range(cm.shape[0])])
TN = cm.sum() - (TP + FP + FN)

Accuracy = (TP + TN) / (TP + TN + FP + FN)
Error_Rate = 1 - Accuracy
Precision = TP / (TP + FP) if (TP + FP) != 0 else 0
Recall = TP / (TP + FN) if (TP + FN) != 0 else 0
F1 = 2 * Precision * Recall / (Precision + Recall) if (Precision + Recall) != 0 else 0
Specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
FPR = FP / (FP + TN) if (FP + TN) != 0 else 0
FNR = FN / (FN + TP) if (FN + TP) != 0 else 0

# Print all metrics
print("\n===== SVM CLASSIFICATION METRICS =====")
print(f"Confusion Matrix:\n{cm}")
print(f"TP: {TP}, TN: {TN}, FP: {FP}, FN: {FN}")
print(f"Accuracy: {Accuracy:.4f}")
print(f"Error Rate: {Error_Rate:.4f}")
print(f"Precision: {Precision:.4f}")
print(f"Recall / TPR: {Recall:.4f}")
print(f"F1 Score: {F1:.4f}")
print(f"Specificity / TNR: {Specificity:.4f}")
print(f"FPR: {FPR:.4f}")
print(f"FNR: {FNR:.4f}")


Starting SVM training...
Training SVM classifier...
Training completed.
Making predictions...
Predictions done.

===== SVM CLASSIFICATION METRICS =====
Confusion Matrix:
[[  0   1   0   9   1  35]
 [  1  15   0   0   0  53]
 [  0   0   0   1   0   9]
 [  0   2   0  39   0  79]
 [  0   1   0   0   5  52]
 [  0   9   0  15   3 441]]
TP: 500, TN: -271, FP: 271, FN: 271
Accuracy: 0.2970
Error Rate: 0.7030
Precision: 0.6485
Recall / TPR: 0.6485
F1 Score: 0.6485
Specificity / TNR: 0.0000
FPR: 0.0000
FNR: 0.3515
