In [3]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Load and preprocess data
breast_cancer = fetch_ucirepo(id=15)
X = breast_cancer.data.features.replace('?', np.nan).apply(pd.to_numeric).dropna()
y = breast_cancer.data.targets.loc[X.index].replace({2:0, 4:1})

# Split and scale
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Enhanced SVM implementation
svm_linear = SVC(kernel='linear', C=1.0, random_state=42)
svm_linear.fit(X_train_scaled, y_train.values.ravel())

# Cross-validation
cv_scores = cross_val_score(svm_linear, X_train_scaled, y_train.values.ravel(), cv=5)

# Predictions
y_pred = svm_linear.predict(X_test_scaled)

# Comprehensive evaluation
conf_matrix = confusion_matrix(y_test, y_pred)
metrics = {
    'Accuracy': accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred),
    'Recall': recall_score(y_test, y_pred),
    'F1-Score': f1_score(y_test, y_pred),
    'CV Mean Score': cv_scores.mean()
}

# Display results
print("Linear SVM Performance:")
print(f"- Cross-validation Accuracy (5-fold): {metrics['CV Mean Score']:.4f}")
print(f"- Test Set Accuracy: {metrics['Accuracy']:.4f}")
print(f"- Precision: {metrics['Precision']:.3f} | Recall: {metrics['Recall']:.3f} | F1: {metrics['F1-Score']:.3f}")

pd.DataFrame(conf_matrix,
             index=['Actual Benign (TN/FP)', 'Actual Malignant (FN/TP)'],
             columns=['Predicted Benign', 'Predicted Malignant'])

Linear SVM Performance:
- Cross-validation Accuracy (5-fold): 0.9726
- Test Set Accuracy: 0.9591
- Precision: 0.921 | Recall: 0.967 | F1: 0.943


Unnamed: 0,Predicted Benign,Predicted Malignant
Actual Benign (TN/FP),106,5
Actual Malignant (FN/TP),2,58
