In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# 1. Load the Breast Cancer dataset
# Each sample holds features computed from a digitized image of a fine needle
# aspirate (FNA) of a breast mass.
data = load_breast_cancer()
X, y = data.data, data.target

# Class labels indexed by target value: 0 -> 'malignant', 1 -> 'benign'
target_names = data.target_names
print(f"Classes: {target_names}")

# 2. Hold out a test set
# 20% of the samples are reserved for testing and the remaining 80% are used for
# training. random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3. Feature Scaling
# SVMs are sensitive to feature scale, so every feature is standardized to
# mean=0, variance=1. The scaler is fit on the training split only and then
# applied unchanged to both splits, so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Initialize and Train the Support Vector Machine (SVM)
# A linear kernel is used as a simple baseline: it fits a single separating
# hyperplane on the scaled training features.
# Note: SVC's `random_state` only seeds the data shuffling used for probability
# estimates and is ignored unless probability=True, so it is deliberately not
# passed here -- it would have no effect on the fitted model.
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_scaled, y_train)

# 5. Make Predictions
# Predict class labels (0/1) for the held-out, scaled test features.
y_pred = svm_model.predict(X_test_scaled)

# 6./7. Evaluate the model and print each metric right after computing it

# Overall fraction of test samples classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f"\n--- Model Accuracy: {accuracy:.4f} ---\n")

# Counts of (actual, predicted) label pairs: rows are the true classes and
# columns are the predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
print("--- Confusion Matrix ---")
print(conf_matrix)
print("\n(Row=Actual, Col=Predicted)\n")

# Per-class precision / recall / F1, labelled with the human-readable class names.
class_report = classification_report(y_test, y_pred, target_names=target_names)
print("--- Classification Report ---")
print(class_report)

# Optional: Show a few example predictions
# Integer-array indexing into target_names maps the first five numeric labels
# to their class names in one step, for both the true and the predicted labels.
print("\n--- Example Predictions (First 5 Test Samples) ---")
actual_labels = target_names[y_test[:5]].tolist()
predicted_labels = target_names[y_pred[:5]].tolist()
df_results = pd.DataFrame({'Actual': actual_labels, 'Predicted': predicted_labels})

print(df_results)












Classes: ['malignant' 'benign']

--- Model Accuracy: 0.9561 ---

--- Confusion Matrix ---
[[41  2]
 [ 3 68]]

(Row=Actual, Col=Predicted)

--- Classification Report ---
              precision    recall  f1-score   support

   malignant       0.93      0.95      0.94        43
      benign       0.97      0.96      0.96        71

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114


--- Example Predictions (First 5 Test Samples) ---
      Actual  Predicted
0     benign     benign
1  malignant  malignant
2  malignant  malignant
3     benign     benign
4     benign     benign
