In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Fetch scikit-learn's bundled breast cancer dataset; keep the full object
# around (later cells read data.feature_names) and pull out features/labels
data = load_breast_cancer()
X = data.data
y = data.target


In [3]:
# Hold out 20% of the samples for testing; the fixed seed makes the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Standardise the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Fit an RBF-kernel Support Vector Machine on the standardised training data
svm_classifier = SVC(
    kernel='rbf',
    random_state=42,
)
svm_classifier.fit(X_train_scaled, y_train)

In [6]:
# Predict class labels for the held-out (standardised) test samples
y_pred = svm_classifier.predict(X=X_test_scaled)

In [7]:
# Evaluate the model on the held-out test set
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
# Take the class names from the dataset object instead of hard-coding them,
# so the report rows cannot drift out of sync with the label encoding.
print(classification_report(y_test, y_pred, target_names=list(data.target_names)))


Confusion Matrix:
[[41  2]
 [ 0 71]]

Classification Report:
              precision    recall  f1-score   support

   malignant       1.00      0.95      0.98        43
      benign       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [8]:
# List each feature name next to its column index in X
print("\nFeature names and their indices:")
for position, name in enumerate(data.feature_names):
    print("{}: {}".format(position, name))


Feature names and their indices:
0: mean radius
1: mean texture
2: mean perimeter
3: mean area
4: mean smoothness
5: mean compactness
6: mean concavity
7: mean concave points
8: mean symmetry
9: mean fractal dimension
10: radius error
11: texture error
12: perimeter error
13: area error
14: smoothness error
15: compactness error
16: concavity error
17: concave points error
18: symmetry error
19: fractal dimension error
20: worst radius
21: worst texture
22: worst perimeter
23: worst area
24: worst smoothness
25: worst compactness
26: worst concavity
27: worst concave points
28: worst symmetry
29: worst fractal dimension


In [12]:
# Function to predict for new data
def predict_cancer(new_data):
    """Classify a single patient sample as "Malignant" or "Benign".

    Uses the notebook-level ``scaler`` and ``svm_classifier`` fitted in the
    earlier cells, and ``data.feature_names`` for the expected feature count.

    Parameters
    ----------
    new_data : array-like
        Feature values for exactly one sample, ordered like
        ``data.feature_names``. May be a 1-D sequence (list, tuple, ndarray)
        or a row/column vector such as shape ``(1, n)`` or ``(n, 1)``.

    Returns
    -------
    str
        "Malignant" when the classifier predicts label 0, otherwise "Benign".

    Raises
    ------
    ValueError
        If the input does not hold exactly one sample with
        ``len(data.feature_names)`` numeric features.
    """
    n_features = len(data.feature_names)
    # np.asarray lets plain lists/tuples through (they have no .reshape);
    # squeeze flattens (1, n) / (n, 1) inputs, and atleast_1d keeps a
    # single-value input one-dimensional so the shape check below is uniform.
    sample = np.atleast_1d(np.squeeze(np.asarray(new_data, dtype=float)))
    if sample.ndim != 1 or sample.shape[0] != n_features:
        raise ValueError(f"Input data must have {n_features} features")
    # The scaler and classifier expect a 2-D (n_samples, n_features) array.
    new_data_scaled = scaler.transform(sample.reshape(1, -1))
    prediction = svm_classifier.predict(new_data_scaled)
    return "Malignant" if prediction[0] == 0 else "Benign"

In [16]:
# Demonstrate predict_cancer on a synthetic "average" patient whose
# feature values are the per-column means over the whole dataset
example_patient = np.mean(X, axis=0)
print(
    "\nPrediction for example patient (using mean values):",
    predict_cancer(example_patient),
)


Prediction for example patient (using mean values): Benign
