In [3]:
import pandas as pd
import numpy as np
import joblib

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report
)


In [4]:
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df["diagnosis"] = data.target

In [5]:
df["diagnosis"] = df["diagnosis"].map({0: "Malignant", 1: "Benign"})

# Selected features
selected_features = [
    "mean radius",
    "mean texture",
    "mean perimeter",
    "mean area",
    "mean concavity"
]

X = df[selected_features]
y = df["diagnosis"]

In [6]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Pipeline (Scaling + Model)
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm", SVC(kernel="rbf", probability=True, random_state=42))
    ]
)

In [7]:
pipeline.fit(X_train, y_train)

# Evaluate
y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [9]:

print("Model Performance:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-score : {f1:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Save model & encoder
joblib.dump(pipeline, "breast_cancer_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
print("Model saved successfully.")

Model Performance:
Accuracy : 0.9474
Precision: 1.0000
Recall   : 0.8571
F1-score : 0.9231

Classification Report:
              precision    recall  f1-score   support

      Benign       0.92      1.00      0.96        72
   Malignant       1.00      0.86      0.92        42

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114

Model saved successfully.


In [1]:
df = pd.read_csv("titanic.csv")


NameError: name 'pd' is not defined