# Breast Cancer Prediction Model Development

## 1. Load Dataset

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib

# Fetch scikit-learn's bundled breast cancer dataset.
data = load_breast_cancer()

# Name of the label column, and the subset of measurements used as model inputs.
target = 'diagnosis'
selected_features = [
    'mean radius',
    'mean texture',
    'mean perimeter',
    'mean area',
    'mean smoothness',
]

# One table: every feature column, with the class label appended as the last column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    **{target: data.target}
)

# Quick look at the first rows of the chosen inputs.
print(df[selected_features].head())

## 2. Preprocessing & Splitting

In [None]:
# Label vector and feature matrix taken from the prepared frame.
y = df[target]
X = df[selected_features]

# Hold out 20% of the rows for evaluation; the split is seeded for
# reproducibility and stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Two-stage estimator: standardise the inputs, then fit logistic regression.
# Keeping the scaler inside the pipeline means it is fitted on training data only.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(random_state=42)),
])

## 3. Train Model (Logistic Regression)

In [None]:
# Fit the scaler and the classifier together on the training split only.
pipeline.fit(X=X_train, y=y_train)
print("Model trained.")

## 4. Evaluate Model

In [None]:
# Score the held-out rows with the fitted pipeline.
y_pred = pipeline.predict(X_test)

# Per-class precision/recall/F1 (labelled with the dataset's class names)
# and the overall fraction of correct predictions.
report = classification_report(
    y_test,
    y_pred,
    target_names=data.target_names,
)
accuracy = accuracy_score(y_test, y_pred)

print(report)
print("Accuracy:", accuracy)

## 5. Save Model

In [None]:
# Persist the whole fitted pipeline (scaler + classifier) so it can be reloaded
# as a single object. NOTE: joblib files are pickle-based; only load them from
# sources you trust.
joblib.dump(value=pipeline, filename='breast_cancer_model.pkl')
print("Model saved to breast_cancer_model.pkl")