# Breast Cancer Classification (Logistic Regression as Final Model)


In [ ]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

## Load Dataset

In [ ]:
# Load the scikit-learn breast cancer dataset: `X` holds the feature matrix
# as a DataFrame with named columns, `y` holds the target labels.
data = load_breast_cancer()
y = data.target
X = pd.DataFrame(data.data, columns=data.feature_names)

# `df` is a separate copy of the features with the labels appended as a
# 'target' column; `X` itself is left without the label column.
df = X.assign(target=y)

# Preview the first rows (rendered as the cell output).
df.head()

## Train/Test Split and Feature Scaling

In [ ]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is not stratified on `y` -- consider `stratify=y`
# if the class proportions should be preserved in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training partition only, then apply
# the same fitted transform to both partitions so that no test-set
# information is used when fitting the scaler.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Baseline Model Comparison

In [ ]:
# Candidate classifiers keyed by display name. Settings are library defaults
# except for the fixed seeds on the tree ensembles and the liblinear solver /
# 500-iteration cap on logistic regression.
models = dict(
    LogisticRegression=LogisticRegression(solver='liblinear', max_iter=500),
    RandomForest=RandomForestClassifier(random_state=42),
    GradientBoosting=GradientBoostingClassifier(random_state=42),
)

# Fit each candidate on the training partition and record its accuracy on the
# held-out test partition.
results = {}
for name, model in models.items():
    # fit() returns the estimator itself, so fit and predict can be chained.
    pred = model.fit(X_train, y_train).predict(X_test)
    results[name] = accuracy_score(y_test, pred)

# One row per model with a single 'Accuracy' column (rendered as the cell output).
pd.DataFrame.from_dict(results, orient='index', columns=['Accuracy'])

## Final Model (Logistic Regression Selected)

In [ ]:
# Train a fresh logistic regression with the same settings used in the
# baseline comparison, then predict labels for the test partition.
final_model = LogisticRegression(solver='liblinear', max_iter=500).fit(X_train, y_train)
y_pred = final_model.predict(X_test)

# Text report: overall accuracy (rounded to 4 decimals) followed by the
# per-class precision / recall / F1 table.
test_accuracy = round(accuracy_score(y_test, y_pred), 4)
print('Final Model: Logistic Regression')
print('Accuracy:', test_accuracy)
print(classification_report(y_test, y_pred))

# Confusion matrix drawn as an annotated heatmap with integer cell counts.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()

## Save Model

In [ ]:
# Persist the fitted classifier to disk (file name and message unchanged so
# existing consumers of this artifact keep working).
joblib.dump(final_model, 'best_model_logistic_regression.pkl')
print('✅ Model saved as best_model_logistic_regression.pkl')

# The classifier was trained on StandardScaler-transformed features, so the
# fitted scaler is part of the model: without it, raw inputs cannot be put on
# the scale the coefficients expect. Save it next to the classifier so that
# inference code can load both and call scaler.transform() before predict().
joblib.dump(scaler, 'scaler_logistic_regression.pkl')
print('✅ Scaler saved as scaler_logistic_regression.pkl')