In [None]:
#IMPORTING LIBRARIES
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score


In [None]:
#LOADING DATA
# The loader's result is read by key: feature matrix, column names, target array.
data = load_breast_cancer()
feature_columns = data["feature_names"]
X = pd.DataFrame(data["data"], columns=feature_columns)  # features with named columns
y = data["target"]  # target array, kept as returned by the loader


In [None]:
#TRAINING TEST SPLIT
# 20% of the rows are held out for testing; the split is stratified on y
# and seeded so that re-running the cell yields the same partition.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
#BASELINE MODEL (NO FEATURE ENGINEERING)
# The scaler is fitted on the training split only; the test split is merely
# transformed with those fitted statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

baseline_model = LogisticRegression(max_iter=500).fit(X_train_scaled, y_train)

# Predicted labels feed accuracy/precision/recall; the second predict_proba
# column feeds ROC-AUC.
y_pred_base = baseline_model.predict(X_test_scaled)
y_prob_base = baseline_model.predict_proba(X_test_scaled)[:, 1]

print("BASELINE PERFORMANCE")
baseline_report = (
    ("Accuracy:", accuracy_score(y_test, y_pred_base)),
    ("Precision:", precision_score(y_test, y_pred_base)),
    ("Recall:", recall_score(y_test, y_pred_base)),
    ("ROC-AUC:", roc_auc_score(y_test, y_prob_base)),
)
for label, score in baseline_report:
    print(label, score)


BASELINE PERFORMANCE
Accuracy: 0.9824561403508771
Precision: 0.9861111111111112
Recall: 0.9861111111111112
ROC-AUC: 0.9953703703703703


In [None]:
#FEATURE ENGINEERING
def engineer_features(df):
    """Return a copy of ``df`` extended with three derived columns.

    Reads the "mean radius", "mean perimeter", "mean area", "mean texture"
    and "mean smoothness" columns. The input frame is not modified.

    Added columns:
        radius_perimeter_ratio: mean radius / mean perimeter
        area_radius_ratio: mean area / (mean radius squared)
        texture_smoothness_interaction: mean texture * mean smoothness
    """
    radius = df["mean radius"]
    derived = {
        "radius_perimeter_ratio": radius / df["mean perimeter"],
        "area_radius_ratio": df["mean area"] / (radius ** 2),
        "texture_smoothness_interaction": df["mean texture"] * df["mean smoothness"],
    }
    # assign() builds a new frame, so the caller's frame stays untouched.
    return df.assign(**derived)

# Apply the same feature derivation to both partitions.
X_train_fe, X_test_fe = map(engineer_features, (X_train, X_test))


In [None]:
#RETRAINING MODEL WITH NEW FEATURES
# A separate scaler is fitted here because the engineered frames carry
# extra columns; it is fitted on the training split only.
scaler_fe = StandardScaler()
X_train_fe_scaled = scaler_fe.fit_transform(X_train_fe)
X_test_fe_scaled = scaler_fe.transform(X_test_fe)

fe_model = LogisticRegression(max_iter=500)
fe_model.fit(X_train_fe_scaled, y_train)

# Predicted labels feed accuracy/precision/recall; the second predict_proba
# column feeds ROC-AUC.
y_pred_fe = fe_model.predict(X_test_fe_scaled)
y_prob_fe = fe_model.predict_proba(X_test_fe_scaled)[:, 1]

print("\nAFTER FEATURE ENGINEERING")
print("Accuracy:", accuracy_score(y_test, y_pred_fe))
print("Precision:", precision_score(y_test, y_pred_fe))
# Recall is computed from the model's predicted labels, like every other
# label-based metric in this notebook, rather than from a hand-thresholded
# probability array.
print("Recall:", recall_score(y_test, y_pred_fe))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_fe))



AFTER FEATURE ENGINEERING
Accuracy: 0.9824561403508771
Precision: 0.9861111111111112
Recall: 0.9861111111111112
ROC-AUC: 0.996031746031746


In [None]:
#COMPARING RESULTS
def _metric_column(y_pred, y_prob):
    """Return [accuracy, precision, recall, ROC-AUC] measured against y_test.

    The first three metrics use the predicted labels ``y_pred``; ROC-AUC
    uses the scores in ``y_prob``.
    """
    return [
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        roc_auc_score(y_test, y_prob),
    ]


# One row per metric, one column per model variant.
results = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "ROC-AUC"],
    "Baseline": _metric_column(y_pred_base, y_prob_base),
    "After Feature Eng": _metric_column(y_pred_fe, y_prob_fe),
})

print(results)


      Metric  Baseline  After Feature Eng
0   Accuracy  0.982456           0.982456
1  Precision  0.986111           0.986111
2     Recall  0.986111           0.986111
3    ROC-AUC  0.995370           0.996032
