In [None]:
import pandas as pd
import numpy as np
import pickle

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Load the scikit-learn breast cancer dataset and place the feature matrix and
# the target vector side by side in one DataFrame ('diagnosis' holds the target).
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    diagnosis=data.target
)

# Preview the first rows as the cell output.
df.head()


In [None]:
# Narrow the frame to the five "mean" measurements that serve as model
# inputs, plus the 'diagnosis' target column.
df = df.loc[
    :,
    [
        'mean radius',
        'mean texture',
        'mean perimeter',
        'mean area',
        'mean smoothness',
        'diagnosis',
    ],
]

In [None]:
# End-to-end pass: split, scale, fit and persist the model in a single cell.
# The cells further down repeat these same steps one at a time, rebinding the
# same names and rewriting the same pickle file.
X = df.drop(columns='diagnosis')
y = df['diagnosis']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply them
# to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# fit() returns the estimator itself, so construction and training can chain.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Store model and scaler together as one tuple so they are reloaded as a pair.
with open("breast_cancer_model.pkl", "wb") as pkl_file:
    pickle.dump((model, scaler), pkl_file)

print("breast_cancer_model.pkl saved successfully")

In [None]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()


In [None]:
# Separate the predictors from the 'diagnosis' target.
X = df.drop(columns=['diagnosis'])
y = df['diagnosis']

# 80/20 train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Fit the scaler on the training split only, then apply it to both splits.
# NOTE: X_train / X_test are rebound to their scaled values here, so
# re-running this cell without re-running the split cell first would fit the
# scaler on data that has already been scaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Train the classifier on the scaled training data. fit() returns the
# estimator itself, so the trailing expression shows the same fitted model
# as the cell output.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model


In [None]:
# Score the held-out split: overall accuracy plus the per-class report.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print(report)


In [None]:
# Persist the fitted model and its scaler together as a single
# (model, scaler) tuple so both are restored by one load.
artifacts = (model, scaler)
with open("breast_cancer_model.pkl", "wb") as pkl_file:
    pickle.dump(artifacts, pkl_file)

print("breast_cancer_model.pkl saved successfully")


In [None]:
# Round-trip check: reload the saved (model, scaler) pair and score one row.
# pickle.load can execute arbitrary code, so only use it on files you trust
# (this file is the one written by the save cell of this notebook).
with open("breast_cancer_model.pkl", "rb") as file:
    loaded_model, loaded_scaler = pickle.load(file)

# X.iloc[[0]] (list indexer) keeps the first row as a one-row DataFrame with
# its column names. Wrapping the Series in a list instead drops the names and
# makes scikit-learn warn that the input lacks the feature names the scaler
# was fitted with; the scaled values are the same either way.
sample = loaded_scaler.transform(X.iloc[[0]])
prediction = loaded_model.predict(sample)
print(f"Prediction for the first sample: {prediction[0]}")