In [None]:
import os

import joblib
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

## Load Dataset

In [None]:
# Load the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()

# Form the dataframe: one column per feature, named after the dataset's
# own feature names
df = pd.DataFrame(
    data=data.data,
    columns=data.feature_names
)

# Build the code -> label mapping from the dataset metadata instead of
# hard-coding it, so the labels cannot drift from the integer encoding.
# str() keeps the labels as plain Python strings.
class_labels = {code: str(name) for code, name in enumerate(data.target_names)}

# Store the target as readable class labels rather than integer codes
df['target'] = pd.Series(data.target, index=df.index).map(class_labels)

In [None]:
# Structural overview of the assembled dataframe
print(f'Dataset size: {df.shape}')
print(f'Dataset features: {df.columns.tolist()}')

print('Dataset info:')
df.info()  # info() writes its report to stdout itself

# Only the last expression of a cell is rendered automatically, so the
# summary statistics have to be printed explicitly; a bare `df.describe()`
# in the middle of the cell is silently discarded.
# Transposed (one row per column) so the wide table is not truncated.
print('Dataset description:')
print(df.describe().T)

# Check for missing values (per-column null counts, shown as the cell output)
df.isnull().sum()

In [None]:
# Absolute number of samples per class
print('Class distribution:')
print(df['target'].value_counts())

# Relative share of each class (replaces a verbatim duplicate of the
# counts above, which added no information)
print('Class proportions:')
print(df['target'].value_counts(normalize=True).round(4))

In [None]:
# Persist the labelled dataframe as CSV (row index is not written)
dataset_filename = 'breast_cancer_dataset.csv'
df.to_csv(dataset_filename, index=False)

## Data Preparation

In [None]:
# Separate the label column from the predictor columns
y = df['target']
X = df.drop(columns='target')
feature_names = list(X.columns)
# NOTE(review): this holds the name of the target column, not the class
# labels - confirm which one downstream code expects.
target_names = ['target']

# Hold out 20% of the rows for testing; stratifying on y keeps the class
# ratio the same in both splits, and the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize the features: the scaler is fitted on the training split only
# and then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a logistic regression classifier on the standardized training split
lr = LogisticRegression(max_iter=200, random_state=42).fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Evaluate on the held-out split: overall accuracy, then per-class metrics
lr_accuracy = accuracy_score(y_test, y_pred)
lr_report = classification_report(y_test, y_pred)
print(f'Accuracy: {lr_accuracy:.4f}')
print('Classification Report:')
print(lr_report)

In [None]:
# Fit an RBF-kernel support vector classifier on the standardized training split
svc = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)
y_pred = svc.predict(X_test)

# Evaluate on the held-out split: overall accuracy, then per-class metrics
svc_accuracy = accuracy_score(y_test, y_pred)
svc_report = classification_report(y_test, y_pred)
print(f'Accuracy: {svc_accuracy:.4f}')
print('Classification Report:')
print(svc_report)

In [None]:
# Save the fitted scaler so the same standardization can be re-applied later
scaler_filename = 'models/scaler.pkl'

# joblib.dump does not create missing parent directories; make sure the
# output folder exists so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(scaler_filename), exist_ok=True)

joblib.dump(scaler, scaler_filename)

In [None]:
# Save the trained models
lr_model_filename = 'models/logistic_regression.pkl'
svc_model_filename = 'models/svm.pkl'

# joblib.dump does not create missing parent directories; make sure each
# output folder exists so the cell also works on a fresh checkout.
for model_path in (lr_model_filename, svc_model_filename):
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

joblib.dump(lr, lr_model_filename)
joblib.dump(svc, svc_model_filename)