In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.base import clone
import pickle

# ---------------------------------------------------------------------------
# Training script: fit a scaler and a probability-calibrated LinearSVC on the
# cleaned dataset, then persist both objects with pickle.
# ---------------------------------------------------------------------------

# Data: columns '1'..'9' are the inputs, column '10' is the target
# (per the original note, the target is already encoded as 0/1).
df = pd.read_csv('breast-cancer-wisconsin_cleaned.csv')
features = [str(col) for col in range(1, 10)]
X = df[features]
y = df['10']

# Hold out 25% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Standardise the features using statistics learned from the training
# rows only. Only the training split is transformed in this cell.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Tuned LinearSVC hyperparameters, gathered in one place.
_svc_params = {
    'C': 0.5,
    'max_iter': 100,
    'penalty': 'l2',
    'dual': False,
    'class_weight': 'balanced',
    'random_state': 42,
}
best_model = LinearSVC(**_svc_params).fit(X_train_scaled, y_train)

# LinearSVC has no predict_proba, so wrap it in a sigmoid calibrator.
# NOTE: the calibrator is given a fresh clone of best_model (same
# hyperparameters, unfitted), so the fit above is not reused by it;
# best_model itself stays fitted and available as a plain classifier.
calibrated_model = CalibratedClassifierCV(clone(best_model), method='sigmoid')
calibrated_model.fit(X_train_scaled, y_train)

# Persist the calibrated classifier first, then the scaler.
for _path, _artifact in (('model.pkl', calibrated_model), ('scaler.pkl', scaler)):
    with open(_path, 'wb') as f:
        pickle.dump(_artifact, f)

print("Model and scaler saved as pickle files.")

Model and scaler saved as pickle files.
