In [2]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import pickle

# --- Data acquisition and cleaning -----------------------------------------
# Pull UCI repository dataset id 15, turn '?' placeholders into NaN, coerce
# every feature column to a numeric dtype, and drop rows that still have gaps.
breast_cancer = fetch_ucirepo(id=15)
raw_features = breast_cancer.data.features
numeric_features = raw_features.replace('?', np.nan).apply(pd.to_numeric)
X = numeric_features.dropna()

# Keep only the target rows that survived the dropna above, then recode the
# class labels 2 -> 0 and 4 -> 1.
# NOTE(review): presumably 2 = benign and 4 = malignant — confirm against the
# dataset documentation.
raw_targets = breast_cancer.data.targets
y = raw_targets.loc[X.index].replace({2: 0, 4: 1})

# --- Feature selection -----------------------------------------------------
# The model is trained on this fixed five-column subset, in this order.
top_features = [
    'Bare_nuclei',
    'Normal_nucleoli',
    'Clump_thickness',
    'Uniformity_of_cell_shape',
    'Single_epithelial_cell_size',
]
X_top = X[top_features]

# --- Train/test split and standardisation ----------------------------------
# 75/25 split, stratified on the label so both parts keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X_top,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Fit the scaler on the training portion only, then apply the same
# transformation to both portions so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model fitting ---------------------------------------------------------
svm_params = {
    'kernel': 'rbf',
    'C': 1.0,
    'gamma': 'scale',
    'random_state': 42,
}
rbf_svm = SVC(**svm_params)
# SVC expects a 1-D label vector, so flatten the single-column target frame.
rbf_svm.fit(X_train_scaled, y_train.to_numpy().ravel())

# --- Persistence -----------------------------------------------------------
# Write the fitted model first and the fitted scaler second, each to its own
# pickle file in the current working directory.
artifacts = (
    ('rbf_svm_model.pkl', rbf_svm),
    ('scaler.pkl', scaler),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as file:
        pickle.dump(artifact, file)

print("Model and scaler saved as 'rbf_svm_model.pkl' and 'scaler.pkl'.")


Model and scaler saved as 'rbf_svm_model.pkl' and 'scaler.pkl'.
