In [14]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib  # ✅ Use joblib instead of pickle

# Train a random-forest classifier on 15 hand-picked breast-cancer features
# and persist the model, the fitted scaler, and the feature list with joblib.

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target
features = data.feature_names

# Define top 15 features
top15 = [
    'worst area', 'worst concave points', 'mean concave points', 'worst radius',
    'worst perimeter', 'mean perimeter', 'mean concavity', 'mean area',
    'worst concavity', 'mean radius', 'mean texture', 'worst texture',
    'worst smoothness', 'mean smoothness', 'worst compactness'
]
# Convert the feature-name array to a list once instead of once per lookup.
feature_names = list(features)
top15_indices = [feature_names.index(f) for f in top15]
X_top15 = X[:, top15_indices]

# Train/test split
# Split BEFORE scaling so that the scaler's mean/variance are estimated from
# the training rows only; fitting on the full matrix leaks test-set
# statistics into preprocessing and into the persisted scaler.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_top15, y, test_size=0.2, random_state=42
)

# Scale features: fit on the training partition, reuse for everything else.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full matrix scaled with the train-fitted scaler; the name is kept in case
# later cells still reference `X_scaled`.
X_scaled = scaler.transform(X_top15)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save using joblib
# The feature list is stored alongside the model so that inference code can
# select the same columns in the same order the scaler and model were fit on.
joblib.dump(model, 'model.joblib')
joblib.dump(scaler, 'scaler.joblib')
joblib.dump(top15, 'features.joblib')

print("✅ Model, scaler, and features saved with joblib.")


✅ Model, scaler, and features saved with joblib.
