In [1]:
# Importing necessary libraries
import pickle
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score

In [2]:
# Load the saved model, scaler, and selector
# All three artifacts are read from pickle files in the current working directory and are
# needed by the later cells before any prediction can be made.
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only load
# artifacts that you created yourself or otherwise fully trust.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in binary mode and is closed again when the ``with``
    block exits, whether unpickling succeeds or raises.
    """
    with open(path, 'rb') as artifact_file:
        return pickle.load(artifact_file)


mlp = _load_pickle('breast_cancer_model.pkl')  # Load the trained MLP model
scaler = _load_pickle('scaler.pkl')  # Load the scaler for data normalization
selector = _load_pickle('selector.pkl')  # Load the selector for feature reduction

In [3]:
# Fetch scikit-learn's bundled breast cancer dataset and lay it out as a table
# The frame gets one column per feature name plus a trailing 'target' column,
# so predictions can be compared against the known labels if needed
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)


In [4]:
# Describe one new sample as a mapping of feature name -> single-element list
# Each value is wrapped in a list so the mapping converts directly into a one-row table;
# the pairs are listed in the order the keys should appear in the dictionary
input_data = {
    feature: [value]
    for feature, value in (
        ('mean radius', 17.99),
        ('mean texture', 10.38),
        ('mean perimeter', 122.80),
        ('mean area', 1001.0),
        ('mean smoothness', 0.11840),
        ('mean compactness', 0.27760),
        ('mean concavity', 0.3001),
        ('mean concave points', 0.14710),
        ('mean symmetry', 0.2419),
        ('mean fractal dimension', 0.07871),
        ('radius error', 1.095),
        ('texture error', 0.9053),
        ('perimeter error', 8.589),
        ('area error', 153.40),
        ('smoothness error', 0.00663),
        ('compactness error', 0.04954),
        ('concavity error', 0.05373),
        ('concave points error', 0.01587),
        ('symmetry error', 0.03003),
        ('fractal dimension error', 0.00268),
        ('worst radius', 25.38),
        ('worst texture', 17.33),
        ('worst perimeter', 184.60),
        ('worst area', 2019.0),
        ('worst smoothness', 0.1622),
        ('worst compactness', 0.6656),
        ('worst concavity', 0.7119),
        ('worst concave points', 0.2654),
        ('worst symmetry', 0.4601),
        ('worst fractal dimension', 0.11890),
    )
}


In [5]:
# Turn the sample dictionary into a one-row DataFrame (keys become columns),
# the tabular form handed to the scaling and feature-selection steps
input_df = pd.DataFrame.from_dict(input_data)


In [6]:
# Transform the input data using the saved scaler and selector
# Order matters: the selector is applied to the scaler's output, not to the raw frame,
# so the model receives scaled values restricted to the selected features.
# NOTE(review): scaler and selector are unpickled objects; presumably fitted scikit-learn
# transformers that expect the same feature columns as training data - confirm against the training code.
input_data_scaled = scaler.transform(input_df)
input_data_selected = selector.transform(input_data_scaled)

In [7]:
# Make predictions using the trained model
# Here we predict both the class and the probability of the class to provide more detailed output
prediction = mlp.predict(input_data_selected)
prediction_proba = mlp.predict_proba(input_data_selected)[0][1]  # Probability of being malignant


In [8]:
# Print the result, converting binary prediction to a human-readable format
result = "Malignant" if prediction[0] == 1 else "Benign"
print(f"Prediction for the new sample: {result}")
print(f"Probability of being malignant: {prediction_proba:.4f}")

Prediction for the new sample: Benign
Probability of being malignant: 0.0027
