In [1]:
from sklearn.datasets import load_breast_cancer
breast_cancer_data = load_breast_cancer()


In [2]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the Breast Cancer dataset
dataset = load_breast_cancer()
features = pd.DataFrame(dataset.data, columns=dataset.feature_names)
labels = pd.Series(dataset.target)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Save the dataframes to CSV files
X_train.to_csv('train_features.csv', index=False)
X_test.to_csv('test_features.csv', index=False)
y_train.to_csv('train_labels.csv', index=False)
y_test.to_csv('test_labels.csv', index=False)


In [3]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Define the feature selection
feature_selector = SelectKBest(score_func=f_classif, k=10)  # Select top 10 features
selected_features = feature_selector.fit_transform(X_train, y_train)

# Print the selected features
selected_columns = X_train.columns[feature_selector.get_support()]
print("Selected Features:", selected_columns)


Selected Features: Index(['mean radius', 'mean perimeter', 'mean area', 'mean concavity',
       'mean concave points', 'worst radius', 'worst perimeter', 'worst area',
       'worst concavity', 'worst concave points'],
      dtype='object')


In [4]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

# Define the ANN model
ann_model = MLPClassifier(max_iter=1000)

# Define the parameter grid for Grid Search
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001]
}

# Set up Grid Search with Cross-Validation
grid_search_cv = GridSearchCV(estimator=ann_model, param_grid=param_grid, cv=3, n_jobs=-1)
grid_search_results = grid_search_cv.fit(X_train, y_train)

# Print the best parameters found by Grid Search
print(f"Optimal Parameters: {grid_search_results.best_params_}")


Optimal Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (50, 50), 'solver': 'adam'}


In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Retrieve the best parameters from Grid Search
best_params = grid_search_results.best_params_
best_ann_model = MLPClassifier(**best_params, max_iter=1000)
best_ann_model.fit(X_train, y_train)

# Predict and evaluate the model
predictions = best_ann_model.predict(X_test)
print(f"Accuracy on Test Data: {accuracy_score(y_test, predictions):.4f}")
print("Classification Report:\n", classification_report(y_test, predictions))


Accuracy on Test Data: 0.9708
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.92      0.96        63
           1       0.96      1.00      0.98       108

    accuracy                           0.97       171
   macro avg       0.98      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171



In [7]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.37.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Using cached blinker-1.8.2-py3-none-any.whl.metadata (1.6 kB)
Collecting click<9,>=7.0 (from streamlit)
  Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-17.0.0-cp311-cp311-win_amd64.whl.metadata (3.4 kB)
Collecting rich<14,>=10.14.0 (from streamlit)
  Using cached rich-13.7.1-py3-none-any.whl.metadata (18 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Using cached toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Using cached GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using cached pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import streamlit as st
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

st.title("Breast Cancer Prediction App")

# Load preprocessed data
X_train = pd.read_csv('train_features.csv')
X_test = pd.read_csv('test_features.csv')
y_train = pd.read_csv('train_labels.csv')
y_test = pd.read_csv('test_labels.csv')

# Initialize and train the ANN model
trained_model = MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.0001, max_iter=1000)
trained_model.fit(X_train, y_train)

# User input
st.sidebar.header("Enter Features")
user_input = st.sidebar.text_input("Enter feature values separated by commas (e.g., 0.1,0.2,0.3,...):")

if user_input:
    user_data = pd.DataFrame([user_input.split(",")], columns=X_train.columns)
    prediction = trained_model.predict(user_data)
    st.write(f"Prediction: {'Malignant' if prediction[0] == 1 else 'Benign'}")

# Model evaluation
y_test_pred = trained_model.predict(X_test)
st.write(f"Accuracy on Test Data: {accuracy_score(y_test, y_test_pred):.4f}")


  y = column_or_1d(y, warn=True)
