<a href="https://colab.research.google.com/github/Kishan-jobs/Calypso/blob/main/breast_cancer(ann98_).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, roc_curve, confusion_matrix
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping

# Hyperparameters
# L2 regularization strengths swept during lambda tuning (0 means no penalty).
lambda_values = [0, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
# Number of epochs passed to every model.fit call (an upper bound wherever
# early stopping is attached).
epochs = 150
# Learning rate handed to the Adam optimizer for every model.
initial_learning_rate = 1e-2

# Load and cache data
@st.cache_data
def load_data():
    """Read 'bcdataset.csv' and return it ready for modelling.

    The 'id' and 'Unnamed: 32' columns are removed when present, and the
    'diagnosis' column is recoded from 'M'/'B' to 1/0. The result is cached
    by Streamlit via ``st.cache_data``.
    """
    diagnosis_codes = {'M': 1, 'B': 0}
    return (
        pd.read_csv('bcdataset.csv')
        .drop(columns=['id', 'Unnamed: 32'], errors='ignore')
        .assign(diagnosis=lambda frame: frame['diagnosis'].map(diagnosis_codes))
    )

# Preprocess: Train / CV / Test split
def preprocess_data(df):
    """Split *df* into train / CV / test sets and fit the preprocessing chain.

    Steps, in order: stratified 70/15/15 split, SMOTE oversampling of the
    training split only, standard scaling fitted on the resampled training
    data, then PCA keeping 95 % of the variance.

    Returns a 9-tuple:
        (train features, resampled train labels, CV features, CV labels,
         test features, test labels, fitted scaler, fitted PCA,
         list of the original feature column names)
    where all feature matrices are the PCA-projected versions.
    """
    target = df['diagnosis']
    features = df.drop(columns='diagnosis')
    feature_names = features.columns.tolist()

    # 70 % for training; the held-out 30 % is then halved into CV and test.
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        features, target, test_size=0.30, stratify=target, random_state=42
    )
    X_val, X_eval, y_val, y_eval = train_test_split(
        X_holdout, y_holdout, test_size=0.50, stratify=y_holdout, random_state=42
    )

    # Balance the classes on the training split only; CV and test stay untouched.
    X_balanced, y_balanced = SMOTE(random_state=42).fit_resample(X_fit, y_fit)

    # Both transformers are fitted exclusively on the resampled training data.
    scaler = StandardScaler()
    pca = PCA(n_components=0.95)
    train_projected = pca.fit_transform(scaler.fit_transform(X_balanced))

    def project(split):
        # Apply the already-fitted scaler and PCA to a held-out split.
        return pca.transform(scaler.transform(split))

    return (
        train_projected, y_balanced,
        project(X_val), y_val,
        project(X_eval), y_eval,
        scaler, pca, feature_names,
    )

# Lambda tuning with early stopping
def tune_lambda(X_train, y_train, X_cv, y_cv):
    """Train one network per value in ``lambda_values`` and record its errors.

    Each network has a single 100-unit ReLU hidden layer with an L2 kernel
    penalty of the current lambda and a linear (logit) output. Training uses
    early stopping on the CV loss (patience 10, best weights restored).

    Returns:
        (train_errors, cv_errors): two lists, aligned with ``lambda_values``,
        holding ``1 - accuracy`` at a 0.5 probability threshold.
    """
    def misclassification_rate(net, features, labels):
        # The network emits logits, so squash them before thresholding at 0.5.
        probabilities = tf.sigmoid(net.predict(features)).numpy().flatten()
        return 1 - accuracy_score(labels, probabilities > 0.5)

    n_features = X_train.shape[1]
    train_errors = []
    cv_errors = []

    for reg_strength in lambda_values:
        layers = [
            Input(shape=(n_features,)),
            Dense(100, activation='relu', kernel_regularizer=regularizers.l2(reg_strength)),
            Dense(1, activation='linear'),
        ]
        net = Sequential(layers)
        net.compile(
            optimizer=Adam(learning_rate=initial_learning_rate),
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        )

        stopper = EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True, verbose=0
        )
        net.fit(
            X_train, y_train,
            validation_data=(X_cv, y_cv),
            epochs=epochs,
            batch_size=16,
            verbose=0,
            callbacks=[stopper],
        )

        train_errors.append(misclassification_rate(net, X_train, y_train))
        cv_errors.append(misclassification_rate(net, X_cv, y_cv))

    return train_errors, cv_errors

def plot_lambda_errors(train_errors, cv_errors):
    """Plot train and CV error against log10(lambda) in the Streamlit app.

    Args:
        train_errors: Training error for each entry of ``lambda_values``.
        cv_errors: Cross-validation error for each entry of ``lambda_values``.

    The figure is rendered with ``st.pyplot`` and then closed.
    """
    positive_logs = [np.log10(lam) for lam in lambda_values if lam > 0]
    # lambda = 0 has no logarithm. Place it one decade to the left of the
    # smallest positive lambda so it never lands on top of a real point
    # (a fixed -4 collides with lambda = 0.0001, whose log10 is also -4).
    zero_position = min(positive_logs) - 1 if positive_logs else 0
    log_lambdas = [np.log10(lam) if lam > 0 else zero_position for lam in lambda_values]
    tick_labels = [
        f"{pos:g}" if lam > 0 else "λ=0"
        for lam, pos in zip(lambda_values, log_lambdas)
    ]

    # Use an explicit Figure instead of pyplot's global state, and close it
    # afterwards so figures do not accumulate across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(log_lambdas, train_errors, marker='o', label='Train Error')
    ax.plot(log_lambdas, cv_errors, marker='o', label='CV Error')
    ax.set_xticks(log_lambdas)
    ax.set_xticklabels(tick_labels)
    ax.set_xlabel('log₁₀(λ)')
    ax.set_ylabel('Fractional Error (1 - Accuracy)')
    ax.set_title('Train vs CV Error vs λ')
    ax.legend()
    ax.grid(True)
    st.pyplot(fig)
    plt.close(fig)

# Final training with best lambda
def train_final_model(X_train, y_train, X_cv, y_cv, X_test, y_test, cv_errors):
    """Retrain with the best lambda on train + CV data and report test metrics.

    Args:
        X_train, y_train: Training features and labels.
        X_cv, y_cv: Cross-validation features and labels; stacked onto the
            training data for the final fit.
        X_test, y_test: Held-out features and labels used only for evaluation.
        cv_errors: CV error per entry of ``lambda_values``; the lambda with
            the lowest error is selected (first one wins on ties).

    Returns:
        The fitted Keras model. Its output layer is linear, so predictions
        are logits and need a sigmoid to become probabilities.

    Side effects:
        Writes the chosen lambda, the test AUC, the classification report and
        the ROC curve to the Streamlit page.
    """
    best_idx = np.argmin(cv_errors)
    best_lambda = lambda_values[best_idx]
    st.markdown(f"**✅ Best λ = {best_lambda} (CV Error = {cv_errors[best_idx]:.4f})**")

    # Lambda is already chosen, so the CV split can be folded into training.
    X_final = np.vstack([X_train, X_cv])
    y_final = np.concatenate([y_train, y_cv])

    model = Sequential([
        Input(shape=(X_final.shape[1],)),
        Dense(100, activation='relu', kernel_regularizer=regularizers.l2(best_lambda)),
        Dense(1, activation='linear')
    ])
    # NOTE(review): the 'accuracy' metric appears to threshold the raw logits
    # at 0.5 rather than 0; it is not reported anywhere (verbose=0, history
    # discarded), so it does not affect the results shown below.
    model.compile(optimizer=Adam(learning_rate=initial_learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    model.fit(X_final, y_final, epochs=epochs, batch_size=16, verbose=0)

    # Evaluation: convert logits to probabilities before scoring.
    y_probs = tf.sigmoid(model.predict(X_test)).numpy().flatten()
    y_preds = (y_probs > 0.5).astype(int)
    auc = roc_auc_score(y_test, y_probs)
    report = classification_report(y_test, y_preds, output_dict=True)
    fpr, tpr, _ = roc_curve(y_test, y_probs)

    st.markdown(f"### 🎯 Final Test AUC: `{auc:.4f}`")
    st.subheader("📋 Classification Report")
    # The report mixes per-class dicts with bare floats (e.g. overall accuracy).
    st.json({k: {m: round(v, 4) for m, v in v_dict.items()} if isinstance(v_dict, dict) else round(v_dict, 4)
             for k, v_dict in report.items()})

    st.subheader("📈 ROC Curve")
    # Use an explicit Figure instead of pyplot's global state, and close it
    # afterwards so figures do not accumulate across Streamlit reruns.
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, label=f"AUC = {auc:.4f}")
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    ax.set_title("ROC Curve - Final Model")
    ax.legend()
    st.pyplot(fig)
    plt.close(fig)

    return model

# Streamlit UI
st.title("🤖 Breast Cancer Detection with ANN and Regularization")

df = load_data()
X_train, y_train, X_cv, y_cv, X_test, y_test, scaler, pca, feature_names = preprocess_data(df)


@st.cache_resource
def _train_fallback_model(X_train, y_train):
    """Train the unregularized fallback network once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    avoids retraining for the full number of epochs each time. The output
    layer is linear (logits), matching ``train_final_model``, so the
    prediction code below applies exactly one sigmoid in both branches.
    """
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(100, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=initial_learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True))
    model.fit(X_train, y_train, epochs=epochs, batch_size=16, verbose=0)
    return model


if st.checkbox("🔍 Perform Lambda Tuning & Select Best Model"):
    train_errors, cv_errors = tune_lambda(X_train, y_train, X_cv, y_cv)
    plot_lambda_errors(train_errors, cv_errors)
    final_model = train_final_model(X_train, y_train, X_cv, y_cv, X_test, y_test, cv_errors)
else:
    # Simple fallback model if tuning not chosen
    final_model = _train_fallback_model(X_train, y_train)


def _predict_malignant_probability(features):
    """Return P(malignant) per row of a DataFrame holding the raw feature columns."""
    reduced = pca.transform(scaler.transform(features))
    logits = final_model.predict(reduced)
    return tf.sigmoid(logits).numpy().flatten()


# Manual Prediction
st.subheader("🔍 Manual Input Prediction")
input_values = []
cols = st.columns(3)
for i, feature in enumerate(feature_names):
    with cols[i % 3]:
        # Pre-fill every field with the dataset mean of that feature.
        val = st.number_input(feature, value=float(df[feature].mean()), format="%.4f")
        input_values.append(val)

if st.button("Predict Diagnosis"):
    df_input = pd.DataFrame([input_values], columns=feature_names)
    prob = float(_predict_malignant_probability(df_input)[0])
    is_malignant = prob > 0.5
    label = "Malignant" if is_malignant else "Benign"
    # Report the probability of the predicted class, not always P(malignant).
    confidence = prob if is_malignant else 1 - prob
    st.success(f"🎯 Prediction: {label} (Confidence: {confidence:.4f})")

# Batch Prediction
st.subheader("📁 Predict from CSV File")
uploaded_file = st.file_uploader("Upload CSV with features", type=["csv"])
if uploaded_file is not None:
    try:
        batch_df = pd.read_csv(uploaded_file)
        missing = [name for name in feature_names if name not in batch_df.columns]
        if missing:
            st.error(f"Error processing file. Missing feature columns: {missing}")
        else:
            # Select the training features in training order so that extra
            # columns (e.g. an id) or a different column order do not break
            # the fitted scaler.
            probs = _predict_malignant_probability(batch_df[feature_names])
            malignant = probs > 0.5
            batch_df["Diagnosis"] = np.where(malignant, "Malignant", "Benign")
            # Confidence is the probability of the predicted class.
            batch_df["Confidence"] = np.where(malignant, probs, 1 - probs).round(4)
            st.write(batch_df)
    except Exception as e:
        # UI boundary: surface the underlying cause instead of discarding it.
        st.error(f"Error processing file. Check formatting. ({e})")