<a href="https://colab.research.google.com/github/Kavya-sri-05/genai/blob/main/Logistic_Regression_with_Gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
# Logistic Regression with Gradio Interface - Improved with User Input and Graphs

!pip install gradio scikit-learn pandas numpy matplotlib seaborn

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.datasets import load_breast_cancer
import io
from contextlib import redirect_stdout

def train_model_from_upload(file, feature_cols, target_col, test_size, random_state, C_value, max_iter):
    """Train a binary logistic-regression model on an uploaded CSV/Excel file.

    Args:
        file: The upload as delivered by the Gradio ``File`` component: either
            an object exposing the path as ``.name`` or a plain path string.
        feature_cols: Comma-separated names of the feature columns.
        target_col: Name of the target column; it must hold exactly two
            distinct values.
        test_size: Fraction of the rows held out for evaluation.
        random_state: Seed for the split and the solver (cast to ``int``).
        C_value: Value passed to ``LogisticRegression`` as ``C``.
        max_iter: Solver iteration limit (cast to ``int``).

    Returns:
        A ``(confusion_matrix_figure, roc_figure, report_text)`` tuple. On any
        validation or runtime failure the two figures are ``None`` and the
        third element is a human-readable error message.
    """
    try:
        if file is None:
            return None, None, "Please upload a CSV or Excel file first."

        # Accept both a path string and a file wrapper carrying the path in .name.
        path = file if isinstance(file, str) else file.name
        extension_probe = path.lower()  # make the extension check case-insensitive
        if extension_probe.endswith('.csv'):
            df = pd.read_csv(path)
        elif extension_probe.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(path)
        else:
            return None, None, "Unsupported file format. Please upload a CSV or Excel file."

        # Validate feature and target columns
        if target_col not in df.columns and isinstance(target_col, str):
            # Tolerate stray whitespace typed around the column name.
            target_col = target_col.strip()
        if target_col not in df.columns:
            return None, None, f"Target column '{target_col}' not found in dataset."

        feature_list = [name.strip() for name in (feature_cols or '').split(',') if name.strip()]
        if not feature_list:
            return None, None, "Please provide at least one feature column."
        for feature in feature_list:
            if feature not in df.columns:
                return None, None, f"Feature column '{feature}' not found in dataset."

        # Prepare data
        X = df[feature_list]
        y = df[target_col]

        # Check if target is binary
        unique_values = y.unique()
        if len(unique_values) != 2:
            return None, None, f"Target column must have exactly 2 unique values for binary classification. Found {len(unique_values)} unique values."

        # Map target to 0/1 if needed
        if not all(val in [0, 1] for val in unique_values):
            value_map = {unique_values[0]: 0, unique_values[1]: 1}
            y = y.map(value_map)
            # classification_report measures len() of every name, so the
            # original labels must be strings even when the data holds numbers.
            target_names = [str(label) for label in value_map]
        else:
            # Normalise bool / float encodings of 0 and 1 to plain integers.
            y = y.astype(int)
            target_names = ['Class 0', 'Class 1']

        # Imported locally (and only after validation) so that a module-level
        # name reusing ``roc_curve`` or ``auc`` cannot shadow the sklearn
        # functions this block depends on.
        from sklearn.metrics import auc, roc_curve

        # UI sliders can deliver floats; scikit-learn expects integers here.
        seed = int(random_state)
        iterations = int(max_iter)

        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=seed
        )

        # Standardize features (statistics come from the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train the model
        model = LogisticRegression(C=C_value, max_iter=iterations, random_state=seed)
        model.fit(X_train_scaled, y_train)

        # Make predictions
        y_pred = model.predict(X_test_scaled)
        y_pred_prob = model.predict_proba(X_test_scaled)[:, 1]

        # Calculate metrics; pinning labels keeps the matrix 2x2 even when a
        # small test split happens to contain a single class.
        accuracy = accuracy_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

        # Create confusion matrix plot
        conf_matrix_plot, cm_ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=target_names,
                    yticklabels=target_names,
                    ax=cm_ax)
        cm_ax.set_xlabel('Predicted')
        cm_ax.set_ylabel('Actual')
        cm_ax.set_title('Confusion Matrix')
        # Deregister from pyplot so repeated calls do not accumulate figures;
        # the returned Figure object can still be rendered.
        plt.close(conf_matrix_plot)

        # Create ROC curve
        fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
        roc_auc = auc(fpr, tpr)

        roc_plot, roc_ax = plt.subplots(figsize=(8, 6))
        roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
        roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        roc_ax.set_xlim([0.0, 1.0])
        roc_ax.set_ylim([0.0, 1.05])
        roc_ax.set_xlabel('False Positive Rate')
        roc_ax.set_ylabel('True Positive Rate')
        roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
        roc_ax.legend(loc="lower right")
        plt.close(roc_plot)

        # Capture model information and performance
        feature_importance = pd.DataFrame({
            'Feature': X.columns,
            'Coefficient': model.coef_[0]
        }).sort_values(by='Coefficient', ascending=False)

        # Write to a private buffer instead of redirecting sys.stdout, which
        # is process-wide and would capture output from concurrent requests.
        report = io.StringIO()
        print("Model Information:", file=report)
        print(f"Regularization strength (C): {C_value}", file=report)
        print(f"Max iterations: {max_iter}", file=report)

        print("\nFeature Coefficients:", file=report)
        print(feature_importance, file=report)

        print("\nModel Performance:", file=report)
        print(f"Accuracy: {accuracy:.4f}", file=report)
        print(f"AUC-ROC: {roc_auc:.4f}", file=report)
        print("\nClassification Report:", file=report)
        print(classification_report(y_test, y_pred, labels=[0, 1], target_names=target_names), file=report)

        return conf_matrix_plot, roc_plot, report.getvalue()

    except Exception as e:
        # UI boundary: report the failure in the text output rather than raising.
        return None, None, f"Error: {str(e)}"

def train_model(test_size, random_state, C_value, max_iter):
    """Train and evaluate logistic regression on the sklearn breast-cancer data.

    Args:
        test_size: Fraction of the samples held out for evaluation.
        random_state: Seed for the split and the solver (cast to ``int``).
        C_value: Value passed to ``LogisticRegression`` as ``C``.
        max_iter: Solver iteration limit (cast to ``int``).

    Returns:
        A ``(confusion_matrix_figure, roc_figure, importance_figure,
        report_text)`` tuple.
    """
    # Imported locally so that a module-level name reusing ``roc_curve`` or
    # ``auc`` cannot shadow the sklearn functions this block depends on.
    from sklearn.metrics import auc, roc_curve

    # UI sliders can deliver floats; scikit-learn expects integers here.
    seed = int(random_state)
    iterations = int(max_iter)

    # Load data
    data = load_breast_cancer()
    X = pd.DataFrame(data.data, columns=data.feature_names)
    y = data.target

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )

    # Standardize features (statistics come from the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the model
    model = LogisticRegression(C=C_value, max_iter=iterations, random_state=seed)
    model.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = model.predict(X_test_scaled)
    y_pred_prob = model.predict_proba(X_test_scaled)[:, 1]

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    # Create confusion matrix plot
    conf_matrix_plot, cm_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=data.target_names,
                yticklabels=data.target_names,
                ax=cm_ax)
    cm_ax.set_xlabel('Predicted')
    cm_ax.set_ylabel('Actual')
    cm_ax.set_title('Confusion Matrix')
    # Deregister from pyplot so repeated calls do not accumulate figures;
    # the returned Figure object can still be rendered.
    plt.close(conf_matrix_plot)

    # Create ROC curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    roc_auc = auc(fpr, tpr)

    roc_plot, roc_ax = plt.subplots(figsize=(8, 6))
    roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    roc_ax.set_xlim([0.0, 1.0])
    roc_ax.set_ylim([0.0, 1.05])
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    roc_ax.legend(loc="lower right")
    plt.close(roc_plot)

    # Feature importance: rank by coefficient magnitude. With standardized
    # inputs a large negative weight matters as much as a large positive one,
    # so sorting by the signed value would hide the strongest negative features.
    coefficient_table = pd.DataFrame({
        'Feature': X.columns,
        'Coefficient': model.coef_[0]
    })
    magnitude_order = coefficient_table['Coefficient'].abs().sort_values(ascending=False).index
    feature_importance = coefficient_table.reindex(magnitude_order)

    importance_plot, importance_ax = plt.subplots(figsize=(10, 8))
    sns.barplot(x='Coefficient', y='Feature', data=feature_importance.head(10), ax=importance_ax)
    importance_ax.set_title('Top 10 Feature Importance')
    importance_plot.tight_layout()
    plt.close(importance_plot)

    # Capture model information and performance. Write to a private buffer
    # instead of redirecting sys.stdout, which is process-wide and would
    # capture output from concurrent requests.
    report = io.StringIO()
    print("Model Information:", file=report)
    print(f"Regularization strength (C): {C_value}", file=report)
    print(f"Max iterations: {max_iter}", file=report)

    print("\nFeature Coefficients (top 5):", file=report)
    print(feature_importance.head(5), file=report)

    print("\nModel Performance:", file=report)
    print(f"Accuracy: {accuracy:.4f}", file=report)
    print(f"AUC-ROC: {roc_auc:.4f}", file=report)
    print("\nClassification Report:", file=report)
    print(classification_report(y_test, y_pred, target_names=data.target_names), file=report)

    return conf_matrix_plot, roc_plot, importance_plot, report.getvalue()

def predict_custom(radius_mean, texture_mean, perimeter_mean, area_mean, smoothness_mean):
    """Predict a diagnosis from five user-supplied breast-cancer features.

    A logistic-regression model is fitted on the full sklearn breast-cancer
    dataset with default hyper-parameters. The five supplied measurements are
    combined with dataset means for all remaining features, scored, and
    plotted next to the per-class averages.

    Args:
        radius_mean: Value for the 'mean radius' feature.
        texture_mean: Value for the 'mean texture' feature.
        perimeter_mean: Value for the 'mean perimeter' feature.
        area_mean: Value for the 'mean area' feature.
        smoothness_mean: Value for the 'mean smoothness' feature.

    Returns:
        A ``(result_text, figure)`` tuple: the predicted class with both class
        probabilities, and a matplotlib Figure comparing the input with the
        benign and malignant averages.
    """
    # Load data and train model with default parameters
    data = load_breast_cancer()
    X = pd.DataFrame(data.data, columns=data.feature_names)
    y = data.target

    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train the model
    model = LogisticRegression(random_state=42)
    model.fit(X_scaled, y)

    # Values supplied by the caller, keyed by dataset column name
    user_values = {
        'mean radius': radius_mean,
        'mean texture': texture_mean,
        'mean perimeter': perimeter_mean,
        'mean area': area_mean,
        'mean smoothness': smoothness_mean
    }
    features = list(user_values)

    # Features the caller did not provide default to the dataset mean, which
    # becomes 0 after standardization. A raw 0 would instead be scaled to an
    # extreme outlier and dominate the prediction.
    input_row = X.mean()
    for feature_name, value in user_values.items():
        input_row[feature_name] = value

    # Scale the input; keeping the column names matches how the scaler was fitted.
    input_frame = pd.DataFrame([input_row.to_numpy()], columns=X.columns)
    input_scaled = scaler.transform(input_frame)

    # Make prediction
    prediction = model.predict(input_scaled)[0]
    prediction_prob = model.predict_proba(input_scaled)[0]

    # Look the class indices up by name instead of assuming which integer
    # label means benign and which means malignant.
    class_names = list(data.target_names)
    benign_label = class_names.index('benign')
    malignant_label = class_names.index('malignant')
    # predict_proba columns follow model.classes_, so key them by label.
    probability_of = dict(zip(model.classes_, prediction_prob))

    # Get average values for each class, selected by column name
    benign_avg = X.loc[y == benign_label, features].mean()
    malignant_avg = X.loc[y == malignant_label, features].mean()

    # Create bar chart comparing the input to the per-class averages
    input_values = [radius_mean, texture_mean, perimeter_mean, area_mean, smoothness_mean]
    x = np.arange(len(features))
    width = 0.25

    comparison_fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(x - width, input_values, width, label='Your Input')
    ax.bar(x, benign_avg, width, label='Avg Benign')
    ax.bar(x + width, malignant_avg, width, label='Avg Malignant')

    ax.set_xticks(x)
    ax.set_xticklabels(features, rotation=45)
    ax.set_title('Feature Comparison')
    ax.legend()
    comparison_fig.tight_layout()
    # Deregister from pyplot so repeated calls do not accumulate figures;
    # the returned Figure object can still be rendered.
    plt.close(comparison_fig)

    result = f"Prediction: {data.target_names[prediction]}"
    result += f"\nProbability of being benign: {probability_of[benign_label]:.4f}"
    result += f"\nProbability of being malignant: {probability_of[malignant_label]:.4f}"

    return result, comparison_fig

# Create Gradio interface: three tabs wired to train_model,
# train_model_from_upload and predict_custom respectively.
with gr.Blocks(title="Logistic Regression Demo") as demo:
    gr.Markdown("# Logistic Regression Model for Breast Cancer Detection")

    with gr.Tab("Train with Default Data"):
        gr.Markdown("## Train a logistic regression model on breast cancer dataset")
        with gr.Row():
            test_size = gr.Slider(minimum=0.1, maximum=0.5, value=0.2, step=0.05, label="Test Size")
            random_state = gr.Slider(minimum=0, maximum=100, value=42, step=1, label="Random State")

        with gr.Row():
            c_value = gr.Slider(minimum=0.01, maximum=10.0, value=1.0, step=0.1, label="Regularization Strength (C)")
            max_iter = gr.Slider(minimum=100, maximum=1000, value=100, step=50, label="Max Iterations")

        train_button = gr.Button("Train Model")

        with gr.Row():
            conf_matrix = gr.Plot(label="Confusion Matrix")
            # Deliberately NOT named `roc_curve`: this is module scope, and that
            # name would rebind the sklearn.metrics.roc_curve function that the
            # training callbacks call, making them fail with
            # "'Plot' object is not callable".
            roc_curve_plot = gr.Plot(label="ROC Curve")

        with gr.Row():
            importance_plot = gr.Plot(label="Feature Importance")
            text_output = gr.Textbox(label="Model Information", lines=15)

        train_button.click(
            fn=train_model,
            inputs=[test_size, random_state, c_value, max_iter],
            outputs=[conf_matrix, roc_curve_plot, importance_plot, text_output]
        )

    with gr.Tab("Train with Your Data"):
        gr.Markdown("## Upload your own dataset and train a logistic regression model")
        with gr.Row():
            file_input = gr.File(label="Upload Dataset (CSV or Excel)")

        with gr.Row():
            feature_cols = gr.Textbox(label="Feature Columns (comma-separated)", placeholder="e.g. age, income, education")
            target_col = gr.Textbox(label="Target Column (binary)", placeholder="e.g. outcome, success, failure")

        with gr.Row():
            custom_test_size = gr.Slider(minimum=0.1, maximum=0.5, value=0.2, step=0.05, label="Test Size")
            custom_random_state = gr.Slider(minimum=0, maximum=100, value=42, step=1, label="Random State")

        with gr.Row():
            custom_c_value = gr.Slider(minimum=0.01, maximum=10.0, value=1.0, step=0.1, label="Regularization Strength (C)")
            custom_max_iter = gr.Slider(minimum=100, maximum=1000, value=100, step=50, label="Max Iterations")

        custom_train_button = gr.Button("Train Model")

        with gr.Row():
            custom_conf_matrix = gr.Plot(label="Confusion Matrix")
            custom_roc_curve = gr.Plot(label="ROC Curve")

        custom_text_output = gr.Textbox(label="Model Information", lines=15)

        custom_train_button.click(
            fn=train_model_from_upload,
            inputs=[file_input, feature_cols, target_col, custom_test_size, custom_random_state, custom_c_value, custom_max_iter],
            outputs=[custom_conf_matrix, custom_roc_curve, custom_text_output]
        )

    with gr.Tab("Make Predictions"):
        gr.Markdown("## Predict breast cancer diagnosis with your own input")

        with gr.Row():
            radius_mean = gr.Slider(minimum=5, maximum=30, value=14, label="Mean Radius")
            texture_mean = gr.Slider(minimum=5, maximum=40, value=19, label="Mean Texture")

        with gr.Row():
            perimeter_mean = gr.Slider(minimum=40, maximum=200, value=92, label="Mean Perimeter")
            area_mean = gr.Slider(minimum=100, maximum=2500, value=650, label="Mean Area")
            smoothness_mean = gr.Slider(minimum=0.05, maximum=0.2, value=0.1, label="Mean Smoothness")

        predict_button = gr.Button("Predict")

        with gr.Row():
            prediction_output = gr.Textbox(label="Prediction Result")
            feature_comparison_plot = gr.Plot(label="Feature Comparison")

        predict_button.click(
            fn=predict_custom,
            inputs=[radius_mean, texture_mean, perimeter_mean, area_mean, smoothness_mean],
            outputs=[prediction_output, feature_comparison_plot]
        )

demo.launch()

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

