<a href="https://colab.research.google.com/github/Nikhitaa2329/genAI1/blob/main/LinearRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Linear Regression with Gradio Interface - Improved with User Input and Graphs

!pip install gradio scikit-learn pandas numpy matplotlib

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import io
from contextlib import redirect_stdout

def train_model_from_upload(file, feature_cols, target_col, test_size, random_state):
    """Fit a linear regression on an uploaded CSV/Excel dataset and summarise it.

    Args:
        file: The uploaded dataset. Either a filesystem path (``str`` or
            path-like) or an object exposing the path as ``.name``.
        feature_cols: Comma-separated names of the feature columns. Surrounding
            whitespace, empty entries and duplicates are ignored.
        target_col: Name of the column to predict (whitespace is stripped).
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test shuffle (coerced to ``int``).

    Returns:
        ``(figure, report)`` on success. ``figure`` is a single matplotlib
        ``Figure`` with the actual-vs-predicted scatter on the left and the
        coefficient bar chart on the right; ``report`` is the text summary.
        On any failure the result is ``(None, message)``.
    """
    try:
        if file is None:
            return None, "No file uploaded. Please upload a CSV or Excel file."

        # Accept both a plain path and a file-like wrapper carrying ``.name``.
        if isinstance(file, str) or hasattr(file, "__fspath__"):
            path = str(file)
        else:
            path = str(file.name)

        # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
        lowered = path.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(path)
        else:
            return None, "Unsupported file format. Please upload a CSV or Excel file."

        # Validate target column
        target_col = (target_col or "").strip()
        if target_col not in df.columns:
            return None, f"Target column '{target_col}' not found in dataset."

        # Validate feature columns; drop blanks (e.g. a trailing comma) and
        # repeated names while keeping the order the user typed.
        requested = (name.strip() for name in (feature_cols or "").split(','))
        feature_list = list(dict.fromkeys(name for name in requested if name))
        if not feature_list:
            return None, "Please specify at least one feature column."
        for feature in feature_list:
            if feature not in df.columns:
                return None, f"Feature column '{feature}' not found in dataset."

        # Prepare data
        X = df[feature_list]
        y = df[target_col]

        # Split the data; scikit-learn only accepts integer seeds.
        seed = None if random_state is None else int(random_state)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=seed
        )

        # Train the model and evaluate it on the held-out rows
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        feature_importance = pd.DataFrame({
            'Feature': X.columns,
            'Coefficient': model.coef_
        }).sort_values(by='Coefficient', ascending=False)

        # Draw both charts on ONE figure and return that figure. Returning the
        # pyplot module only renders whichever figure happens to be current,
        # which silently dropped the scatter plot.
        fig, (ax_fit, ax_coef) = plt.subplots(1, 2, figsize=(16, 6))
        try:
            lo, hi = y_test.min(), y_test.max()
            ax_fit.scatter(y_test, y_pred, alpha=0.5)
            ax_fit.plot([lo, hi], [lo, hi], 'r--')
            ax_fit.set_xlabel('Actual Values')
            ax_fit.set_ylabel('Predicted Values')
            ax_fit.set_title('Linear Regression: Actual vs Predicted')

            # Pass ``ax`` so pandas draws here instead of opening a new figure.
            feature_importance.plot(x='Feature', y='Coefficient', kind='bar', ax=ax_coef)
            ax_coef.set_title('Feature Importance')
            fig.tight_layout()
        finally:
            # Unregister from pyplot so repeated clicks do not accumulate open
            # figures; the Figure object itself can still be rendered.
            plt.close(fig)

        # Build the report directly instead of redirecting the process-wide
        # stdout, which is unsafe when several requests run concurrently.
        report = "\n".join([
            "Model Information:",
            f"Intercept: {model.intercept_:.4f}",
            "",
            "Feature Coefficients:",
            str(feature_importance),
            "",
            "Model Performance:",
            f"Mean Squared Error: {mse:.4f}",
            f"R² Score: {r2:.4f}",
        ]) + "\n"

        return fig, report

    except Exception as e:
        # Surface the problem in the UI text box rather than crashing the app.
        return None, f"Error: {str(e)}"

def train_model_default(test_size, random_state):
    """Fit a linear regression on the California Housing data and summarise it.

    Args:
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test shuffle (coerced to ``int``).

    Returns:
        ``(figure, report)``. ``figure`` is a single matplotlib ``Figure`` with
        the actual-vs-predicted scatter on the left and the residual plot on
        the right; ``report`` is a text summary with the intercept, the
        coefficients sorted in descending order, the MSE and the R² score.
    """
    # Load data
    from sklearn.datasets import fetch_california_housing
    housing = fetch_california_housing()
    X = pd.DataFrame(housing.data, columns=housing.feature_names)
    y = housing.target

    # Split the data; scikit-learn only accepts integer seeds.
    seed = None if random_state is None else int(random_state)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )

    # Train the model and evaluate it on the held-out rows
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    residuals = y_test - y_pred

    # Draw both charts on ONE figure and return that figure. Returning the
    # pyplot module only renders whichever figure happens to be current, which
    # silently dropped the actual-vs-predicted plot.
    fig, (ax_fit, ax_resid) = plt.subplots(1, 2, figsize=(16, 6))
    try:
        lo, hi = y_test.min(), y_test.max()
        ax_fit.scatter(y_test, y_pred, alpha=0.5)
        ax_fit.plot([lo, hi], [lo, hi], 'r--')
        ax_fit.set_xlabel('Actual Values')
        ax_fit.set_ylabel('Predicted Values')
        ax_fit.set_title('Linear Regression: Actual vs Predicted')

        ax_resid.scatter(y_pred, residuals, alpha=0.5)
        ax_resid.axhline(y=0, color='r', linestyle='--')
        ax_resid.set_xlabel('Predicted Values')
        ax_resid.set_ylabel('Residuals')
        ax_resid.set_title('Residual Plot')
        fig.tight_layout()
    finally:
        # Unregister from pyplot so repeated clicks do not accumulate open
        # figures; the Figure object itself can still be rendered.
        plt.close(fig)

    # Capture model information
    feature_importance = pd.DataFrame({
        'Feature': X.columns,
        'Coefficient': model.coef_
    }).sort_values(by='Coefficient', ascending=False)

    # Build the report directly instead of redirecting the process-wide
    # stdout, which is unsafe when several requests run concurrently.
    report = "\n".join([
        "Model Information:",
        f"Intercept: {model.intercept_:.4f}",
        "",
        "Feature Coefficients:",
        str(feature_importance),
        "",
        "Model Performance:",
        f"Mean Squared Error: {mse:.4f}",
        f"R² Score: {r2:.4f}",
    ]) + "\n"

    return fig, report

# Holds the fitted housing model so it is trained once, not on every click.
_HOUSING_FIT_CACHE = {}


def _fit_housing_model():
    """Return ``(model, feature_names, feature_means)`` for the full dataset.

    The dataset is loaded and the model fitted on first use only; later calls
    reuse the cached result.
    """
    if "fit" not in _HOUSING_FIT_CACHE:
        from sklearn.datasets import fetch_california_housing
        housing = fetch_california_housing()
        X = pd.DataFrame(housing.data, columns=housing.feature_names)
        model = LinearRegression()
        model.fit(X, housing.target)
        _HOUSING_FIT_CACHE["fit"] = (model, list(housing.feature_names), X.mean().values)
    return _HOUSING_FIT_CACHE["fit"]


def predict_custom(MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude):
    """Predict a house value for one set of California Housing feature values.

    The arguments are the eight dataset features, in the dataset's column
    order. The model is a linear regression fitted on the whole dataset.

    Returns:
        ``(message, figure)``. ``message`` is the formatted prediction and
        ``figure`` is a matplotlib ``Figure`` with a grouped bar chart of the
        supplied values next to the dataset mean of each feature.
    """
    model, feature_names, feature_means = _fit_housing_model()

    # Predict from a DataFrame carrying the training column names; a bare
    # ndarray makes scikit-learn warn about missing feature names.
    input_df = pd.DataFrame(
        [[MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude]],
        columns=feature_names,
    )
    prediction = model.predict(input_df)[0]

    # Grouped bar chart: user input vs dataset mean, one pair per feature.
    width = 0.35
    x = np.arange(len(feature_names))
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.bar(x - width/2, input_df.iloc[0].values, width, label='Your Input')
        ax.bar(x + width/2, feature_means, width, label='Dataset Mean')
        ax.set_xticks(x)
        ax.set_xticklabels(feature_names, rotation=45, ha='right')
        ax.legend()
        ax.set_title('Your Input vs Dataset Mean')
        fig.tight_layout()
    finally:
        # Unregister from pyplot so repeated clicks do not accumulate open
        # figures; the Figure object itself can still be rendered.
        plt.close(fig)

    # The prediction is multiplied by 100 to express it in thousands ("K").
    return f"Predicted house value: ${prediction * 100:.2f}K", fig

# Create Gradio interface
# Build the three-tab UI. Components are laid out in the order they are
# created, so the statement order below is also the on-screen order.
with gr.Blocks(title="Linear Regression Demo") as demo:
    gr.Markdown("# Linear Regression Model")

    # Tab 1: train on the built-in California Housing dataset.
    with gr.Tab("Train with Default Data"):
        gr.Markdown("## Train a linear regression model on California Housing dataset")
        with gr.Row():
            # Split controls, passed to train_model_default in this order.
            test_size = gr.Slider(minimum=0.1, maximum=0.5, value=0.2, step=0.05, label="Test Size")
            random_state = gr.Slider(minimum=0, maximum=100, value=42, step=1, label="Random State")

        train_button = gr.Button("Train Model")

        with gr.Row():
            plot_output = gr.Plot(label="Model Visualization")
            text_output = gr.Textbox(label="Model Information", lines=10)

        # train_model_default returns (plot, text), matching `outputs`.
        train_button.click(
            fn=train_model_default,
            inputs=[test_size, random_state],
            outputs=[plot_output, text_output]
        )

    # Tab 2: train on a user-uploaded CSV/Excel file.
    with gr.Tab("Train with Your Data"):
        gr.Markdown("## Upload your own dataset and train a linear regression model")
        with gr.Row():
            file_input = gr.File(label="Upload Dataset (CSV or Excel)")

        with gr.Row():
            # Column names are typed as free text and validated by the handler.
            feature_cols = gr.Textbox(label="Feature Columns (comma-separated)", placeholder="e.g. age, income, education")
            target_col = gr.Textbox(label="Target Column", placeholder="e.g. price")

        with gr.Row():
            custom_test_size = gr.Slider(minimum=0.1, maximum=0.5, value=0.2, step=0.05, label="Test Size")
            custom_random_state = gr.Slider(minimum=0, maximum=100, value=42, step=1, label="Random State")

        custom_train_button = gr.Button("Train Model")

        with gr.Row():
            custom_plot_output = gr.Plot(label="Model Visualization")
            custom_text_output = gr.Textbox(label="Model Information", lines=10)

        # `inputs` order must match train_model_from_upload's parameter order;
        # it returns (plot, text), or (None, message) on failure.
        custom_train_button.click(
            fn=train_model_from_upload,
            inputs=[file_input, feature_cols, target_col, custom_test_size, custom_random_state],
            outputs=[custom_plot_output, custom_text_output]
        )

    # Tab 3: predict a house value from manually chosen feature values.
    with gr.Tab("Make Predictions"):
        gr.Markdown("## Predict housing prices with your own input values")

        with gr.Row():
            # Eight sliders, one per predict_custom parameter, in two columns.
            with gr.Column():
                medinc = gr.Slider(minimum=0, maximum=15, value=3.5, label="Median Income (tens of thousands)")
                house_age = gr.Slider(minimum=0, maximum=60, value=25, label="House Age (years)")
                ave_rooms = gr.Slider(minimum=0, maximum=10, value=5, label="Average Rooms")
                ave_bedrms = gr.Slider(minimum=0, maximum=5, value=1, label="Average Bedrooms")

            with gr.Column():
                population = gr.Slider(minimum=0, maximum=10000, value=1500, label="Population")
                ave_occup = gr.Slider(minimum=0, maximum=10, value=3, label="Average Occupancy")
                latitude = gr.Slider(minimum=32, maximum=42, value=35, label="Latitude")
                longitude = gr.Slider(minimum=-125, maximum=-114, value=-119, label="Longitude")

        predict_button = gr.Button("Predict")

        with gr.Row():
            prediction_output = gr.Textbox(label="Prediction Result")
            input_comparison_plot = gr.Plot(label="Input Comparison")

        # predict_custom returns (text, plot) -- note the order is the reverse
        # of the two training handlers, and `outputs` is ordered to match.
        predict_button.click(
            fn=predict_custom,
            inputs=[medinc, house_age, ave_rooms, ave_bedrms, population, ave_occup, latitude, longitude],
            outputs=[prediction_output, input_comparison_plot]
        )

# Start the Gradio app with default launch settings.
demo.launch()

Collecting gradio
  Downloading gradio-5.25.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

<IPython.core.display.Javascript object>

