In [None]:
!pip install crewai gradio



In [None]:
import pandas as pd
import gradio as gr
from crewai import Agent
import os
import shutil

# Set the environment variable for OpenAI API Key
#os.environ["OPENAI_API_KEY"] = ""

***Data Ingestion & Problem Identifier Agent***

In [None]:
from google.colab import drive

# Mount Google Drive so files written below persist beyond the Colab session.
drive.mount('/content/drive')

# Directory in Google Drive where the uploaded and derived datasets are stored.
DRIVE_DIRECTORY = "/content/drive/My Drive/DEVA_Data_Files"

# Create the directory if needed. `exist_ok=True` avoids the check-then-create
# race and fails loudly if the path exists but is not a directory.
os.makedirs(DRIVE_DIRECTORY, exist_ok=True)

Mounted at /content/drive


In [None]:
# Data Ingestion Agent - Function to upload and store the file
def upload_and_store(file_obj):
    """Read an uploaded CSV/Excel file and store a copy in Google Drive.

    Args:
        file_obj: The value produced by the Gradio file component. Either an
            object exposing the temporary file path as ``.name`` or the path
            itself; ``None`` when nothing has been uploaded.

    Returns:
        A 4-tuple ``(status_message, preview_html, dataframe, column_names)``.
        On any failure the last three items are ``None`` and the first item
        describes the problem; this function never raises for bad input.
    """
    invalid_upload = ("Please upload a valid CSV or Excel file.", None, None, None)

    # The button can be clicked before any file has been chosen.
    if file_obj is None:
        return invalid_upload

    source_path = getattr(file_obj, "name", file_obj)
    # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
    extension = os.path.splitext(str(source_path))[1].lower()

    try:
        # The stored file always gets a fixed name so later stages can find it.
        if extension == '.csv':
            df = pd.read_csv(source_path)
            file_path = os.path.join(DRIVE_DIRECTORY, "telecom_dataset.csv")
            df.to_csv(file_path, index=False)
        elif extension == '.xlsx':
            df = pd.read_excel(source_path)
            file_path = os.path.join(DRIVE_DIRECTORY, "telecom_dataset.xlsx")
            df.to_excel(file_path, index=False)
        else:
            return invalid_upload

        preview = df.head().to_html(index=False)
        column_names = df.columns.tolist()

        return (f"File uploaded and stored successfully in Google Drive: {file_path}",
                preview, df, column_names)

    except Exception as e:
        return (f"Error while saving the file: {str(e)}", None, None, None)



# Problem Identifier Agent
class ProblemIdentifierAgent:
    """Maps a dataset's column names to candidate ML problem statements.

    Each known problem is described by the set of columns it requires; a
    problem is proposed only when every required column is present. Every
    proposed problem is then annotated with a list of suggested algorithms.
    """

    # (required columns, problem statement), in the order they are reported.
    _PROBLEM_RULES = (
        # Customer Churn Prediction
        (("tenure", "Contract", "MonthlyCharges", "TotalCharges", "InternetService",
          "OnlineSecurity", "TechSupport", "PaymentMethod", "Churn"),
         "Predict customer churn based on tenure, contract type, and service usage."),
        # Customer Segmentation for Personalized Marketing
        (("gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService",
          "MultipleLines", "InternetService", "StreamingTV", "StreamingMovies",
          "Contract", "PaymentMethod", "MonthlyCharges"),
         "Segment customers for personalized marketing based on demographics and service usage."),
        # Service Quality Improvement and Complaint Reduction
        (("TechSupport", "DeviceProtection", "OnlineSecurity", "InternetService",
          "MultipleLines", "StreamingTV", "StreamingMovies", "Churn"),
         "Analyze service features to improve quality and reduce customer complaints."),
        # Customer Lifetime Value (CLV) Prediction
        (("tenure", "Contract", "MonthlyCharges", "TotalCharges", "PaymentMethod",
          "InternetService", "StreamingTV", "StreamingMovies"),
         "Predict customer lifetime value based on service usage and contract details."),
    )

    # (keyword looked up in the lower-cased statement, algorithms); first match wins.
    _ALGORITHM_RULES = (
        ("churn", ("Logistic Regression", "Random Forest", "Support Vector Machine")),
        ("segment", ("K-Means Clustering", "Hierarchical Clustering")),
        # The service-quality statement reads "...service features to improve
        # quality...", so the keyword must be "quality", not "service quality".
        ("quality", ("Decision Trees", "Random Forest", "Gradient Boosting")),
        ("lifetime value", ("Linear Regression", "Random Forest", "XGBoost")),
    )
    _DEFAULT_ALGORITHMS = ("Logistic Regression", "Naive Bayes", "K-Nearest Neighbors")

    # Upper bound on the number of problem statements reported per dataset.
    _MAX_PROBLEMS = 3

    def __init__(self, role, goal, backstory):
        """Store the descriptive metadata of the agent (not used by the logic)."""
        self.role = role
        self.goal = goal
        self.backstory = backstory

    def run(self, dataset_details):
        """Identify problems for a dataset and attach suggested algorithms.

        Args:
            dataset_details: Mapping with at least the keys ``'name'`` and
                ``'columns'`` (an iterable of column names).

        Returns:
            ``{"dataset_name": ..., "problem_statements": [...]}`` where each
            problem is a dict with ``'statement'`` and ``'suggested_algorithms'``.

        Raises:
            KeyError: If ``'name'`` or ``'columns'`` is missing.
        """
        dataset_name = dataset_details['name']
        dataset_columns = dataset_details['columns']

        problem_statements = self.analyze_data(dataset_columns)

        for problem in problem_statements:
            problem['suggested_algorithms'] = self.suggest_algorithms(problem['statement'])

        return {
            "dataset_name": dataset_name,
            "problem_statements": problem_statements
        }

    def analyze_data(self, columns):
        """Return up to three ``{'statement': ...}`` dicts whose required columns all exist."""
        available = set(columns)
        matches = [
            {"statement": statement}
            for required, statement in self._PROBLEM_RULES
            if available.issuperset(required)
        ]
        return matches[:self._MAX_PROBLEMS]

    def suggest_algorithms(self, problem_statement):
        """Return a new list of algorithm names chosen by keyword in the statement."""
        text = problem_statement.lower()
        for keyword, algorithms in self._ALGORITHM_RULES:
            if keyword in text:
                return list(algorithms)
        return list(self._DEFAULT_ALGORITHMS)


# Function to process input and return output
def process_input(df):
    """Run the Problem Identifier Agent on a DataFrame and render the result.

    Args:
        df: The uploaded dataset as a pandas DataFrame, or ``None`` when no
            dataset has been stored yet.

    Returns:
        ``(cards_html, preview_html)``: HTML cards listing each identified
        problem with its suggested algorithms, and an HTML preview of the
        first rows of ``df``.
    """
    # The button can be pressed before a dataset has been uploaded.
    if df is None:
        return ("<p>Please upload and store a dataset before running the "
                "Problem Identifier Agent.</p>", "")

    dataset_details = {
        "name": "Default Dataset Name",
        "description": "Default Dataset Description",
        "columns": df.columns.tolist()
    }

    # Instantiate the agent with required parameters
    role = "Problem Statement Identifier"
    goal = "Identify problem statements related to customer churn and suggest algorithms."
    backstory = "This agent analyzes dataset columns to generate relevant problem statements."

    agent = ProblemIdentifierAgent(role, goal, backstory)
    results = agent.run(dataset_details)

    output = ""
    for problem in results['problem_statements']:
        output += f"""
        <div style="border: 1px solid #007BFF; border-radius: 8px; padding: 16px; margin: 8px; width: 300px; display: inline-block; background-color: black; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);">
            <h4 style="color: #007BFF;">Problem Statement:</h4>
            <p style="color:white;">{problem['statement']}</p>
            <h5 style="color: #28a745;">Suggested Algorithms:</h5>
            <p style="color:#e4f1fe">{', '.join(problem['suggested_algorithms'])}</p>
        </div>
        """

    if not output:
        output = "<p>No known problem statement matches the columns of this dataset.</p>"

    # Cell values come from an uploaded file, so keep pandas' default HTML
    # escaping instead of injecting them into the page verbatim.
    preview = f"""
    <div style="overflow-x: auto; padding: 10px; border-radius: 8px;">
        {df.head().to_html(index=False)}
    </div>
    """

    return output, preview


In [None]:
"""# prompt: i need to rename the uploaded dataset as dataset.csv or dataset.xlsx before storing the file in drive for the above data ingestion agent.

import pandas as pd
import gradio as gr
from crewai import Agent
import os
import shutil
from google.colab import drive
!pip install crewai gradio

# Set the environment variable for OpenAI API Key
os.environ["OPENAI_API_KEY"] = "<REDACTED - this key was exposed in the notebook; revoke it and load the key from a secret store>"
# ***Data Ingestion & Problem Identifier Agent***

# Mount Google Drive
drive.mount('/content/drive')

# Specify the directory where the uploaded files will be stored in Google Drive
DRIVE_DIRECTORY = "/content/drive/My Drive/data_files"

# Ensure the directory exists
if not os.path.exists(DRIVE_DIRECTORY):
    os.makedirs(DRIVE_DIRECTORY)
# Data Ingestion Agent - Function to upload and store the file
def upload_and_store(file_obj):
    #Handles file upload and stores the Dataset (CSV or Excel) file into Google Drive.
    file_path = os.path.join(DRIVE_DIRECTORY, "dataset.csv")  # Default to CSV

    try:
        # Check if the uploaded file is CSV or Excel
        if file_obj.name.endswith('.csv'):
            # Read the CSV file into a Pandas DataFrame
            df = pd.read_csv(file_obj.name)
            file_path = os.path.join(DRIVE_DIRECTORY, "dataset.csv") # Ensure correct file path
        elif file_obj.name.endswith('.xlsx'):
            # Read the Excel file into a Pandas DataFrame
            df = pd.read_excel(file_obj.name)
            file_path = os.path.join(DRIVE_DIRECTORY, "dataset.xlsx") # Change file path for Excel
        else:
            return "Please upload a valid CSV or Excel file.", None, None, None

        # Save the DataFrame to the appropriate format in Google Drive
        if file_obj.name.endswith('.csv'):
            df.to_csv(file_path, index=False)
        elif file_obj.name.endswith('.xlsx'):
            df.to_excel(file_path, index=False)


        # Show a preview of the uploaded DataFrame
        preview = df.head().to_html(index=False)

        # Get the column names from the DataFrame
        column_names = df.columns.tolist()

        # Return status message, preview, DataFrame, and column names
        return (f"File uploaded and stored successfully in Google Drive: {file_path}",
                preview, df, column_names)
    except Exception as e:
        return (f"Error while saving the file: {str(e)}", None, None, None)
# ... rest of your code"""

***Data Preprocessing***

In [None]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class DataPreprocessingAgent:
    """Cleans a dataset in five steps and reports what each step did.

    The steps are: missing-value handling, duplicate removal, dropping the
    ``customerID`` column, converting numeric-looking text columns, and IQR
    based outlier removal. All work happens on a private copy held in
    ``self.df``; the frame passed to the constructor is never modified.
    """

    def __init__(self, dataframe):
        self.df = dataframe.copy()

    def _fill_missing(self, column, strategy):
        """Fill the missing values of one column; return a note for the log.

        Numeric columns use the mean or median when ``strategy`` asks for it
        and the mode otherwise (this covers the ``'mode'`` strategy).
        Non-numeric columns always use the mode.
        """
        series = self.df[column]
        if series.notna().sum() == 0:
            # Mean/median/mode are undefined without at least one observed value.
            return f"Note: Column '{column}' has no observed values; nothing to fill with.\n"

        is_numeric = pd.api.types.is_numeric_dtype(series)
        if is_numeric and strategy == 'mean':
            fill_value = series.mean()
        elif is_numeric and strategy == 'median':
            fill_value = series.median()
        else:
            fill_value = series.mode().iloc[0]

        # Assign the result back instead of a chained in-place fillna, which
        # does not update the frame under pandas' Copy-on-Write semantics.
        self.df[column] = series.fillna(fill_value)

        if is_numeric:
            return ""
        return f"Note: Using mode for non-numeric column '{column}'\n"

    def handle_missing_values(self, strategy, threshold):
        """Drop or fill missing values column by column.

        Args:
            strategy: ``'mean'``, ``'median'`` or ``'mode'``; how to fill
                columns whose missing share exceeds ``threshold``.
            threshold: Percentage (0-100). Columns with at most this share of
                missing values have the affected rows dropped; columns above
                it are filled.

        Returns:
            A text report of the actions taken.
        """
        output = "### Step 1: Handling missing values\n"
        output += f"Using strategy: {strategy}, threshold: {threshold}%\n"

        total_missing = self.df.isnull().sum().sum()
        if total_missing == 0:
            output += "No missing values found.\n"
            return output

        output += f"Total missing values found: {total_missing}\n"
        for column in list(self.df.columns):
            # Recount on the current frame: dropping rows for an earlier
            # column may already have removed this column's missing values.
            missing_count = self.df[column].isnull().sum()
            if missing_count == 0:
                continue

            missing_percentage = (missing_count / len(self.df)) * 100
            output += f"Column '{column}' has {missing_count} missing values ({missing_percentage:.2f}%).\n"

            if missing_percentage <= threshold:
                output += f"Dropping rows with missing values in '{column}' (below threshold of {threshold}%).\n"
                self.df = self.df.dropna(subset=[column])
            else:
                output += f"Filling missing values in '{column}' using {strategy} (above threshold of {threshold}%).\n"
                output += self._fill_missing(column, strategy)

        return output

    def remove_duplicates(self):
        """Remove fully duplicated rows and report how many were found."""
        output = "### Step 2: Checking for duplicate entries\n"
        duplicate_count = self.df.duplicated().sum()
        if duplicate_count > 0:
            output += f"Found {duplicate_count} duplicate rows. Removing duplicates.\n"
            self.df = self.df.drop_duplicates()
        else:
            output += "No duplicate rows found.\n"

        return output

    def drop_irrelevant_columns(self):
        """Drop the ``customerID`` column when it is present."""
        output = "### Step 3: Dropping specified irrelevant columns\n"
        if 'customerID' in self.df.columns:
            output += "Dropping column 'customerID'.\n"
            self.df = self.df.drop(columns=['customerID'])
        else:
            output += "Column 'customerID' not found; skipping.\n"

        return output

    def convert_data_types(self):
        """Convert text columns to numeric when all their non-blank values are numbers.

        Blank/whitespace-only cells become NaN in a converted column. A column
        that mixes numbers with real text is left untouched, because coercing
        it would silently replace the text with NaN.

        NOTE: NaNs introduced here appear after step 1 has already run, so
        they are still present in the frame returned by ``preprocess``.
        """
        output = "### Step 4: Checking datatypes of columns and converting eligible ones\n"
        for column in self.df.columns:
            series = self.df[column]
            is_text = (pd.api.types.is_object_dtype(series)
                       or pd.api.types.is_string_dtype(series))
            if not is_text:
                continue

            try:
                stripped = series.str.strip()
            except AttributeError:
                # Object column that holds no strings at all.
                output += f"Skipped: '{column}' contains non-numeric values.\n"
                continue

            converted_column = pd.to_numeric(stripped, errors='coerce')
            non_blank = (stripped.fillna("") != "").astype(bool)

            if non_blank.any() and converted_column[non_blank].notna().all():
                self.df[column] = converted_column
                output += f"Success: Converted '{column}' to numeric.\n"
            else:
                output += f"Skipped: '{column}' contains non-numeric values.\n"

        return output

    def check_outliers(self):
        """Remove rows outside 1.5*IQR for each non-binary numeric column.

        A box plot of every checked column (after removal) is written to
        ``'<column>_outlier_distribution.png'`` in the working directory.

        Returns:
            ``(report_text, list_of_figure_paths)``.
        """
        output = "### Step 5: Checking for outliers\n"
        numeric_cols = self.df.select_dtypes(include=['int64', 'float64']).columns
        figures = []

        for column in numeric_cols:
            if self.df[column].nunique() == 2:
                output += f"Skipping outlier detection for binary column: '{column}' (values: {self.df[column].unique()})\n"
                continue

            Q1 = self.df[column].quantile(0.25)
            Q3 = self.df[column].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            outliers = ((self.df[column] < lower_bound) | (self.df[column] > upper_bound)).sum()

            output += f"Column '{column}' has {outliers} outliers.\n"

            if outliers > 0:
                self.df = self.df[(self.df[column] >= lower_bound) & (self.df[column] <= upper_bound)]
                output += f"Removed outliers from '{column}'.\n"

            fig, ax = plt.subplots(figsize=(8, 4))
            sns.boxplot(x=self.df[column], ax=ax)
            ax.set_title(f"Distribution of '{column}' after handling outliers")
            figure_path = f'{column}_outlier_distribution.png'
            fig.savefig(figure_path)
            plt.close(fig)  # release the figure; many columns means many figures
            figures.append(figure_path)

        return output, figures

    def preprocess(self, strategy, threshold):
        """Run all five steps in order.

        Returns:
            ``(combined_report, figure_paths, cleaned_dataframe)``.
        """
        steps_output = [
            self.handle_missing_values(strategy=strategy, threshold=threshold),
            self.remove_duplicates(),
            self.drop_irrelevant_columns(),
            self.convert_data_types(),
        ]

        outliers_output, figures = self.check_outliers()
        steps_output.append(outliers_output)

        return "\n".join(steps_output), figures, self.df

# Global variable to store the processed dataframe
processed_df = None


def _load_stored_dataset():
    """Load the dataset most recently stored by the Data Ingestion Agent.

    The ingestion step stores either ``telecom_dataset.csv`` or
    ``telecom_dataset.xlsx`` in ``DRIVE_DIRECTORY``; when both exist the one
    modified last is used so a stale file from an earlier upload is ignored.
    """
    candidates = [
        (os.path.join(DRIVE_DIRECTORY, "telecom_dataset.csv"), pd.read_csv),
        (os.path.join(DRIVE_DIRECTORY, "telecom_dataset.xlsx"), pd.read_excel),
    ]
    existing = [(path, reader) for path, reader in candidates if os.path.exists(path)]
    if not existing:
        raise FileNotFoundError(
            f"No stored dataset found in {DRIVE_DIRECTORY}; upload a dataset first."
        )
    path, reader = max(existing, key=lambda item: os.path.getmtime(item[0]))
    return reader(path)


def run_preprocessing(strategy, threshold):
    """Preprocess the stored dataset and remember the result in ``processed_df``.

    Args:
        strategy: Missing-value fill strategy passed to the agent.
        threshold: Missing-value percentage threshold; converted to ``float``.

    Returns:
        ``(report_text, figure_paths)``. On failure the report is an error
        message and the figure list is empty; exceptions are not propagated.
    """
    global processed_df
    try:
        telco_data = _load_stored_dataset()

        agent = DataPreprocessingAgent(telco_data)

        steps_output, figures, df = agent.preprocess(strategy=strategy, threshold=float(threshold))
        processed_df = df

        return steps_output, figures
    except Exception as e:
        return f"Error occurred during preprocessing: {str(e)}", []

def view_final_dataset():
    """Save the preprocessed dataset to Drive and return its first rows.

    Returns an empty DataFrame when preprocessing has not produced a result
    yet; otherwise writes ``preprocessed_dataset.csv`` into
    ``DRIVE_DIRECTORY`` and returns ``processed_df.head()``.
    """
    global processed_df
    if processed_df is None:
        return pd.DataFrame()

    final_dataset_path = os.path.join(DRIVE_DIRECTORY, "preprocessed_dataset.csv")
    processed_df.to_csv(final_dataset_path, index=False)
    print("\nPreprocessed Dataset Saved Successfully!!!")

    return processed_df.head()



***Feature Engineering***

In [None]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from imblearn.over_sampling import SMOTE
import tempfile
import os

class FeatureEngineeringAgent:
    """Imputes, encodes and class-balances a dataset for model training.

    The agent works on a private copy of the input frame and accumulates a
    human-readable log in ``self.progress_message``.
    """

    def __init__(self, dataframe, target_column='Churn'):
        # Copy so that encoding/imputation never alters the caller's frame.
        self.df = dataframe.copy()
        self.target_column = target_column
        self.progress_message = ""

    def add_progress(self, message):
        """Append one line to the progress log and return the whole log."""
        self.progress_message += message + "\n"
        return self.progress_message

    def _categorical_columns(self):
        """Return the names of the text (object or string dtype) columns."""
        return [
            col for col in self.df.columns
            if pd.api.types.is_object_dtype(self.df[col])
            or pd.api.types.is_string_dtype(self.df[col])
        ]

    def handle_missing_values(self):
        """Fill numeric columns with their mean and text columns with their mode."""
        self.add_progress("\n*** Step 1: Handling Missing Values ***\n")

        missing_counts = self.df.isnull().sum()
        missing_cols = missing_counts[missing_counts > 0]

        if missing_cols.empty:
            self.add_progress("No missing values found in the dataset")
            return self.df

        self.add_progress("Missing values found in columns:")
        for col, count in missing_cols.items():
            self.add_progress(f"- {col}: {count} missing values")

        numeric_cols = self.df.select_dtypes(include='number').columns.tolist()
        if numeric_cols:
            self.df[numeric_cols] = self.df[numeric_cols].fillna(self.df[numeric_cols].mean())
        self.add_progress("\nFilled numeric columns with mean values")

        for col in self._categorical_columns():
            modes = self.df[col].mode()
            # An all-missing column has no mode; leave it as it is.
            if not modes.empty:
                self.df[col] = self.df[col].fillna(modes.iloc[0])
        self.add_progress("Filled categorical columns with mode values")

        return self.df

    def encode_categorical(self):
        """Encode Yes/No columns as 1/0 and label-encode other text columns.

        Missing values are ignored when deciding whether a column is Yes/No
        and stay missing in the encoded column; for other text columns they
        are encoded as their own category.
        """
        self.add_progress("\n*** Step 2: Encoding Categorical Columns ***\n")

        categorical_cols = self._categorical_columns()
        binary_cols = [
            col for col in categorical_cols
            if set(self.df[col].dropna().unique()) == {'No', 'Yes'}
        ]

        self.add_progress(f"Binary categorical columns identified: {binary_cols}")

        for col in binary_cols:
            self.df[col] = self.df[col].map({'Yes': 1, 'No': 0})
            self.add_progress(f"Encoded '{col}' with binary encoding (Yes=1, No=0)")

        categorical_cols = [col for col in categorical_cols if col not in binary_cols]

        if len(categorical_cols) > 0:
            self.add_progress(f"\nNon-binary categorical columns identified: {categorical_cols}")
            for col in categorical_cols:
                le = LabelEncoder()
                # Cast to str so mixed or missing values cannot break the encoder.
                self.df[col] = le.fit_transform(self.df[col].astype(str))
                self.add_progress(f"Encoded '{col}' with label encoding")
        else:
            self.add_progress("\nNo non-binary categorical columns found")

        return self.df

    def plot_class_distribution(self, target_column, title="Class Distribution"):
        """Save a count plot of ``target_column`` as a PNG in the temp dir; return its path."""
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.countplot(data=self.df, x=target_column, hue=target_column,
                      legend=False, palette="viridis", ax=ax)
        ax.set_title(title)
        ax.set_xlabel(target_column)
        ax.set_ylabel("Count")

        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, f"{title.lower().replace(' ', '_')}.png")
        fig.savefig(temp_path, format='png', bbox_inches='tight')
        plt.close(fig)
        return temp_path

    def check_imbalance(self):
        """Report the class distribution and oversample with SMOTE when imbalanced.

        SMOTE is applied when the majority:minority ratio exceeds 1.5.

        Returns:
            ``(dataframe, initial_plot_path, final_plot_path_or_None,
            imbalance_detected)``.
        """
        self.add_progress("\n*** Step 3: Checking Class Imbalance ***\n")

        initial_dist_plot = self.plot_class_distribution(self.target_column, "Initial Class Distribution")

        class_distribution = self.df[self.target_column].value_counts()
        self.add_progress("Class distribution:")
        for class_name, count in class_distribution.items():
            self.add_progress(f"- {class_name}: {count} samples")

        majority_class_count = class_distribution.max()
        minority_class_count = class_distribution.min()
        imbalance_ratio = majority_class_count / minority_class_count

        self.add_progress(f"\nImbalance ratio (majority:minority): {imbalance_ratio:.2f}")

        final_dist_plot = None
        if imbalance_ratio > 1.5 and minority_class_count < 2:
            # SMOTE interpolates between neighbours, so it needs at least two samples.
            self.add_progress("\nToo few minority samples for SMOTE. Skipping balancing.")
        elif imbalance_ratio > 1.5:
            self.add_progress("\nApplying SMOTE to balance the classes...")
            X = self.df.drop(columns=[self.target_column])
            y = self.df[self.target_column]
            # SMOTE's default of 5 neighbours fails for very small minority
            # classes; never ask for more neighbours than are available.
            k_neighbors = min(5, int(minority_class_count) - 1)
            smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
            X_resampled, y_resampled = smote.fit_resample(X, y)
            self.df = pd.concat([X_resampled, y_resampled], axis=1).reset_index(drop=True)

            final_dist_plot = self.plot_class_distribution(self.target_column, "Distribution After SMOTE")

            new_distribution = self.df[self.target_column].value_counts()
            self.add_progress("\nClass distribution after SMOTE:")
            for class_name, count in new_distribution.items():
                self.add_progress(f"- {class_name}: {count} samples")
        else:
            self.add_progress("\nData is relatively balanced. No SMOTE needed.")

        return self.df, initial_dist_plot, final_dist_plot, imbalance_ratio > 1.5

    def feature_engineer(self):
        """Run imputation, encoding and balancing.

        Imputation runs before encoding because the encoders cannot handle
        missing values in text columns.

        Returns:
            ``(dataframe, initial_plot, final_plot, imbalance_detected, log)``.
        """
        self.add_progress("Starting feature engineering process...")

        self.handle_missing_values()
        self.encode_categorical()
        df, initial_plot, final_plot, was_balanced = self.check_imbalance()

        self.add_progress("\nFeature engineering completed successfully!")

        return df, initial_plot, final_plot, was_balanced, self.progress_message

def featuring_data(target_column):
    """Run feature engineering on the preprocessed dataset stored in Drive.

    Reads ``preprocessed_dataset.csv`` from ``DRIVE_DIRECTORY`` (the location
    the preprocessing step writes to) and saves the result next to it as
    ``featured_telecom_data.csv``.

    Args:
        target_column: Name of the label column used for the imbalance check.

    Returns:
        ``(progress_log, initial_plot_path, final_plot_path, None)``; the
        last item is ``None`` so no dataset preview is shown. On failure the
        first item is an error message and the rest are ``None``.
    """
    try:
        target_column = (target_column or "").strip()

        input_path = os.path.join(DRIVE_DIRECTORY, "preprocessed_dataset.csv")
        processed_data = pd.read_csv(input_path)

        if target_column not in processed_data.columns:
            raise ValueError(
                f"Target column '{target_column}' not found. "
                f"Available columns: {processed_data.columns.tolist()}"
            )

        agent = FeatureEngineeringAgent(processed_data, target_column)
        df, initial_plot, final_plot, was_balanced, progress = agent.feature_engineer()

        output_path = os.path.join(DRIVE_DIRECTORY, "featured_telecom_data.csv")
        df.to_csv(output_path, index=False)
        print("Featured Dataset Saved Successfully!")
        # Return None for the DataFrame to avoid showing preview
        return progress, initial_plot, final_plot, None

    except Exception as e:
        return f"Error occurred: {str(e)}", None, None, None

def view_dataset():
    """Return a Gradio DataFrame showing the first rows of the featured dataset.

    The file is read from ``DRIVE_DIRECTORY``, the same location
    ``featuring_data`` writes it to. If it cannot be read, the returned
    component shows a single ``Error`` column with the failure message.
    """
    try:
        featured_path = os.path.join(DRIVE_DIRECTORY, "featured_telecom_data.csv")
        featured_df = pd.read_csv(featured_path)
        return gr.DataFrame(value=featured_df.head())

    except Exception as e:
        return gr.DataFrame(value=pd.DataFrame({'Error': [str(e)]}))


### **Gradio Unified Webpage**

In [None]:
# Create the Gradio interface
def main_interface():
    """Build the three-tab DEVA AI Gradio app and launch it.

    Tabs: Data Ingestion (upload + problem identification), Data
    Preprocessing, and Feature Engineering. Each button is wired to the
    corresponding module-level handler function.
    """
    # Custom CSS for styling
    custom_css = """
            .title {
                text-align: center;
                font-family: 'Times New Roman', Times, serif;
                font-size: 24px;
                font-weight: bold;
                color:black ;  /* Title text color */
                padding-bottom: 10px; /* Add padding below the title */
            }

            .heading {
                font-family: 'Times New Roman', Times, serif;
                font-size: 18px;
                font-weight:700;
                font-style: italic;
                color:black;  /* Heading text color */
                text-align: left;  /* Align the heading to the left */
                padding:5px 0px 3px 18px;  /* Add padding to the left */
                  /* Add padding above the heading */
            }
            .fixed-size-btn {
                width:30px;   /* Reduced button width */
                height: 40px;  /* Adjusted button height */
                font-family: 'Times New Roman', Times, serif;
                font-size: 16px;
                font-weight:500;
                color:black;  /* Button text color */
                border:none;  /* Remove button borders */
                border-radius: 5px;
                padding: 5px;  /* Add padding to ensure button text fits */
            }

            .gr-file {
              font-family: 'Times New Roman', Times, serif;
              font-size: 16px;
              font-weight : 500;
              color: black;
            }

            .gr-markdown, .gr-textbox, .gr-html, .gr-button, .gr-file {
                font-size : 16px;
                font-family: 'Times New Roman', Times, serif;  /* Apply Times New Roman to all Gradio elements */
                color:black;  /* Text color for Gradio elements */
            }
            input, select, textarea {
                font-family: 'Times New Roman', Times, serif;  /* Apply Times New Roman to all input elements */
                color:black;  /* Text color for input elements */
                border: 1px solid;  /* Border color for inputs */
            }
        """

    # `theme` expects a Theme instance (gr.themes.default is a module), and
    # the stylesheet is passed via `css=` rather than set as an attribute
    # after construction.
    with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as app:

        with gr.Row():
            # Title with custom font, size, and centered
            gr.Markdown("<div class='title'>DEVA AI</div>")

        # Data Ingestion and PS Page
        with gr.Tab("Data Ingestion"):
            # Left-aligned heading with padding
            gr.Markdown("<div class='heading'>Data Ingestion Agent</div>")

            # File upload section
            file_upload = gr.File(label="Upload Dataset")

            with gr.Row():
                # Store Dataset Button with reduced size
                store_btn = gr.Button("Store Dataset in Drive", elem_classes="fixed-size-btn")
                result_text = gr.Textbox(label="Status", interactive=False)
                uploaded_df = gr.State()  # To hold the DataFrame

            # Preview and Column Names display
            preview_output = gr.HTML()  # Preview of the DataFrame
            column_output = gr.Textbox(label="Column Names", interactive=False)  # Column names display

            # Click event for storing the file
            store_btn.click(fn=upload_and_store, inputs=file_upload, outputs=[result_text, preview_output, uploaded_df, column_output])

            gr.Markdown("<div class='heading'>Problem Identifier Agent</div>")
            with gr.Row():
                # Problem Statement Identifier Button with reduced size
                problem_btn = gr.Button("Problem Identifier Agent", elem_classes="fixed-size-btn")
                problem_output = gr.HTML()

            # Process input and identify problems using the uploaded DataFrame state
            problem_btn.click(fn=process_input, inputs=uploaded_df, outputs=[problem_output, preview_output])

        with gr.Tab("Data Preprocessing"):

            gr.Markdown("<div class='heading'>Data Preprocessing Agent</div>")

            with gr.Row():
                strategy_input = gr.Dropdown(
                    choices=["mean", "median", "mode"],
                    value="mean",  # Set default value
                    label="Missing Value Strategy",
                    info="Choose strategy for handling missing values"
                )
                threshold_input = gr.Slider(
                    minimum=0,
                    maximum=100,
                    value=30,
                    step=1,
                    label="Missing Values Threshold (%)",
                    info="Maximum percentage of missing values before using chosen strategy"
                )

            preprocess_button = gr.Button("Run Preprocessing")
            progress_output = gr.Textbox(label="Progress", interactive=False, lines=10, max_lines=15)
            outlier_images_output = gr.Gallery(label="Outlier Visualizations", show_label=True, elem_id="outlier-images", columns=2)

            view_button = gr.Button("View Preprocessed Dataset")
            final_dataset_output = gr.Dataframe()

            preprocess_button.click(
                fn=run_preprocessing,
                inputs=[strategy_input, threshold_input],
                outputs=[progress_output, outlier_images_output]
            )

            view_button.click(
                fn=view_final_dataset,
                outputs=final_dataset_output
            )

        with gr.Tab("Feature Engineering"):

            gr.Markdown("<div class='heading'>Feature Engineering Agent</div>")
            gr.Markdown("This tool performs feature engineering on telecom customer churn data.")

            with gr.Row():
                target_col = gr.Textbox(
                    label="Target Column",
                    value="Churn",
                    placeholder="Enter target column name"
                )

            feature_btn = gr.Button("Feature Engineering")
            with gr.Row():
                txt = gr.Textbox(label="Progress Output", lines=15)
            with gr.Row():
                initial_dist = gr.Image(label="Initial Class Distribution")
                final_dist = gr.Image(label="Final Class Distribution (after SMOTE)")

            view_btn = gr.Button("View Featured Dataset")
            with gr.Row():
                output_df = gr.DataFrame(label="Featured Dataset Preview")

            feature_btn.click(
                fn=featuring_data,
                inputs=[target_col],
                outputs=[txt, initial_dist, final_dist, output_df]
            )

            view_btn.click(
                fn=view_dataset,
                inputs=[],
                outputs=[output_df]
            )

    app.launch()

# Build and launch the app when this cell is executed; the guard keeps the
# launch from running if the code is imported as a module instead.
if __name__ == "__main__":
    main_interface()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a250ab85c80d8b6d02.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
# Ask Gradio to shut down the app(s) launched from this kernel so their
# ports and share links are released.
gr.close_all()