In [None]:
import gradio as gr
import pandas as pd

In [None]:


# df_store = {}  # dictionary to store file-wise dataframes

# def read_csv(file_obj):
#     try:
#         file_obj.seek(0)  # important for notebook file input
#         df = pd.read_csv(file_obj)
#         df_store["df"] = df
#         return gr.update(visible=True), gr.update(visible=False), file_obj.name
#     except Exception as e:
#         return gr.update(visible=False), gr.update(visible=True), f"Error: {str(e)}"


In [25]:
def perform_basic_eda():
    """Build a Markdown EDA report for the DataFrame cached in ``df_store``.

    Reads the frame stored under ``df_store["df"]`` and summarises its shape,
    missing-value count, numeric / non-numeric columns, and ``describe()``
    statistics.

    Returns:
        tuple: ``(report, update)``. ``report`` is a Markdown string, or a
        prompt to upload a file when no frame is cached. ``update`` is a
        ``gr.update(visible=...)`` that is ``True`` only when a report was
        produced.
    """
    df = df_store.get("df")
    if df is None:
        return "No DataFrame loaded. Please upload a file first.", gr.update(visible=False)

    # Column labels are not guaranteed to be strings; str.join raises on ints.
    num_cols = [str(col) for col in df.select_dtypes(include="number").columns]
    cat_cols = [str(col) for col in df.select_dtypes(exclude="number").columns]

    if df.shape[1] == 0:
        # describe() raises on a frame that has no columns at all.
        summary_table = "_No columns to summarise._"
    else:
        summary = df.describe(include="all").transpose().round(2)
        try:
            summary_table = summary.to_markdown()
        except ImportError:
            # to_markdown() needs the optional `tabulate` package; fall back to
            # a fenced plain-text table instead of failing the whole report.
            summary_table = f"```\n{summary.to_string()}\n```"

    eda_report = f"""
### 📊 Basic EDA Summary

- **Rows:** {df.shape[0]}
- **Columns:** {df.shape[1]}
- **Missing values:** {df.isnull().sum().sum()}
- **Numeric columns:** {len(num_cols)} → {', '.join(num_cols) or 'none'}
- **Categorical columns:** {len(cat_cols)} → {', '.join(cat_cols) or 'none'}

#### 🔍 Sample Summary:
{summary_table}
"""
    return eda_report, gr.update(visible=True)


In [9]:

# Sample EDA output
# Lines ending in two spaces are Markdown hard line breaks; spelling each line
# out as its own literal keeps that trailing whitespace visible.
sample_eda = "\n".join([
    "",
    "### Basic EDA Summary",
    "",
    "- **Rows:** 150  ",
    "- **Columns:** 5  ",
    "- **Missing Values:** 0  ",
    "- **Numeric Columns:** 3  ",
    "- **Categorical Columns:** 2  ",
    "",
    "#### Column Overview:",
    "- `sepal_length`: Mean = 5.84, Std = 0.83  ",
    "- `sepal_width`: Mean = 3.06, Std = 0.44  ",
    "- `species`: 3 unique classes",
    "",
])

In [10]:
# Sample response for Q&A
# Canned "reasoning" text; the two trailing spaces are Markdown hard line breaks.
sample_reasoning = (
    "\n"
    "**Step 1**: Identified numeric columns  \n"
    "**Step 2**: Applied aggregation functions  \n"
    "**Step 3**: Validated column distributions\n"
)

In [11]:
# Canned answer text returned alongside sample_reasoning.
sample_answer = (
    "\n"
    "The dataset shows that 'virginica' has the highest average petal length.\n"
)

In [12]:

# Example prompts, one per line; each becomes a button on the Q&A tab.
sample_questions = """\
What is the average of each numeric column?
Which column has the most missing values?
What are the unique values in each categorical column?
Which column has highest correlation with target?
Give me summary statistics of the dataset.
""".splitlines()

In [13]:
# Placeholder for file info
# Hard-coded metadata shown in the Q&A footer; it is not derived from the upload.
file_info = dict(
    name="iris.csv",
    description="Iris flower dataset",
    rows=150,
    columns=5,
)

In [14]:

# ---- Functions ----

# def upload_file(file):
#     if file is None:
#         return gr.update(visible=False), gr.update(visible=True), ""
#     return gr.update(visible=True), gr.update(visible=False), file.name

# def show_eda():
#     return gr.update(value=sample_eda, visible=True), gr.update(visible=True)

def move_to_qa():
    """Return a hide-update followed by a show-update.

    The caller's ``outputs`` list decides which components these apply to:
    the first one is hidden, the second one is made visible.
    """
    hide_first = gr.update(visible=False)
    show_second = gr.update(visible=True)
    return hide_first, show_second

def ask_question(q):
    """Return the canned ``(reasoning, answer)`` pair.

    ``q`` is accepted so the function can be wired to the question textbox,
    but it is not used: the same sample texts are returned for every question.
    """
    canned_response = (sample_reasoning, sample_answer)
    return canned_response

def fill_question(q):
    """Identity callback: hand back ``q`` unchanged."""
    question_text = q
    return question_text

def get_question_func(q_text):
    """Create a zero-argument callback that always returns ``q_text``.

    Each call produces its own closure, so callbacks created in a loop keep
    the text they were created with.
    """
    def return_q():
        # q_text is captured from the enclosing call.
        return q_text

    return return_q


In [34]:
# ---- UI Blocks ----

# The store and the upload handler are defined BEFORE the UI is built so that
# the event wiring below can reference `read_csv` on a fresh kernel
# (Restart & Run All); defining them after the wiring raises NameError.
df_store = {}  # dictionary to store file-wise dataframes


def read_csv(file_obj):
    """Load the uploaded CSV into ``df_store["df"]`` and switch screens.

    Args:
        file_obj: Value delivered by the ``gr.File`` input. Depending on the
            Gradio version / ``type`` setting this is either a path string or
            a temp-file wrapper exposing ``.name``; ``None`` when nothing was
            uploaded.

    Returns:
        tuple: ``(eda_screen_update, upload_screen_update, file_name)``. On
        success the EDA tab is shown, the upload tab hidden, and the file
        path returned. On failure the visibility is reversed and the third
        item is an ``"Error: ..."`` message.
    """
    if file_obj is None:
        return gr.update(visible=False), gr.update(visible=True), "Error: no file uploaded"
    try:
        # Resolve to a filesystem path whether we were handed a wrapper or a str.
        source = getattr(file_obj, "name", file_obj)
        df = pd.read_csv(source)
        df_store["df"] = df
        return gr.update(visible=True), gr.update(visible=False), str(source)
    except Exception as e:
        # A UI callback must still return one value per output component.
        return gr.update(visible=False), gr.update(visible=True), f"Error: {str(e)}"


with gr.Blocks(title="ADA - AI-based Data Analyzer") as demo:
    uploaded_file = gr.State()
    file_name = gr.State(value=file_info["name"])

    # ----------- Upload Screen -----------
    with gr.Tab(label="Load Data",visible=True) as upload_screen:
        with gr.Column():
            gr.Markdown("## 👋 Welcome to ADA\nUpload a CSV file to get started.")
            file_input = gr.File(file_types=[".csv"], label="Upload your CSV file")
            submit_btn = gr.Button("Submit")

    # ----------- EDA Screen -----------
    with gr.Tab(label="EDA",visible=False) as eda_screen:
        eda_btn = gr.Button("📊 Process the data and do Basic EDA")
        eda_output = gr.Markdown(visible=False)
        next_btn = gr.Button("✅ If satisfied with the Data, let's move to Q&A", visible=False)

    # ----------- Q&A Screen -----------
    with gr.Tab(label="Questions & Answers",visible=False) as qa_screen:
        gr.Markdown("## ✨ Here happens the real magic")
        with gr.Row():
            question_box = gr.Textbox(label="Ask your question on the data", scale=4)
            ask_btn = gr.Button("Find answers", scale=1)
        with gr.Row():
            reasoning = gr.Markdown("🔍 *Reasoning will appear here*")
            answer = gr.Markdown("📌 *Answer will appear here*")
        with gr.Row():
            question_buttons = []
            for q in sample_questions:
                btn = gr.Button(q)
                # No inputs are wired, so the callback must take zero arguments;
                # get_question_func binds this button's own text into a closure.
                btn.click(fn=get_question_func(q), inputs=[], outputs=question_box)
                question_buttons.append(btn)
        with gr.Row():
            gr.Markdown(f"**File**: `{file_info['name']}` | 📝 {file_info['description']} | 📐 {file_info['rows']} rows × {file_info['columns']} columns")

    # ----------- Button Logic -----------
    submit_btn.click(read_csv, inputs=file_input, outputs=[eda_screen, upload_screen, file_name])
    # submit_btn.click(upload_file, inputs=file_input, outputs=[upload_screen, file_name])
    eda_btn.click(perform_basic_eda, outputs=[eda_output, next_btn])
    next_btn.click(move_to_qa, outputs=[eda_screen, qa_screen])
    ask_btn.click(fn=ask_question, inputs=question_box, outputs=[reasoning, answer])





In [35]:
# Run the app
if __name__ == "__main__":
    # Serve on 127.0.0.1, port 7000, with share=False.
    demo.launch(
        server_name="127.0.0.1",
        server_port=7000,
        share=False,
    )

Running on local URL:  http://127.0.0.1:7000

To create a public link, set `share=True` in `launch()`.


In [36]:
# Stop the server started by demo.launch() above.
demo.close()

Closing server running on port: 7000
