### Packages

In [1]:
import gradio as gr
import os 
import re
import autogen
from autogen import AssistantAgent,ChatResult
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from pymongo import MongoClient
import mlflow
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.schema import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from llama_index.core.retrievers import VectorIndexRetriever
 
import gradio as gr
import pandas as pd
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## OpenAI API key
# Prefer the OPENAI_API_KEY environment variable so the credential does not
# have to live in the notebook; the literal is kept only as a fallback so the
# cell behaves exactly as before when the variable is not set.
import openai
openai.api_key = os.environ.get("OPENAI_API_KEY", "######")
 

In [3]:
def parse_summary(summary_out):
    """Split a judge summary into verdict, score table and rationale.

    Returns a 3-tuple ``(judgment, score_df, reason)``:

    * ``judgment`` - text after ``**Judgment**: `` on that line, stripped,
      or ``"N/A"`` when the marker is absent.
    * ``score_df`` - DataFrame with columns ``Metric`` and ``Score`` holding
      one row per ``- <name>: <digits>`` match (both kept as strings).
    * ``reason`` - everything after ``**Reason**: `` up to the end of the
      text (newlines included), stripped, or ``"N/A"``.
    """
    metric_pairs = re.findall(r'- ([\w\s]+): (\d+)', summary_out)
    judgment_match = re.search(r'\*\*Judgment\*\*: (.+)', summary_out)
    # DOTALL lets the reason span several lines.
    reason_match = re.search(r'\*\*Reason\*\*: (.+)', summary_out, re.DOTALL)

    if judgment_match:
        verdict = judgment_match.group(1).strip()
    else:
        verdict = "N/A"

    if reason_match:
        explanation = reason_match.group(1).strip()
    else:
        explanation = "N/A"

    score_table = pd.DataFrame(metric_pairs, columns=["Metric", "Score"])
    return verdict, score_table, explanation

In [4]:
import sqlite3
import pandas as pd
# Select the MLflow experiment named "AI_SQL_Generation" for this notebook.
mlflow.set_experiment("AI_SQL_Generation")
 

def execute_sql_query(query, db_path="spider_db/spider_db_baseball.db"):
    """Run ``query`` against the SQLite file at ``db_path`` and log to MLflow.

    The query text, the result CSV (``sql_result.csv``) and any error are
    each logged in their own MLflow run.

    Args:
        query: SQL text to execute.
        db_path: Path of the SQLite database file.

    Returns:
        ``(df, None)`` on success, where ``df`` is the result DataFrame, or
        ``(None, message)`` when anything in the try-block raises.
    """
    # Bind the handle before the try so the finally-clause can test it
    # directly instead of inspecting locals().
    conn = None
    try:
        with mlflow.start_run(run_name="Execute_SQL_Query"):
            mlflow.log_param("query", query)

        # NOTE(review): the query is executed as given on a writable
        # connection; confirm callers only pass read-only statements.
        conn = sqlite3.connect(db_path)
        df = pd.read_sql_query(query, conn)

        # Save DataFrame to a CSV file locally
        csv_path = "sql_result.csv"
        df.to_csv(csv_path, index=False)

        with mlflow.start_run(run_name="SQL_Execution_Results"):
            # Log the CSV file as an artifact so it can be viewed/downloaded via the MLflow UI
            mlflow.log_artifact(csv_path, artifact_path="sql_results")

        return df, None
    except Exception as e:
        with mlflow.start_run(run_name="SQL_Error_Logging"):
            mlflow.log_param("error", str(e))
        return None, f"❌ SQL Execution Error: {str(e)}"
    finally:
        if conn is not None:
            conn.close()
 

In [5]:

# --------------------- MongoDB Memory Setup --------------------- #
# Connect to a MongoDB server on the local machine (127.0.0.1:27017).
mongo_client = MongoClient("mongodb://127.0.0.1:27017")
 
 
# One database with two collections: "session_memory" is looked up by
# session_id (short-term) and "user_memory" by user_id (long-term).
mongo_db = mongo_client["memory_database"]
short_term_collection = mongo_db["session_memory"]
long_term_collection = mongo_db["user_memory"]

# Function to store session-based memory (Short-Term)
def store_short_term_memory(session_id, user_id, query, mood):
    """Upsert the session document: record mood/user id and append the query.

    The document is matched on ``session_id`` and created if it is missing.
    """
    session_filter = {"session_id": session_id}
    changes = {
        # Overwrite the latest mood and owning user on every call.
        "$set": {"current_mood": mood, "user_id": user_id},
        # Grow the per-session query list.
        "$push": {"previous_queries": query},
    }
    short_term_collection.update_one(session_filter, changes, upsert=True)

# Function to retrieve session memory
def retrieve_short_term_memory(session_id):
    """Return the stored queries and mood for ``session_id``.

    Missing sessions (or missing fields) fall back to an empty query list
    and the mood ``"Neutral"``.
    """
    record = short_term_collection.find_one({"session_id": session_id})
    # Treat "no document" like a document with no fields so one code path
    # supplies the defaults.
    source = record if record else {}
    queries = source.get("previous_queries", [])
    mood = source.get("current_mood", "Neutral")
    return {"previous_queries": queries, "current_mood": mood}

# Function to store long-term user preferences & query history
def store_long_term_memory(user_id, query, mood, preferences=None, demographics=None):
    """Upsert the user's long-term record and append ``query`` to its history.

    Args:
        user_id: Key of the user document.
        query: Query text pushed onto ``query_history``.
        mood: Stored as ``last_mood``.
        preferences: Optional preferences mapping. When ``None`` the stored
            value is left untouched instead of being overwritten with null.
        demographics: Optional demographics mapping, handled like
            ``preferences``.
    """
    fields_to_set = {"last_mood": mood}
    # Only write the optional fields when the caller actually supplied them;
    # otherwise a call relying on the defaults would wipe stored data.
    if preferences is not None:
        fields_to_set["preferences"] = preferences
    if demographics is not None:
        fields_to_set["demographics"] = demographics

    long_term_collection.update_one(
        {"user_id": user_id},
        {
            "$set": fields_to_set,
            "$push": {"query_history": query},  # Append to past queries
        },
        upsert=True
    )

# Function to retrieve long-term memory
def retrieve_long_term_memory(user_id):
    """Fetch the long-term record for ``user_id``.

    Returns:
        ``None`` when no document exists, otherwise a dict with the keys
        ``preferences``, ``demographics``, ``query_history`` and
        ``last_mood``. Fields that are missing *or stored as null* are
        replaced by ``{}``, ``{}``, ``[]`` and ``"Neutral"`` respectively, so
        callers can use the values without None checks.
    """
    user_data = long_term_collection.find_one({"user_id": user_id})
    if not user_data:
        return None
    # `.get(key, default)` only covers a missing key; `or` also covers a key
    # that exists with a null value (store_long_term_memory writes None when
    # called with its default arguments).
    return {
        "preferences": user_data.get("preferences") or {},
        "demographics": user_data.get("demographics") or {},
        "query_history": user_data.get("query_history") or [],
        "last_mood": user_data.get("last_mood") or "Neutral"
    }

In [6]:
from textblob import TextBlob

def detect_mood_from_query(query):
    """Map the TextBlob polarity of ``query`` to a mood label.

    The polarity is logged to MLflow (run "Mood_Analysis") together with the
    query text. Returns "Curious" above 0.3, "Frustrated" below -0.3 and
    "Neutral" otherwise.
    """
    polarity = TextBlob(query).sentiment.polarity

    with mlflow.start_run(run_name="Mood_Analysis"):
        mlflow.log_param("query_text", query)
        mlflow.log_metric("sentiment_score", polarity)

    if polarity > 0.3:
        return "Curious"
    if polarity < -0.3:
        return "Frustrated"
    return "Neutral"

In [7]:
 


# === Step 1: Configure OpenAI Embeddings ===
# Install an OpenAIEmbedding instance (default constructor arguments) as the
# embed model on llama_index's global Settings object.
Settings.embed_model = OpenAIEmbedding()  # or ada-002, etc.

# === Step 2: Load Documents from Directory ===
# Reads the files under ./docs relative to the notebook's working directory.
directory_docs = SimpleDirectoryReader("./docs").load_data()

# === Step 3: Load SQLite Schema as Document ===
def load_sqlite_schema_as_docs(sqlite_path):
    """Describe every table of a SQLite database as one ``Document`` each.

    Args:
        sqlite_path: Path of the SQLite database file.

    Returns:
        A list of ``Document`` objects whose text is ``Table: <name>``
        followed by one ``  - <column> (<declared type>)`` line per column.
    """
    conn = sqlite3.connect(sqlite_path)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        documents = []
        for (table_name,) in tables:
            # Quote the identifier so names containing spaces, quotes or
            # keywords do not break (or alter) the PRAGMA statement.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"PRAGMA table_info({quoted_name})")
            columns = cursor.fetchall()

            # table_info rows: index 1 is the column name, index 2 its type.
            lines = [f"Table: {table_name}\n"]
            lines.extend(f"  - {col[1]} ({col[2]})\n" for col in columns)
            documents.append(Document(text="".join(lines)))

        return documents
    finally:
        # Release the connection even if a query above raises.
        conn.close()

# === Step 4: Load SQLite documents ===
# One Document per table of the baseball database (same file that
# execute_sql_query uses by default).
sqlite_docs = load_sqlite_schema_as_docs("spider_db/spider_db_baseball.db")  # your path

# === Step 5: Combine both sets of documents ===
all_documents = directory_docs + sqlite_docs

# === Step 6: Create the index ===
# `index` is a module-level global; submit_action builds its retriever from it.
index = VectorStoreIndex.from_documents(all_documents)


In [8]:
def _extract_sql_from_reply(content):
    """Return the SQL inside the first ```sql fence of ``content``.

    Falls back to the whole stripped text when there is no ```sql fence, and
    to everything after the opening fence when the closing fence is missing.
    """
    content = content or ""
    opening = "```sql"
    if opening not in content:
        return content.strip()
    start = content.index(opening) + len(opening)
    end = content.find("```", start)
    if end == -1:
        end = len(content)
    return content[start:end].strip()


def submit_action(user_query, session_id, user_id):
    """Gradio handler: generate, judge, run and analyse a SQL query.

    Steps: load and update the short/long-term memory, let the RAG proxy and
    assistant produce a SQL query, have the validator/judge group chat assess
    it, execute it, and ask the data-analyst agent for a summary and graphs.

    Args:
        user_query: Natural-language question typed by the user.
        session_id: Key for the short-term (session) memory.
        user_id: Key for the long-term (user) memory.

    Returns:
        Always a 6-tuple matching the six Gradio outputs wired to this
        handler: ``(sql_text, judge_feedback, result_df, graph_info,
        chart_type_dropdown_update, analyst_summary)``. On failure the
        DataFrame is empty, ``graph_info`` carries the error text and the
        summary is ``""``.
    """
    # Flat LLM config shared by every agent. The key is taken from the
    # environment when available; the literal is only a fallback.
    # NOTE(review): GROQ_API_KEY is assumed to be the variable name in use.
    llm_config = {
        "model": "llama-3.3-70b-versatile",
        "api_key": os.environ.get("GROQ_API_KEY", "##########"),
        "api_type": "groq",
    }
    os.environ["AUTOGEN_USE_DOCKER"] = "False"

    session_data = retrieve_short_term_memory(session_id)
    previous_queries = session_data["previous_queries"]

    # Retrieve long-term memory (past interactions); tolerate a missing
    # record as well as fields stored as null.
    user_context = retrieve_long_term_memory(user_id) or {}
    long_term_queries = user_context.get("query_history", [])
    preferences = user_context.get("preferences") or {}
    demographics = user_context.get("demographics") or {}
    last_mood = user_context.get("last_mood", "Neutral")

    # Detect mood dynamically from user query
    current_mood = detect_mood_from_query(user_query)

    # Update memory stores
    store_short_term_memory(session_id, user_id, user_query, current_mood)
    store_long_term_memory(user_id, user_query, current_mood, preferences, demographics)

    # Create a retriever from the module-level vector index
    retriever = VectorIndexRetriever(index=index)

    assistant = AssistantAgent(
        name="assistant",
        llm_config=llm_config,
        system_message=(
            f"You are an AI assistant generating SQL queries.\n"
            f"User's previous session queries: {previous_queries}\n"
            f"Long-term query history: {long_term_queries}\n"
            f"Current mood: {current_mood}, previous moods: {last_mood}\n"
            f"Preferred format: {preferences.get('preferred_format', 'table')}\n"
            f"User expertise level: {demographics.get('experience_level', 'Intermediate')}\n"
            "Generate responses tailored to query patterns, mood, and expertise level."
        ),
    )
    ragproxyagent = RetrieveUserProxyAgent(
        name="ragproxyagent",
        retrieve_config={
            "task": "qa",
            "custom_retriever": retriever,
            "overwrite": True,
        },
    )
    assistant.reset()
    ragproxyagent.reset()
    with mlflow.start_run(run_name="LLM_Query_Generation"):
        mlflow.log_param("user_query", user_query)

    result = ragproxyagent.initiate_chat(
        assistant,
        message=ragproxyagent.message_generator,
        max_round=2,
        problem=user_query,
        speaker_selection_method="round_robin",  # Ensures agents take turns
        allow_repeat_speaker=False,
        max_turns=1,
    )

    # Take the first reply attributed to the assistant. Initialise the
    # variable first so a chat without such a reply cannot cause a NameError.
    generated_query = None
    for message in result.chat_history:
        if message["role"] == "user" and message.get("name") == "assistant":
            generated_query = _extract_sql_from_reply(message["content"])
            break

    if not generated_query:
        return (
            "",
            "Query generation failed.",
            pd.DataFrame(),
            "❌ No SQL query was produced by the assistant.",
            gr.update(choices=[]),
            "",
        )

    validator = AssistantAgent(
        name="validator",
        llm_config=llm_config,
        system_message=(
            f"You are a SQL Validator. Review the {generated_query} query and validate it against the provided schema. "
            "Check for syntax errors, missing columns/tables, and logical issues. "
            "Respond with one of the following:\n\n"
            "- ✅ Valid SQL: If the query is correct.\n"
            "- ❌ Invalid SQL: If the query has issues. Explain the problem clearly."
        ),
    )

    judge = AssistantAgent(
        name="judge",
        system_message=(
            f"You are a Judge Agent. Review the user question, the generated SQL query, and the validator’s feedback. "
            "Evaluate the query based on the following 6 metrics and provide scores between 0 and 10:\n\n"

            "1. Accuracy (0–10): Does the AI-generated SQL return the same results as the reference SQL when executed on the same database?\n"
            "- 10: Identical results\n"
            "- 5: Partial match (e.g., missing filters, subset/superset of rows)\n"
            "- 0: Incorrect or different logic\n\n"

            "2. Efficiency (0–10): Evaluate the query's performance in terms of execution time, indexing, and join optimization.\n"
            "- 10: Highly efficient (<1s, optimized with indexes and joins)\n"
            "- 6: Moderate (1–3s, room for improvement)\n"
            "- 3: Low efficiency (>3s, full scans or nested loops)\n\n"

            "3. Hallucination (0–10): Check for invalid elements like non-existent tables, columns, or illogical clauses.\n"
            "- 0: No hallucination\n"
            "- 5: Minor (1–2 issues)\n"
            "- 10: Major hallucinations (e.g., many or critical invalid references)\n\n"

            "4. Completeness (0–10): Does the query fully answer the user’s question?\n"
            "- 10: Fully complete\n"
            "- 5: Partially complete (some logic or filters missing)\n"
            "- 0: Incomplete or irrelevant\n\n"

            "5. Structure Similarity (0–10): Are similar joins, filters, subqueries, and groupings used as in the reference?\n"
            "- 10: Very similar\n"
            "- 6: Moderately similar\n"
            "- 2: Very different\n\n"

            "6. Readability & Maintainability (0–10): Is the SQL query easy to read, well-formatted, and maintainable?\n"
            "- 10: Excellent\n"
            "- 7: Good\n"
            "- 4: Fair\n"
            "- 1: Poor\n\n"

            "7. Overall Score (0–10): Compute using the formula:\n"
            "   score = round(0.3 * accuracy + 0.15 * efficiency + 0.15 * (10 - hallucination) + "
            "0.2 * completeness + 0.1 * structure_similarity + 0.1 * readability)\n\n"

            "Respond in this format:\n"
            "**Judgment**: Good / Needs Improvement\n"
            "**Scores**:\n"
            "- Accuracy: X\n"
            "- Efficiency: X\n"
            "- Hallucination: X\n"
            "- Completeness: X\n"
            "- Structure Similarity: X\n"
            "- Readability: X\n"
            "- Overall Score: X\n"
            "**Reason**: <Brief explanation>"
        ),
        llm_config=llm_config,
    )

    groupchat = autogen.GroupChat(
        agents=[validator, judge],
        messages=[],
        max_round=5,
        speaker_transitions_type="allowed",
        speaker_selection_method="round_robin",  # Ensures agents take turns
        allow_repeat_speaker=False
    )

    manager = autogen.GroupChatManager(
        groupchat=groupchat,
        # "config_list" must be a list of config dicts, so wrap the single
        # flat config instead of passing the dict itself.
        llm_config={"config_list": [llm_config]},
        # Stop as soon as the judge has produced a non-empty message.
        is_termination_msg=lambda msg: (
            isinstance(msg, dict) and
            msg.get("name") == "judge" and
            bool(msg.get("content"))  # Ensure content is not empty
        )
    )
    # NOTE(review): the manager starts the chat with itself as recipient;
    # kept as in the original - confirm this is intended.
    manager.initiate_chat(
        manager,
        max_turns=1,
        message=f"Analyze the sql query : {generated_query}"
    )
    judge_entry = next(
        (msg['content'] for msg in groupchat.messages if msg.get('name') == 'judge'),
        None
    )

    if judge_entry:
        # Save the judge's feedback to a file. UTF-8 is explicit because the
        # prompts ask the agents for non-ASCII markers (✅ / ❌).
        with open("judge_evaluation.txt", "w", encoding="utf-8") as f:
            f.write(judge_entry)

        # Log the feedback as an MLflow artifact.
        with mlflow.start_run(run_name="LLM_Evaluation_Results"):
            mlflow.log_artifact("judge_evaluation.txt", artifact_path="evaluation")
    else:
        print("No entry found from 'judge'")

    df, error = execute_sql_query(generated_query)

    if error:
        # Same arity as the success path: Gradio needs one value per output.
        return (
            generated_query,
            "Query failed.",
            pd.DataFrame(),
            error,
            gr.update(choices=[]),
            "",
        )

    store_short_term_memory(session_id, user_id, generated_query, current_mood)
    data_analyst = AssistantAgent(
        name="data_analyst",
        llm_config=llm_config,
        system_message=(
            "You are a Data Analyst Agent.\n"
            "After retrieving the data:\n"
            "1. If the data supports a clear report (like customer details, summaries, etc.), generate a short and also detailed analytical report.\n"
            "2. If the data supports visual representation (like trends, numerical summaries, categories), suggest a few types of graphs and describe what each would show.\n"
            "3. If the data is minimal or purely textual, generate a brief plain text summary. And also need the detailed summary also\n\n"
            "If it is possible to create graphs based on the data, suggest the following types of graphs (if applicable):\n"
            "    - Bar Chart: To show frequency or counts of categorical data.\n"
            "    - Line Chart: To show trends over time.\n"
            "    - Pie Chart: To represent proportions or parts of a whole.\n"
            "    - Histogram: To visualize the distribution of numerical data.\n"
            "    - Box Plot: To visualize the distribution and identify outliers.\n\n"
            "If the data supports any of these graphs, provide the Python code to generate the graph and save it as a .jpg image. If not, simply state that no graphical representation is possible and provide only the analysis and summary of the data.\n\n"
            "Be concise, professional, and assume the reader is a business stakeholder.\n"
            "the data frame name should be df and no need to initialize"
            "Analysed Relevent and unique information  need in should be in table format. no code"
            "graph image location should be graph/ then the image name"
            "Format your output as follows:\n\n"
            "**Query**:\n<query>\n\n"
            "**Data Snapshot**:\n<first 5 rows of the data>\n\n"
            "**Analysis**:\n<brief summary>\n\n"
            "**Detailed Analysis**:\n<Analysed Relevent and unique information  need in should be in table format. no code>\n\n"
            "**Report or Graph Suggestions**:\n<graph recommendations and corresponding Python code, or 'No graphical representation is possible'>"
        )
    )

    message_text = (
        f"**SQL Query**:\n{generated_query}\n\n"
        f"**Fetched Data (Top Rows)**:\n{df}\n\n"
        "Please analyze the data accordingly."
    )

    data_analyst.reset()
    chat_results = data_analyst.initiate_chat(data_analyst, message=message_text, max_turns=1)
    graph_images = extract_and_run_graphs(chat_results.chat_history, df)
    graph_suggestions = extract_graph_suggestions(chat_results.chat_history)
    graph_types = parse_graph_types(graph_suggestions)

    return (
        generated_query,
        judge_entry,
        df,
        graph_images,
        gr.update(choices=graph_types),
        chat_results.summary,
    )

In [9]:

def extract_and_run_graphs(chat_history, df):
    """Run the Python snippets the data analyst produced and collect images.

    Every ```python fenced block found in messages whose ``name`` is
    ``"data_analyst"`` is executed in one shared namespace that exposes
    ``df``, ``plt`` and ``os``.

    Args:
        chat_history: List of message dicts (``name`` / ``content`` keys).
        df: DataFrame made available to the executed code as ``df``.

    Returns:
        Paths passed as a string literal to ``plt.savefig(...)`` that exist
        on disk after their block ran, in execution order, without
        duplicates.
    """
    code_block_pattern = re.compile(r"```python(.*?)```", re.DOTALL)
    # Match the literal path of savefig even when further arguments follow
    # (e.g. plt.savefig('graph/a.jpg', dpi=300)).
    savefig_pattern = re.compile(r"plt\.savefig\(\s*['\"](.*?)['\"]")

    # Collect the code blocks of the analyst's messages.
    python_code_blocks = []
    for message in chat_history:
        if message.get("name") != "data_analyst":
            continue
        content = message.get("content") or ""  # content may be None
        python_code_blocks.extend(code.strip() for code in code_block_pattern.findall(content))

    # SECURITY: this executes model-generated code with full interpreter
    # privileges. Only use with trusted models/inputs or inside a sandbox.
    exec_globals = {"df": df, "plt": plt, "os": os}
    saved_images = []
    for i, code_block in enumerate(python_code_blocks):
        print(f"\n▶ Executing graph code block {i + 1}:\n{code_block}\n")
        filenames = savefig_pattern.findall(code_block)
        try:
            # Make sure the target folders exist so savefig does not fail
            # merely because e.g. "graph/" has not been created yet.
            for filename in filenames:
                parent = os.path.dirname(filename)
                if parent:
                    os.makedirs(parent, exist_ok=True)

            exec(code_block, exec_globals)

            for filename in filenames:
                if os.path.exists(filename) and filename not in saved_images:
                    saved_images.append(filename)

        except Exception as e:
            print(f"❌ Error in graph code block {i + 1}: {e}")
        finally:
            # Do not let figures of one block leak into the next one.
            plt.close("all")

    return saved_images  # You can return this list to show in Gradio or elsewhere


In [10]:
import matplotlib.pyplot as plt
def extract_graph_suggestions(chat_history):
    """Return the first message text containing "Report or Graph Suggestions".

    Args:
        chat_history: Iterable of message dicts.

    Returns:
        The full ``content`` of the first matching message, or ``""`` when
        no message matches. Messages whose content is missing or ``None``
        are skipped instead of raising.
    """
    for message in chat_history:
        # `.get("content", "")` would still hand back None for an explicit
        # null content, and `in None` raises TypeError.
        content = message.get("content") or ""
        if "Report or Graph Suggestions" in content:
            return content
    return ""



def parse_graph_types(suggestions):
    """List the supported chart names mentioned in ``suggestions``.

    The result always follows the fixed order Bar, Pie, Line, Box,
    regardless of where the names appear in the text.
    """
    known_types = ("Bar Chart", "Pie Chart", "Line Chart", "Box Plot")
    return [label for label in known_types if label in suggestions]
def create_custom_chart(df, chart_type, x_col, y_col):
    """Plot ``y_col`` against ``x_col`` and save it as graph/custom_graph.jpg.

    Args:
        df: Source DataFrame.
        chart_type: "Bar Chart", "Pie Chart", "Line Chart" or "Box Plot".
        x_col: Column used for the x axis / categories / box label.
        y_col: Column holding the plotted values.

    Returns:
        The image path on success, the string
        "Error: No valid data available for plotting." when no rows remain
        after dropping NaNs, or ``None`` for an unsupported ``chart_type``.
        The saved image is also logged to MLflow.
    """
    # Ensure that the columns to plot do not contain NaN or None values
    df = df.dropna(subset=[x_col, y_col])

    # Check if the dataframe is empty after dropping NaN values
    if df.empty:
        return "Error: No valid data available for plotting."

    # Reject unknown chart types before a figure is created, so nothing is
    # left open on this path.
    if chart_type not in ("Bar Chart", "Pie Chart", "Line Chart", "Box Plot"):
        return None

    path = "graph/custom_graph.jpg"
    # savefig does not create missing folders.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    plt.figure(figsize=(10, 6))
    try:
        if chart_type == "Bar Chart":
            plt.bar(df[x_col], df[y_col], color='skyblue')
            plt.xlabel(x_col)
            plt.ylabel(y_col)
        elif chart_type == "Pie Chart":
            data = df.groupby(x_col)[y_col].sum()
            plt.pie(data, labels=data.index, autopct="%1.1f%%")
        elif chart_type == "Line Chart":
            plt.plot(df[x_col], df[y_col], marker='o', color='b')
            plt.xlabel(x_col)
            plt.ylabel(y_col)
        else:  # "Box Plot"
            plt.boxplot([df[y_col]])
            # Label the single box through xticks rather than the `labels`
            # keyword, which newer Matplotlib releases deprecate.
            plt.xticks([1], [x_col])
            plt.ylabel(y_col)
        plt.title(f"{chart_type}: {y_col} by {x_col}")

        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even when plotting raises.
        plt.close()

    with mlflow.start_run(run_name="Custom_Graph_Generation"):
        mlflow.log_artifact(path, artifact_path="custom_graphs")

    return path
 


In [11]:
 
import matplotlib.pyplot as plt
def extract_graph_suggestions(chat_history):
    """Return the first message text containing "Report or Graph Suggestions".

    Args:
        chat_history: Iterable of message dicts.

    Returns:
        The full ``content`` of the first matching message, or ``""`` when
        no message matches. Messages whose content is missing or ``None``
        are skipped instead of raising.
    """
    for message in chat_history:
        # `.get("content", "")` would still hand back None for an explicit
        # null content, and `in None` raises TypeError.
        content = message.get("content") or ""
        if "Report or Graph Suggestions" in content:
            return content
    return ""

def parse_graph_types(suggestions):
    """Collect the supported chart names that occur in ``suggestions``.

    Names are reported in the fixed order Bar, Pie, Line, Box.
    """
    found = []
    for chart_name in ("Bar Chart", "Pie Chart", "Line Chart", "Box Plot"):
        if chart_name in suggestions:
            found.append(chart_name)
    return found
def create_custom_chart(df, chart_type, x_col, y_col):
    """Plot ``y_col`` against ``x_col`` and save it as graph/custom_graph.jpg.

    Args:
        df: Source DataFrame.
        chart_type: "Bar Chart", "Pie Chart", "Line Chart" or "Box Plot".
        x_col: Column used for the x axis / categories / box label.
        y_col: Column holding the plotted values.

    Returns:
        The image path on success, the string
        "Error: No valid data available for plotting." when no rows remain
        after dropping NaNs, or ``None`` for an unsupported ``chart_type``.
        The saved image is also logged to MLflow.
    """
    # Ensure that the columns to plot do not contain NaN or None values
    df = df.dropna(subset=[x_col, y_col])

    # Check if the dataframe is empty after dropping NaN values
    if df.empty:
        return "Error: No valid data available for plotting."

    # Reject unknown chart types before a figure is created, so nothing is
    # left open on this path.
    if chart_type not in ("Bar Chart", "Pie Chart", "Line Chart", "Box Plot"):
        return None

    path = "graph/custom_graph.jpg"
    # savefig does not create missing folders.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    plt.figure(figsize=(10, 6))
    try:
        if chart_type == "Bar Chart":
            plt.bar(df[x_col], df[y_col], color='skyblue')
            plt.xlabel(x_col)
            plt.ylabel(y_col)
        elif chart_type == "Pie Chart":
            data = df.groupby(x_col)[y_col].sum()
            plt.pie(data, labels=data.index, autopct="%1.1f%%")
        elif chart_type == "Line Chart":
            plt.plot(df[x_col], df[y_col], marker='o', color='b')
            plt.xlabel(x_col)
            plt.ylabel(y_col)
        else:  # "Box Plot"
            plt.boxplot([df[y_col]])
            # Label the single box through xticks rather than the `labels`
            # keyword, which newer Matplotlib releases deprecate.
            plt.xticks([1], [x_col])
            plt.ylabel(y_col)
        plt.title(f"{chart_type}: {y_col} by {x_col}")

        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even when plotting raises.
        plt.close()

    with mlflow.start_run(run_name="Custom_Graph_Generation"):
        mlflow.log_artifact(path, artifact_path="custom_graphs")

    return path
 


def generate_summary_pdf(summary_text, df):
    """Render the analyst summary (text, tables, graphs) into a PDF report.

    Side effects: every file directly inside ``graph/`` and ``reports/`` is
    deleted first, then the ```python blocks found in ``summary_text`` are
    executed to regenerate the graphs.

    Args:
        summary_text: Analyst output using ``**Section**:`` headings. The
            sections Query, Data Snapshot, Analysis and Detailed Analysis are
            rendered; Data Snapshot and Detailed Analysis are expected to be
            pipe-delimited tables.
        df: Result DataFrame; its first 10 rows are exposed to the graph code
            as ``df``.

    Returns:
        Path of the generated PDF inside ``reports/``.
    """
    import glob
    import os, re, uuid
    from reportlab.lib.pagesizes import letter
    from reportlab.lib import colors
    from reportlab.pdfgen import canvas
    from reportlab.platypus import Table, TableStyle
    from reportlab.lib.utils import ImageReader
    from reportlab.lib.utils import simpleSplit
    import matplotlib.pyplot as plt

    # Start from empty output folders; create them first so that neither the
    # graph code nor os.listdir below fails on a missing directory.
    for folder in ("graph", "reports"):
        os.makedirs(folder, exist_ok=True)
        for stale in glob.glob(os.path.join(folder, "*")):
            try:
                os.remove(stale)
            except Exception as e:
                print(f"Could not delete {stale}: {e}")

    filepath = os.path.join("reports", f"summary_{uuid.uuid4().hex}.pdf")

    # Setup canvas
    c = canvas.Canvas(filepath, pagesize=letter)
    width, height = letter
    left_margin, top_margin, bottom_margin = 50, 50, 50
    max_width = width - 2 * left_margin
    line_height = 15

    # Draw page border
    c.setLineWidth(1)
    c.rect(20, 20, width - 40, height - 40)

    # Heading (underlined)
    heading_text = "SQL Query Summary Report"
    c.setFont("Times-BoldItalic", 18)
    c.drawString(left_margin, height - top_margin, heading_text)
    heading_width = c.stringWidth(heading_text, "Times-BoldItalic", 18)
    c.setLineWidth(1)
    c.line(left_margin, height - top_margin - 5, left_margin + heading_width, height - top_margin - 5)
    y = height - top_margin - 35  # current vertical cursor

    def start_new_page():
        """Begin a new bordered page and reset the cursor and body font."""
        nonlocal y
        c.showPage()
        c.setFont("Helvetica", 12)
        c.rect(20, 20, width - 40, height - 40)
        y = height - top_margin

    # Extract sections: lines following a "**Name**:" heading belong to that
    # section; headings not listed here are ignored.
    sections = {"Query": "", "Data Snapshot": "", "Analysis": "", "Detailed Analysis": ""}
    current = None
    for line in (summary_text or "").split('\n'):
        match = re.match(r"\*\*(.*?)\*\*:", line)
        if match:
            current = match.group(1)
            continue
        if current in sections:
            sections[current] += line + "\n"

    def write_text_block(text):
        """Write ``text`` word-wrapped, breaking pages as needed."""
        nonlocal y
        for line in text.strip().split("\n"):
            for wline in simpleSplit(line, "Helvetica", 12, max_width):
                if y <= bottom_margin:
                    start_new_page()
                c.drawString(left_margin, y, wline)
                y -= line_height

    def draw_wrapped_table(content, styles):
        """Draw the pipe-delimited table in ``content`` or a fallback note."""
        nonlocal y
        try:
            table_data = []
            for line in content.strip().splitlines():
                if '|' not in line:
                    continue
                cells = re.split(r'\s*\|\s*', line.strip())[1:-1]
                # Skip the markdown header separator row (|---|:---:|...).
                if cells and all(re.fullmatch(r":?-{3,}:?", cell) for cell in cells):
                    continue
                table_data.append(cells)
            if not table_data or not table_data[0]:
                raise ValueError("No valid table data")
            col_count = len(table_data[0])
            # Pad/trim ragged rows to the header width.
            table_data = [(row + [""] * col_count)[:col_count] for row in table_data]
            col_widths = [max_width / col_count] * col_count
            table = Table(table_data, colWidths=col_widths)
            table.setStyle(styles)
            w, h = table.wrap(max_width, y - bottom_margin)
            if y - h < bottom_margin:
                start_new_page()
            table.drawOn(c, left_margin, y - h)
            y -= h + 20
        except Exception:
            write_text_block("Failed to render table.")

    table_styles = {
        "Data Snapshot": TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]),
        "Detailed Analysis": TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.lightblue),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
        ]),
    }

    # Render sections
    for title, content in sections.items():
        if y <= bottom_margin:
            start_new_page()
        # Set the bold font after a possible page break so the title is
        # always bold.
        c.setFont("Helvetica-Bold", 12)
        c.drawString(left_margin, y, title)
        y -= line_height
        c.setFont("Helvetica", 12)

        if title in table_styles:
            draw_wrapped_table(content, table_styles[title])
        else:
            write_text_block(content)

    # Extract and execute Python code blocks.
    # SECURITY: this runs model-generated code with full interpreter
    # privileges. Only use with trusted models/inputs or inside a sandbox.
    code_blocks = re.findall(r"```python(.*?)```", summary_text or "", re.DOTALL)
    drawn_images = set()  # each image is placed in the PDF only once

    for code in code_blocks:
        try:
            local_env = {"plt": plt, "df": df[:10]}
            exec(code, local_env)
            for fname in sorted(os.listdir("graph")):
                if not fname.endswith(".jpg"):
                    continue
                img_path = os.path.join("graph", fname)
                if img_path in drawn_images:
                    continue
                drawn_images.add(img_path)

                # Scale the image to the text width, keeping its aspect ratio.
                img = ImageReader(img_path)
                img_width, img_height = img.getSize()
                aspect = img_height / float(img_width)
                display_width = max_width
                display_height = display_width * aspect

                if y - display_height < bottom_margin:
                    start_new_page()

                c.drawImage(img, left_margin, y - display_height, width=display_width, height=display_height)
                y -= display_height + 20
        except Exception:
            write_text_block("No data found for graph")
        finally:
            # Do not keep figures created by the executed code alive.
            plt.close("all")

    c.save()
    return filepath


# Derive the default session and user ids from one random number in 1..1000
# (`random` is imported in the first cell).
# NOTE(review): these ids are the lookup keys of the MongoDB memory
# collections, so a repeated number will pick up an earlier run's history -
# confirm that this is acceptable.
r1 = random.randint(1, 1000)
DEFAULT_SESSION_ID = "demo_session_"+str(r1)
DEFAULT_USER_ID = "user_"+str(r1)

# r1 = random.randint(1, 1000)
# DEFAULT_SESSION_ID = "demo_session_001"
# DEFAULT_USER_ID = "user_123"

# Gradio UI: query box -> generated SQL, judge summary, result table, graph
# info, custom chart builder and PDF export.
with gr.Blocks() as demo:
    # Hidden textboxes carry the session/user ids into submit_action.
    session_id = gr.Textbox(value=DEFAULT_SESSION_ID, visible=False)
    user_id = gr.Textbox(value=DEFAULT_USER_ID, visible=False)
    gr.Markdown("### Tabular Data Processor")
    text_input = gr.Textbox(lines=3, label="Put your Query Here")

    with gr.Row():
        submit_btn = gr.Button("Submit")
        analyze_btn = gr.Button("Analyze")  # Not wired up

    sql_output = gr.Textbox(label="Generated SQL Output")
    summary_output = gr.Textbox(label="Summary Output")

    result_df_output = gr.Dataframe(
        label="SQL Result Table",
        interactive=True,
        row_count=2,
        col_count=(0, "dynamic"),
        wrap=True,
    )

    graph_output = gr.Textbox(label="Graph Suggestions")

    gr.Markdown("### 📊 Generate Custom Graph")

    # The three dropdowns start empty and are filled after a submit.
    with gr.Row():
        chart_type = gr.Dropdown(choices=[], label="Graph Type")
        x_column = gr.Dropdown(choices=[], label="X-axis Column")
        y_column = gr.Dropdown(choices=[], label="Y-axis Column")

    update_graph_btn = gr.Button("Generate Custom Graph")
    custom_graph_output = gr.Image(type="filepath", label="Generated Graph")
    # Invisible holder for the analyst summary; it is the text input of the
    # PDF export below.
    data_analysis = gr.Textbox(label="summary", visible=False)

    # Submit and dynamically update controls.
    # submit_action has to return one value per entry of `outputs` (six).
    # The chained step then offers all result columns for the x axis and only
    # the numeric ones for the y axis.
    submit_btn.click(
        submit_action,
        inputs=[text_input,session_id, user_id],
        outputs=[sql_output, summary_output, result_df_output, graph_output, chart_type, data_analysis]
    ).then(
        lambda df: (
            gr.update(choices=list(df.columns)),
            gr.update(choices=list(df.select_dtypes(include="number").columns))
        ),
        inputs=result_df_output,
        outputs=[x_column, y_column]
    )

    with gr.Row():
        download_btn = gr.Button("Download Summary as PDF")
        pdf_file_output = gr.File(label="Download PDF", interactive=True, type="filepath")
    
    # Build the PDF from the hidden summary text and the result table.
    download_btn.click(
        generate_summary_pdf,
        inputs=[data_analysis,result_df_output],
        outputs=[pdf_file_output]
    )

    
    # create_custom_chart takes the DataFrame first, hence the reordering lambda.
    update_graph_btn.click(
        lambda chart_type, x, y, df: create_custom_chart(df, chart_type, x, y),
        inputs=[chart_type, x_column, y_column, result_df_output],
        outputs=custom_graph_output
    )

# NOTE(review): share=True publishes the app on a public URL while the
# handlers above run model-generated SQL and Python (exec) - confirm this
# exposure is intended.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://2fbb9a3ee7a7f50617.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


