In [None]:


# --- Standard library -------------------------------------------------------
import getpass
import json
import os
import sys
import xml.etree.ElementTree as ET
from pathlib import Path

# --- Third-party ------------------------------------------------------------
import gradio as gr
import pandas as pd
from gradio import ChatMessage
from langchain.agents import AgentExecutor, create_react_agent
from langchain.chat_models import init_chat_model
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain_core.runnables import Runnable

# --- Project-local ----------------------------------------------------------
# The parent of the working directory has to be on sys.path BEFORE the
# `utils` import below runs; otherwise a fresh kernel fails on that import.
# (Presumably `utils/` sits next to `knowledge_base/` and `prompts/` — verify.)
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.insert(0, parent_dir)

from utils.schema_analyzer import SchemaAnalyzer  # noqa: E402  (needs the sys.path tweak above)


# --- Configuration ----------------------------------------------------------
# Ask for the key interactively only when the environment does not provide one,
# so the secret is never written into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

llm = init_chat_model("gpt-4o", model_provider="openai")

# Directory scanned for selectable files, and the extensions that are offered.
KB_DIR = Path("../knowledge_base")
SUPPORTED_EXTS = [".csv", ".json", ".xlsx", ".xml"]

# Read the system prompt with an explicit encoding so the result does not
# depend on the platform's default locale.
with open("../prompts/schema_modeler/system_prompt.txt", "r", encoding="utf-8") as f:
  schema_modeler = f.read()

# Chat template: fixed system prompt plus a single `{input}` slot for the user turn.
schema_modeler_prompt = ChatPromptTemplate.from_messages([
    ("system", schema_modeler),
    ("human", "{input}"),
])

In [1]:


def list_kb_files():
    """Return the names of the supported files found directly inside KB_DIR.

    Only regular files whose suffix (compared case-insensitively) appears in
    SUPPORTED_EXTS are returned. The names are sorted so the list is the same
    on every run, because glob gives no ordering guarantee.

    Returns:
        list[str]: bare file names (no directory part), in sorted order.
    """
    return sorted(
        entry.name
        for entry in KB_DIR.glob("*")
        # Skip directories that merely happen to carry a supported suffix.
        if entry.is_file() and entry.suffix.lower() in SUPPORTED_EXTS
    )

async def respond(message, chat_history, selected_files, uploaded_files):
    """Stream the model's answer about the selected and uploaded files.

    Every file is run through SchemaAnalyzer; its schema plus head/middle/tail
    snippets are appended to the user's question and sent to the LLM through
    `schema_modeler_prompt`. A file that fails to analyze is reported inside
    the prompt instead of aborting the whole request.

    Args:
        message: The user's question for this turn.
        chat_history: Previous turns. Accepted for the chat-handler signature
            but not used: each turn is sent to the model without history.
        selected_files: File names (relative to KB_DIR) chosen in the UI;
            may be None or empty.
        uploaded_files: Uploaded files, either as path strings / path-like
            objects or as objects exposing the path via `.name`; may be None.

    Yields:
        str: The response text accumulated so far, once per non-empty chunk.
    """
    # Treat a missing selection the same way as a missing upload list.
    selected_paths = [KB_DIR / name for name in selected_files or []]
    # Accept plain paths as well as file-like objects that carry `.name`.
    uploaded_paths = [
        Path(f) if isinstance(f, (str, os.PathLike)) else Path(f.name)
        for f in uploaded_files or []
    ]
    all_paths = selected_paths + uploaded_paths

    combined_content = []

    for path in all_paths:
        try:
            analyzer = SchemaAnalyzer(str(path))
            schema_result = analyzer.analyze()
            snippets = analyzer.get_file_snippets(n=10)

            schema_str = json.dumps(schema_result, indent=2)
            head_str = snippets.get("head", "")
            middle_str = snippets.get("middle", "")
            tail_str = snippets.get("tail", "")

            file_block = f"""\n=== File: {path.name} ===\n
            📊 Schema:
            {schema_str}

            📄 Head:
            {head_str}

            📄 Middle:
            {middle_str}

            📄 Tail:
            {tail_str}
            """
            combined_content.append(file_block)

        except Exception as e:
            # Best effort: surface the failure to the model/user and carry on
            # with the remaining files.
            combined_content.append(f"=== File: {path.name} ===\n Error while parsing: {e}\n")

    full_data_context = "\n\n".join(combined_content)
    user_input = f"User question: {message}\n\nThe Schema of the Data is:\n{full_data_context}"
    prompt_messages = schema_modeler_prompt.invoke({"input": user_input})

    # Re-yield the growing text so the UI shows the answer as it streams in.
    buffer = ""
    async for chunk in llm.astream(prompt_messages):
        piece = getattr(chunk, "content", None)
        if piece:
            buffer += piece
            yield buffer




# Build the UI: a page title, a collapsed accordion with the file pickers, and
# the chat interface wired to `respond`.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 File-to-LinkML Schema Chatbot")

    # File inputs live in an accordion that starts closed (open=False).
    with gr.Accordion("📂 Select or upload files", open=False) as inputs_accordion:
        # Choices are computed once, when this cell runs; files added to the
        # knowledge base later only show up after re-running the cell.
        kb_files = gr.CheckboxGroup(
            label="Select files from knowledge_base",
            choices=list_kb_files()
        )
        # Uploads are limited to the same extensions offered from the knowledge base.
        uploads = gr.File(
            label="Upload your own files",
            file_types=SUPPORTED_EXTS,
            file_count="multiple"
        )

    # `kb_files` and `uploads` are listed in the same order as `respond`'s
    # `selected_files` and `uploaded_files` parameters.
    chatbot = gr.ChatInterface(
        fn=respond,
        title="LinkML Schema Generator",
        type="messages",
        additional_inputs=[kb_files, uploads],
        additional_inputs_accordion=inputs_accordion,
        save_history=True,
        examples=[["Can you generate a LinkML schema that reflects the data structure, its format, and relationships?"]],
    )

# Start the Gradio app.
demo.launch()



NameError: name 'gr' is not defined