In [13]:

import json
import pandas as pd
import xml.etree.ElementTree as ET
from pathlib import Path
from langchain.agents import AgentExecutor, create_react_agent
import gradio as gr
from gradio import ChatMessage
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain.chat_models import init_chat_model
import getpass
import os
import sys
from langchain_core.runnables import Runnable
from utils.schema_analyzer import SchemaAnalyzer
from utils.schema_modeler_agent import SchemaModeler



# Location of the bundled sample files and the extensions the UI will offer.
KB_DIR = Path("knowledge_base")
SUPPORTED_EXTS = [".csv", ".json", ".xlsx", ".xml"]

# Prompt for the OpenAI key only when the environment does not already hold a
# non-empty value; getpass keeps the secret out of the notebook output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Chat model handed to the project-local SchemaModeler wrapper.
llm = init_chat_model("gpt-4o", model_provider="openai")
schema_modeler_agent = SchemaModeler(llm)


In [None]:


def list_kb_files(kb_dir=None, supported_exts=None):
    """Return the names of supported data files found directly in a directory.

    Args:
        kb_dir: Directory to scan. Defaults to the module-level ``KB_DIR``.
        supported_exts: Iterable of file extensions (with leading dot) to
            accept; matching is case-insensitive. Defaults to the module-level
            ``SUPPORTED_EXTS``.

    Returns:
        A sorted list of file names (not full paths). Sub-directories are
        skipped even if their name ends in a supported extension, and a
        missing directory yields an empty list.
    """
    directory = KB_DIR if kb_dir is None else Path(kb_dir)
    raw_exts = SUPPORTED_EXTS if supported_exts is None else supported_exts
    allowed = {ext.lower() for ext in raw_exts}

    if not directory.is_dir():
        return []

    # Sorting keeps the checkbox choices stable across runs and platforms;
    # directory iteration order is otherwise filesystem-dependent.
    return sorted(
        entry.name
        for entry in directory.iterdir()
        if entry.is_file() and entry.suffix.lower() in allowed
    )

async def respond(message, chat_history, selected_files, uploaded_files):
    """Stream schema-generation output to the Gradio chat interface.

    Args:
        message: Latest user message supplied by ``gr.ChatInterface``. It is
            not used by this handler.
        chat_history: Conversation so far, supplied by ``gr.ChatInterface``.
            It is not used by this handler.
        selected_files: Value of the knowledge-base checkbox group; forwarded
            unchanged to the schema modeler.
        uploaded_files: Value of the upload component (may be ``None`` when
            nothing was uploaded); forwarded unchanged to the schema modeler.

    Yields:
        Every chunk produced by ``schema_modeler_agent.get_schema``, in order.
    """
    # NOTE(review): an earlier version built ``KB_DIR / name`` paths for the
    # selected files and ``Path(f.name)`` for the uploads but never used them;
    # the raw inputs were what got forwarded. Those dead locals are removed
    # here without changing what is passed on. Confirm against
    # SchemaModeler.get_schema whether it expects resolved paths instead.
    async for chunk in schema_modeler_agent.get_schema(
        selected_files=selected_files,
        uploaded_files=uploaded_files,
    ):
        yield chunk
    
    
# Build the Gradio UI: a collapsible file picker plus a chat interface whose
# handler (`respond`) receives the picker values as additional inputs.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 File-to-LinkML Schema Chatbot")

    # Accordion starts collapsed (open=False) and holds both file inputs.
    with gr.Accordion("📂 Select or upload files", open=False) as inputs_accordion:
        # Choices are computed once, when this cell runs, from list_kb_files().
        kb_files = gr.CheckboxGroup(
            label="Select files from knowledge_base",
            choices=list_kb_files()
        )
        # Uploads are restricted to the same extensions as the knowledge base.
        uploads = gr.File(
            label="Upload your own files",
            file_types=SUPPORTED_EXTS,
            file_count="multiple"
        )

    # The order of `additional_inputs` matches the trailing parameters of
    # `respond` (selected_files, uploaded_files); the accordion defined above
    # is passed as the container for those inputs.
    chatbot = gr.ChatInterface(
        fn=respond,
        title="LinkML Schema Generator",
        type="messages",
        additional_inputs=[kb_files, uploads],
        additional_inputs_accordion=inputs_accordion,
        save_history=True,
        examples=[["Can you generate a LinkML schema that reflects the data structure, its format, and relationships?"]],
    )

# Start the local Gradio server for the app defined above.
demo.launch()



* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.


