In [2]:
!pip install transformers sentence-transformers gradio

import gradio as gr
import torch
import json
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    T5Tokenizer, T5ForConditionalGeneration
)
from sentence_transformers import SentenceTransformer, util



In [3]:
# Authenticate against the Hugging Face Hub. In a notebook, login() renders an
# interactive widget (the VBox output below) where the access token is entered,
# so no token is hard-coded in the notebook.
# NOTE(review): presumably required to download the hyrinmansoor/* checkpoints
# loaded later — confirm whether those repos are private/gated.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
# Stage 1: Doctype Classifier (RoBERTa)
# Tokenizer + sequence-classification head used by predict_doctype() below.
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
s1_model = "hyrinmansoor/text2frappe-s1-roberta"
tokenizer_s1 = RobertaTokenizerFast.from_pretrained(s1_model)
model_s1 = RobertaForSequenceClassification.from_pretrained(s1_model)

# Load label mapping from uploaded file
# (disabled: the doctype/field metadata is only needed by the commented-out
# get_top_k_fields helper further down)
# with open("/content/standard_meta.json") as f:
#     meta = json.load(f)

# Stage 2 (retrieval): SBERT encoder. In the visible code it is referenced only
# by the commented-out get_top_k_fields helper, so it is loaded but unused here.
sbert = SentenceTransformer("hyrinmansoor/text2frappe-s2-sbert")

# Stage 2 (selection): T5 model referenced only by the commented-out
# select_fields_with_flan helper.
s2_model = "hyrinmansoor/text2frappe-s2-flan-field"
tokenizer_s2 = T5Tokenizer.from_pretrained(s2_model)
model_s2 = T5ForConditionalGeneration.from_pretrained(s2_model)

# Stage 3 (query generation): T5 model referenced only by the commented-out
# generate_frappe_query helper.
s3_model = "hyrinmansoor/text2frappe-s3-flan_query_generator"
tokenizer_s3 = T5Tokenizer.from_pretrained(s3_model)
model_s3 = T5ForConditionalGeneration.from_pretrained(s3_model)

# Class-id -> doctype-label mapping for Stage 1, read as JSON. JSON object keys
# are always strings, so lookups must use string ids (predict_doctype converts
# the predicted id with str() for this reason).
# NOTE(review): hard-coded Colab path with no file extension — confirm the
# uploaded file name.
with open("/content/id2label") as f:
    id2label = json.load(f)

def predict_doctype(question):
    """Classify a question into a doctype label using the Stage 1 model.

    Args:
        question: Natural-language question text.

    Returns:
        The label stored in ``id2label`` for the highest-scoring class, or
        ``"Unknown"`` when that id has no entry or when any step raises.
    """
    try:
        encoded = tokenizer_s1(question, return_tensors="pt", truncation=True)
        # Inference only: no gradients needed.
        with torch.no_grad():
            logits = model_s1(**encoded).logits

        # id2label is loaded from JSON, so its keys are strings.
        pred_id = str(logits.argmax().item())
        print("🔢 Predicted ID:", pred_id)

        predicted_doctype = id2label.get(pred_id, "Unknown")
        print("📄 Predicted Doctype:", predicted_doctype)
    except Exception as e:
        # Best-effort: report the problem and fall back to a sentinel label.
        print("❌ Exception in predict_doctype:", str(e))
        return "Unknown"
    else:
        return predicted_doctype



# def get_top_k_fields(question, doctype, k=4):
#     if doctype not in meta or "fields" not in meta[doctype]:
#         raise ValueError(f"❌ No metadata or fields found for doctype '{doctype}'")

#     fields_dict = meta[doctype]["fields"]

#     if not isinstance(fields_dict, dict):
#         raise TypeError(f"❌ Expected dict for fields of '{doctype}', but got {type(fields_dict)}")

#     field_names = list(fields_dict.keys())
#     field_descs = [f"{key}: {desc}" if desc else key for key, desc in fields_dict.items()]

#     # Construct SBERT prompt
#     sbert_prompt = f"Doctype: {doctype}\nQuestion: {question}"

#     # SBERT similarity
#     embeddings = sbert.encode([sbert_prompt] + field_descs, convert_to_tensor=True)
#     query_emb = embeddings[0]
#     field_embs = embeddings[1:]
#     sim_scores = util.pytorch_cos_sim(query_emb, field_embs)[0]
#     top_k = torch.topk(sim_scores, k=min(k, len(field_names)))

#     return [field_names[i] for i in top_k.indices]


# def select_fields_with_flan(doctype, question, top_fields):
#     prompt = f"Instruction: Select only the correct field(s) from the given top fields that answer the question.\nDoctype: {doctype}\nQuestion: {question}\nTop Fields: {', '.join(top_fields)}"
#     input_ids = tokenizer_s2(prompt, return_tensors="pt").input_ids
#     output_ids = model_s2.generate(input_ids, max_length=64)
#     return tokenizer_s2.decode(output_ids[0], skip_special_tokens=True).split(", ")

# def generate_frappe_query(doctype, question, fields):
#     prompt = f"Generate the correct Frappe query for the given question, using the provided doctype and fields.\nDoctype: {doctype}\nQuestion: {question}\nFields: {', '.join(fields)}"
#     input_ids = tokenizer_s3(prompt, return_tensors="pt").input_ids
#     output_ids = model_s3.generate(input_ids, max_length=128)
#     return tokenizer_s3.decode(output_ids[0], skip_special_tokens=True)

def full_pipeline(question):
    """Run the currently enabled part of the Text2Frappe pipeline on a question.

    Only Stage 1 (doctype classification via ``predict_doctype``) is active;
    the later stages are kept as commented-out code inside the body.

    Args:
        question: Natural-language question text from the Gradio textbox.

    Returns:
        A single Markdown string: the predicted doctype on success, or an
        error message on failure. The Gradio interface declares exactly one
        output component, so both paths must return one value (the previous
        error path returned a 4-tuple, which does not match one output).
    """
    try:
        print("\n📌 Input Question:", question)

        doctype = predict_doctype(question)
        print("✅ Predicted Doctype:", doctype)

        # top_fields = get_top_k_fields(question, doctype)
        # print("🔎 Top Fields (SBERT):", top_fields)

        # selected_fields = select_fields_with_flan(doctype, question, top_fields)
        # print("✅ Selected Fields (FLAN):", selected_fields)

        # frappe_query = generate_frappe_query(doctype, question, selected_fields)
        # print("🧾 Generated Frappe Query:", frappe_query)

        # debug_info = (
        #     f"Question: {question}\n"
        #     f"Doctype: {doctype}\n"
        #     f"Top Fields: {top_fields}\n"
        #     f"Selected Fields: {selected_fields}\n"
        #     f"Query: {frappe_query}"
        # )

        return f"📄 **Predicted Doctype**: {doctype}"

    except Exception as e:
        print("❌ ERROR in full_pipeline:", str(e))
        # One string for the one Markdown output; blank line separates the
        # headline from the exception detail when rendered.
        return f"❌ Error in Doctype Prediction\n\n💥 Exception: {str(e)}"


# Build the UI components first, then wire them to full_pipeline.
# NOTE(review): the description advertises Stages 1-3, but full_pipeline
# currently runs only the Stage 1 doctype prediction.
question_input = gr.Textbox(label="❓ Natural Language Question")
doctype_output = gr.Markdown(label="Doctype Prediction")

iface = gr.Interface(
    fn=full_pipeline,
    inputs=[question_input],
    outputs=[doctype_output],
    title="🔗 Text2Frappe End-to-End Demo",
    description=(
        "This pipeline integrates Stage 1 (RoBERTa), Stage 2 Hybrid (SBERT + FLAN), "
        "and Stage 3 (FLAN-T5) to convert natural language questions into "
        "ERPNext queries."
    ),
)
iface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f7c608b0f63c6afb46.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [13]:
# Debug check: is the *string* key "185" present in the label mapping?
# NOTE(review): `id2label_s1` is not defined in any visible cell (the loader
# cell binds `id2label`) — it presumably comes from a cell not shown here;
# confirm, otherwise this fails on a fresh kernel.
print("185" in id2label_s1)

True


In [14]:
# Debug check: looking up the *int* key 185 misses and falls back to "Unknown"
# (see the output below), even though the string key "185" is present. The
# mapping's keys are strings, so predicted ids must be converted with str()
# before the lookup.
id2label_s1.get(185, "Unknown")

'Unknown'