In [1]:
# Install the notebook's third-party dependencies into the kernel's environment.
# Run once per environment; restart the kernel afterwards if pip reports updated packages.
# NOTE(review): only googletrans is version-pinned, and googletrans, datasets and scikit-learn
# are not imported in the cells visible here — confirm they are still required.
%pip install transformers datasets sentence-transformers langdetect googletrans==4.0.0-rc1 scikit-learn hf_xet


Note: you may need to restart the kernel to use updated packages.




In [3]:
import json
import re

from langdetect import detect
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

In [4]:
# Load the question/answer records from farmer_data.json (path is relative to the
# working directory); the file is decoded as UTF-8 and parsed as JSON.
with open("farmer_data.json", mode="r", encoding="utf-8") as dataset_file:
    farmer_ds = json.loads(dataset_file.read())

# Sense inventory for word-sense disambiguation.
# Each key is "<word>_<n>" and each value is a short gloss describing that sense.
sense_inventory = dict(
    plant_1="a living organism such as a tree or vegetable",
    plant_2="a manufacturing facility",
    spray_1="to apply a liquid in mist form",
    spray_2="a container for spraying liquid",
    feed_1="to give food to animals",
    feed_2="material supplied to a machine",
)


In [6]:
# Hugging Face Hub checkpoint used for token classification (NER).
ner_model_name = "Davlan/bert-base-multilingual-cased-ner-hrl"
tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)

# `grouped_entities=True` is deprecated in transformers; `aggregation_strategy="simple"`
# is its documented equivalent. It merges word-piece tokens into whole entities and
# reports the label under the 'entity_group' key.
ner_pipeline = pipeline(
    "ner",
    model=ner_model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

# Sentence-embedding model (presumably for word-sense disambiguation, going by its name).
wsd_model = SentenceTransformer('all-MiniLM-L6-v2')



Device set to use cpu


In [7]:
def detect_language(text):
    """Return the language code detected for ``text``, or "unknown" on failure.

    Args:
        text: The string to classify.

    Returns:
        Whatever ``detect`` returns for ``text``; the literal string "unknown"
        if ``detect`` raises an ordinary exception (for example on empty or
        feature-less input).
    """
    try:
        return detect(text)
    except Exception:
        # Deliberately best-effort, but only for ordinary errors: a bare `except:`
        # would also swallow KeyboardInterrupt/SystemExit and make the cell
        # impossible to interrupt.
        return "unknown"


from sentence_transformers import SentenceTransformer, util

# Reuse the 'all-MiniLM-L6-v2' encoder already loaded as `wsd_model` in the model-setup
# cell instead of loading the same checkpoint a second time.
model = wsd_model

def disambiguate_word(context_sentence, word, sense_inventory):
    """Pick the sense of ``word`` whose gloss is most similar to the context.

    Candidate senses are the keys of ``sense_inventory`` whose lemma (the part
    before the final underscore, e.g. "plant" for "plant_2") equals ``word``.
    The context sentence and each candidate gloss are embedded with the
    module-level ``model`` and compared by cosine similarity.

    Args:
        context_sentence: Sentence in which ``word`` occurs.
        word: The ambiguous word to resolve.
        sense_inventory: Mapping of "<word>_<n>" sense keys to gloss strings.

    Returns:
        The sense key with the highest similarity score, or ``None`` when the
        inventory has no sense for ``word``.
    """
    # Match on the exact lemma; a plain prefix test would let "plant" pick up
    # "plantation_1" and an empty word pick up every key.
    senses = [
        key for key in sense_inventory
        if (key.rpartition("_")[0] or key) == word
    ]
    if not senses:
        return None

    context_embedding = model.encode(context_sentence, convert_to_tensor=True)
    # Start below any possible score so that a candidate is always selected.
    best_sense, best_score = None, float("-inf")
    for sense_key in senses:
        gloss = sense_inventory[sense_key]
        gloss_embedding = model.encode(gloss, convert_to_tensor=True)
        score = util.pytorch_cos_sim(context_embedding, gloss_embedding).item()
        # Strict comparison keeps the first-listed sense on ties.
        if score > best_score:
            best_score = score
            best_sense = sense_key
    return best_sense




In [8]:
def fallback_ner(text):
    """Tag domain keywords in ``text`` using a small hard-coded lexicon.

    Matching is case-insensitive. A term must begin at a word boundary, so
    "rice" is not reported for "price", but a trailing suffix is allowed, so
    "pond" is still reported for "ponds".

    Args:
        text: The query string to scan.

    Returns:
        A list of ``(term, label)`` tuples, one per lexicon term found, in
        lexicon order. Empty when ``text`` is empty or nothing matches.
    """
    keywords = {
        "LOC": ["village", "Punjab", "Maharashtra", "pond", "field"],
        "PROD": ["pesticide", "spray", "crops", "wheat", "rice", "urea"],
        "ANIMAL": ["goats", "chickens", "fish"],
        "DISEASE": ["blight", "whiteflies", "झुलसा रोग"]
    }
    if not text:
        return []

    # Lower-case once instead of once per lexicon term.
    lowered = text.lower()
    entities = []
    for label, terms in keywords.items():
        for term in terms:
            # (?<!\w) rejects hits that start in the middle of another word.
            if re.search(r"(?<!\w)" + re.escape(term.lower()), lowered):
                entities.append((term, label))
    return entities


In [9]:
# For every record: detect the query language, collect named entities from the
# model and from the keyword fallback, resolve the record's ambiguous words, and
# print a report.
for data in farmer_ds:
    query = data['question']
    response = data['answer']
    # Use the guarded helper so an undetectable query yields "unknown" instead of
    # aborting the whole loop.
    language = detect_language(query)

    # NER
    ner_entities = ner_pipeline(query)
    clean_ner = [(ent['word'], ent['entity_group']) for ent in ner_entities]
    fallback_entities = fallback_ner(query)
    # dict.fromkeys de-duplicates while keeping first-seen order (model entities
    # first, then fallback hits); a set would give an unstable order between runs.
    all_entities = list(dict.fromkeys(clean_ner + fallback_entities))

    # WSD
    disambigs = {}
    # Records without an 'ambiguous' list simply have nothing to disambiguate.
    for word in data.get('ambiguous', []):
        sense = disambiguate_word(query, word, sense_inventory)
        if sense:
            disambigs[word] = sense

    # Display
    print(f" User Query: {query}")
    print(f" Chatbot Answer: {response}")
    print(f" Detected Language: {language}")
    print(f"\n\n NER Entities: {all_entities}")
    if disambigs:
        for word, sense in disambigs.items():
            print(f"\n Disambiguated '{word}': {sense}")
    print("\n" + "-"*80 + "\n")


 User Query: The plant is not growing properly.
 Chatbot Answer: Ensure it gets enough water and sunlight.
 Detected Language: en


 NER Entities: []

 Disambiguated 'plant': plant_1

--------------------------------------------------------------------------------

 User Query: There is a pesticide plant near my village.
 Chatbot Answer: Yes, it's a pesticide manufacturing facility.
 Detected Language: en


 NER Entities: [('pesticide', 'PROD'), ('village', 'LOC')]

 Disambiguated 'plant': plant_1

--------------------------------------------------------------------------------

 User Query: Spray the pesticide in the evening.
 Chatbot Answer: Evening spray reduces evaporation loss.
 Detected Language: en


 NER Entities: [('pesticide', 'PROD'), ('spray', 'PROD')]

 Disambiguated 'spray': spray_2

--------------------------------------------------------------------------------

 User Query: I bought a new spray for my crops.
 Chatbot Answer: Check the nozzle size for better coverage.
 