In [3]:
# Install deep-translator into the kernel's environment; it provides the
# GoogleTranslator class imported below. Restart the kernel afterwards if the
# new package is not picked up.
%pip install deep-translator

Note: you may need to restart the kernel to use updated packages.




In [1]:
# ✅ Imports
import gradio as gr
from sentence_transformers import SentenceTransformer, util
from datetime import datetime
import requests
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from deep_translator import GoogleTranslator # Replacing googletrans
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# ✅ Configuration
# Pull variables from a local .env file (if present) into the process
# environment, then read the key used for the OpenWeatherMap requests.
load_dotenv()
API_KEY = os.environ.get("OPENWEATHER_API_KEY")

# ✅ Load Crop Dataset and Train Model
# The classifier maps (temperature, humidity) -> crop label.
RANDOM_SEED = 42  # fixed so the split and the forest are identical on every re-run

df = pd.read_csv("Crop_recommendation.csv")
X = df[["temperature", "humidity"]]
y = df["label"]

# Encode crop names as integers; the same encoder is used later to decode
# the model's predictions back into crop names.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows (X_test / y_test); only the training part is fitted.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=RANDOM_SEED
)
model = RandomForestClassifier(random_state=RANDOM_SEED)
model.fit(X_train, y_train)

# ✅ Setup Translator
# Shared translator instance: the source language is auto-detected and the
# output is always English.
translator = GoogleTranslator(target="en", source="auto")

# ✅ NER Setup
# Token-classification model used to find location (LOC) entities in a query.
ner_model_name = "Davlan/bert-base-multilingual-cased-ner-hrl"
tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)
# aggregation_strategy="simple" groups sub-word tokens into whole entities
# (results keep the "entity_group" / "word" keys). It replaces the deprecated
# grouped_entities=True flag.
ner_pipeline = pipeline(
    "ner",
    model=ner_model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

# ✅ WSD Setup
# Sentence-embedding model used to compare a query with each sense gloss.
wsd_model = SentenceTransformer("all-MiniLM-L6-v2")

# Sense inventory: each key is "<word>_<n>" and each value is the gloss
# (short definition) of that sense.
sense_inventory = dict(
    plant_1="a living organism like a crop or tree",
    plant_2="an industrial facility such as a pesticide plant",
    spray_1="the act of spraying liquid on crops",
    spray_2="a physical pesticide product in a bottle",
)

def disambiguate_word(context_sentence, word, sense_inventory):
    """Pick the sense of ``word`` whose gloss best matches the context.

    Candidate senses are the inventory keys of the form ``"<word>_<suffix>"``
    (or exactly ``word``). Each candidate's gloss is embedded with
    ``wsd_model`` and compared with the embedding of ``context_sentence`` by
    cosine similarity.

    Args:
        context_sentence: Sentence in which ``word`` occurs.
        word: The ambiguous word to resolve, e.g. ``"plant"``.
        sense_inventory: Mapping of sense key -> gloss text.

    Returns:
        The key of the highest-scoring sense (the first one on ties), or
        ``None`` when the inventory holds no sense for ``word``.
    """
    # Compare against the whole base word: a bare prefix test would make
    # "plant" also pick up unrelated keys such as "plantation_1".
    senses = [
        key
        for key in sense_inventory
        if key == word or key.rpartition("_")[0] == word
    ]
    if not senses:
        return None

    context_embedding = wsd_model.encode(context_sentence, convert_to_tensor=True)
    best_sense, best_score = None, float("-inf")
    for sense_key in senses:
        gloss_embedding = wsd_model.encode(
            sense_inventory[sense_key], convert_to_tensor=True
        )
        score = util.pytorch_cos_sim(context_embedding, gloss_embedding).item()
        # Strict ">" keeps the earliest sense when two glosses score the same.
        if score > best_score:
            best_sense, best_score = sense_key, score
    return best_sense

# ✅ Extract city
def extract_location(text):
    """Return the first location entity that ``ner_pipeline`` finds in ``text``.

    Returns:
        The ``"word"`` of the first entity whose ``"entity_group"`` is
        ``"LOC"``, or ``None`` when no such entity is present.
    """
    location_words = (
        entity["word"]
        for entity in ner_pipeline(text)
        if entity["entity_group"] == "LOC"
    )
    return next(location_words, None)

# ✅ Weather Fetching
def get_weather_data(location, api_key, timeout=10):
    """Fetch the current weather for a city from the OpenWeatherMap API.

    Args:
        location: City name to look up.
        api_key: OpenWeatherMap API key.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``(description, temperature, humidity)`` tuple taken from the
        response (metric units are requested). ``(None, None, None)`` is
        returned when an argument is missing, the request or JSON decoding
        fails, or the response's ``"cod"`` is not 200.
    """
    failure = (None, None, None)
    if not location or not api_key:
        # Nothing sensible to ask the API for; skip the network round trip.
        return failure
    try:
        # HTTPS keeps the API key out of clear text, and ``params`` URL-encodes
        # the city so spaces, "&" or "#" cannot corrupt the query string.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"q": location, "appid": api_key, "units": "metric"},
            timeout=timeout,  # without one, a stalled connection hangs the UI
        )
        data = response.json()
        # Normalise "cod" so that both 200 and "200" count as success.
        if str(data.get("cod")) != "200":
            return failure
        return (
            data["weather"][0]["description"],
            data["main"]["temp"],
            data["main"]["humidity"],
        )
    except (
        requests.RequestException,  # connection problems, timeouts
        ValueError,                 # body is not valid JSON
        KeyError, IndexError, TypeError, AttributeError,  # unexpected payload shape
    ) as e:
        print(f"Weather API error: {e}")
        return failure

# ✅ Main Bot Logic
def _detect_user_language(text):
    """Best-effort detection of the language ``text`` is written in.

    Uses deep_translator's ``single_detection``, which requires an API key.
    The key is read from the ``DETECT_LANGUAGE_API_KEY`` environment variable
    so that it never has to be written into the notebook.

    Returns:
        The detected language code, or ``None`` when no key is configured or
        detection fails.
    """
    detect_key = os.getenv("DETECT_LANGUAGE_API_KEY")
    if not detect_key:
        return None
    try:
        from deep_translator import single_detection

        return single_detection(text, api_key=detect_key)
    except Exception as exc:  # detection is optional: fall back to English replies
        print(f"Language detection error: {exc}")
        return None


def _localize(message, lang):
    """Translate an English ``message`` into ``lang``.

    Returns ``message`` unchanged when ``lang`` is ``None`` or English, or
    when the translation fails.
    """
    if not lang or lang == "en":
        return message
    try:
        return GoogleTranslator(source="en", target=lang).translate(message) or message
    except Exception as exc:  # an English reply is better than no reply
        print(f"Translation error: {exc}")
        return message


def query_bot(user_query):
    """Answer a weather or crop-recommendation question written in any language.

    Steps: translate the query to English, ask for clarification if "plant" or
    "spray" is used in its factory/product sense, extract the city, fetch its
    weather, then either report the weather (when the query mentions
    "weather" or "temperature") or recommend a crop from the temperature and
    humidity.

    Args:
        user_query: The user's question as typed into the UI.

    Returns:
        The reply text. It is translated into the user's language when that
        language can be detected (see ``_detect_user_language``); otherwise
        it is returned in English.
    """
    if not isinstance(user_query, str) or not user_query.strip():
        return "Please enter a question."

    # The shared ``translator`` always targets English, so it cannot be used
    # to translate replies back; remember the user's language separately.
    user_lang = _detect_user_language(user_query)

    # 🔁 Translate to English (source language is auto-detected)
    try:
        translated_query = translator.translate(user_query)
    except Exception as exc:
        print(f"Translation error: {exc}")
        translated_query = None
    if not translated_query:
        # Fall back to the raw text so queries already in English still work.
        translated_query = user_query
    query_lower = translated_query.lower()

    # 🔍 WSD: senses ending in "2" are the factory / bottled-product meanings
    for word in ("plant", "spray"):
        if word in query_lower:
            sense = disambiguate_word(translated_query, word, sense_inventory)
            if sense and sense.endswith("2"):
                return _localize(
                    "Did you mean a factory or a product? For crops, please clarify.",
                    user_lang,
                )

    # 🏙 Detect city
    city = extract_location(translated_query)
    if not city:
        return _localize("Please mention your city in the question.", user_lang)

    # 🌦 Get weather
    weather_desc, temp, humidity = get_weather_data(city, API_KEY)
    if temp is None:
        return _localize(f"Couldn't fetch weather info for {city}.", user_lang)

    # 🧠 Decide: weather or crop
    if "weather" in query_lower or "temperature" in query_lower:
        response = f"🌦 Weather in {city}: {weather_desc}, Temperature: {temp}°C, Humidity: {humidity}%"
    else:
        # Use the same column names the model was fitted on; a bare ndarray
        # triggers scikit-learn's "X does not have valid feature names" warning.
        features = pd.DataFrame(
            [[temp, humidity]], columns=["temperature", "humidity"]
        )
        crop_encoded = model.predict(features)[0]
        crop_label = label_encoder.inverse_transform([crop_encoded])[0]
        response = f"Weather in {city}: {weather_desc}, {temp}°C\nRecommended crop: {crop_label}"

    # 🔁 Translate response back to user's language
    return _localize(response, user_lang)

# ✅ Gradio UI
# One text box in, one text box out; every submitted query is passed to
# query_bot. The first three positional arguments are fn, inputs and outputs.
gr.Interface(
    query_bot,
    gr.Textbox(label="Ask your query (in any language)"),
    gr.Textbox(label="Response"),
    title="🌾 Multilingual Crop & Weather Bot",
    description="Ask about crops or weather in your city, in any language!",
).launch()


  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


