In [1]:
# Basic dependencies
import numpy as np
import pandas as pd

# Text preprocessing
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

# ML tools
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Download necessary NLTK resources (run once)
#nltk.download('punkt')
#nltk.download('stopwords')
#nltk.download('wordnet')
#nltk.download('punkt_tab')

In [3]:
import json

with open("farmer_chatbot_intents.json", "r") as f:
    data = json.load(f)

# Flatten into dataframe
patterns = []
tags = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(pattern)
        tags.append(intent['tag'])

df = pd.DataFrame({"pattern": patterns, "tag": tags})
df.head()


Unnamed: 0,pattern,tag
0,Which crop should I grow this season?,crop_recommendation
1,Suggest me a good crop to plant now,crop_recommendation
2,Best crop to sow this year,crop_recommendation
3,What should I cultivate in my farm?,crop_recommendation
4,Tell me which crop fits my land,crop_recommendation


In [4]:
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    tokens = nltk.word_tokenize(text)
    tokens = [lemmatizer.lemmatize(w) for w in tokens if w not in stop_words]
    return " ".join(tokens)

df["clean_pattern"] = df["pattern"].apply(clean_text)
df.head()


Unnamed: 0,pattern,tag,clean_pattern
0,Which crop should I grow this season?,crop_recommendation,crop grow season
1,Suggest me a good crop to plant now,crop_recommendation,suggest good crop plant
2,Best crop to sow this year,crop_recommendation,best crop sow year
3,What should I cultivate in my farm?,crop_recommendation,cultivate farm
4,Tell me which crop fits my land,crop_recommendation,tell crop fit land


In [73]:
# TF-IDF vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["clean_pattern"])
y = df["tag"]

print("Feature matrix shape:", X.shape)


Feature matrix shape: (396, 453)


In [74]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train size:", X_train.shape[0])
print("Test size:", X_test.shape[0])


Train size: 316
Test size: 80


In [75]:
# Logistic Regression baseline classifier
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

print("Training complete ✅")

Training complete ✅


In [76]:
y_pred = model.predict(X_test)

print("Classification Report:\n")
print(classification_report(y_test, y_pred, zero_division=0))

print("Confusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))


Classification Report:

                             precision    recall  f1-score   support

        crop_recommendation       0.62      0.73      0.67        11
          disease_detection       0.75      0.55      0.63        11
                   fallback       0.56      0.82      0.67        11
fertilizer_pesticide_advice       1.00      0.82      0.90        11
                  greetings       1.00      0.67      0.80         6
          market_price_info       1.00      1.00      1.00        10
                soil_health       0.82      0.90      0.86        10
              weather_query       0.78      0.70      0.74        10

                   accuracy                           0.78        80
                  macro avg       0.82      0.77      0.78        80
               weighted avg       0.80      0.78      0.78        80

Confusion Matrix:

[[ 8  2  1  0  0  0  0  0]
 [ 5  6  0  0  0  0  0  0]
 [ 0  0  9  0  0  0  0  2]
 [ 0  0  0  9  0  0  2  0]
 [ 0  0  2  0  4  

In [6]:
import joblib

model = joblib.load("intent_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

def predict_intent(user_input):
    cleaned = clean_text(user_input)
    vec = vectorizer.transform([cleaned])
    tag = model.predict(vec)[0]
    return tag

# Test it
print(predict_intent("Hi?"))

greetings


In [89]:
def get_response(user_input):
    tag = predict_intent(user_input)
    # Find corresponding responses
    for intent in data["intents"]:
        if intent["tag"] == tag:
            return np.random.choice(intent["responses"])

# Try it
print(get_response("Hello"))
print(get_response("What is the price for crop"))


Hello! How can I assist you with your farming needs today?
I can check market prices for you. Please specify the crop and location.


In [88]:
import joblib

joblib.dump(model, "intent_model.pkl")

joblib.dump(vectorizer, "vectorizer.pkl")

print("✅ Model and vectorizer saved successfully!")


✅ Model and vectorizer saved successfully!
