In [None]:
## Step 1: Install Necessary Libraries
!pip install pandas scikit-learn nltk -q

In [None]:
# Step 2: Import Libraries
import pandas as pd
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [None]:
# Step 3: Load the dataset.
# Opens Colab's upload widget; `uploaded` is keyed by the uploaded file names.
from google.colab import files
uploaded = files.upload()

# Read the first uploaded file into the working DataFrame.
uploaded_names = list(uploaded.keys())
data = pd.read_csv(uploaded_names[0])

Saving AgriBuddy.csv to AgriBuddy.csv


In [None]:
# Step 4: Preprocess Text
# Fetch the NLTK resources used by `preprocess_text`.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases make `word_tokenize` look up the 'punkt_tab' resource
# instead of the pickled 'punkt' models; download both so the notebook runs
# regardless of which NLTK version the unpinned install resolved to.
nltk.download('punkt_tab')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Loaded stop-word sets, keyed by language, so the corpus is read only once.
_STOPWORD_CACHE = {}


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loading them once."""
    if 'english' not in _STOPWORD_CACHE:
        _STOPWORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_CACHE['english']


def preprocess_text(text):
    """Normalise one piece of text for vectorisation.

    The text is lower-cased and tokenised with ``word_tokenize``; tokens that
    are not purely alphanumeric or that are English stop words are dropped,
    and the survivors are re-joined with single spaces.

    Args:
        text: The raw text. Null values (``None``/NaN, as detected by
            ``pd.isnull``) yield an empty string; other non-string values
            are converted with ``str()`` before processing.

    Returns:
        str: The space-separated, filtered tokens (possibly empty).
    """
    if pd.isnull(text):
        return ''  # Handle null values

    # Coerce non-string cells (e.g. numbers) instead of failing on .lower().
    text = str(text).lower()

    # Look the stop words up once per call, in a set, rather than re-reading
    # the stop-word list for every token.
    stop_words = _english_stopwords()
    return ' '.join(
        token
        for token in word_tokenize(text)
        if token.isalnum() and token not in stop_words
    )

# Clean every user utterance and keep the result next to the raw text.
data['Processed_Input'] = data['User_Input'].map(preprocess_text)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Step 5: Split Data
# Features are the cleaned utterances; labels are the intent names.
X, y = data['Processed_Input'], data['Intent']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Step 6: Vectorize Text
# The vocabulary and IDF weights are learned from the training split only;
# the test split is projected onto that already-fitted vocabulary.
vectorizer = TfidfVectorizer()
X_train_vectorized, X_test_vectorized = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

In [None]:
# Step 7: Train an Intent Classifier
# Logistic regression with library defaults, fitted on the TF-IDF training matrix.
model = LogisticRegression()
model.fit(X=X_train_vectorized, y=y_train)

In [None]:
# Step 8: Evaluate Model
# Predict intents for the held-out split and print per-class precision/recall/F1.
predictions = model.predict(X_test_vectorized)
report = classification_report(y_test, predictions)
print(report)

                               precision    recall  f1-score   support

         common_diseases_info       1.00      1.00      1.00         6
        growing_duration_info       1.00      1.00      1.00         4
          harvest_season_info       1.00      1.00      1.00         6
            how_to_plant_info       1.00      1.00      1.00        12
 nutritional_information_info       1.00      1.00      1.00         5
                 soil_pH_info       1.00      1.00      1.00         2
               soil_type_info       1.00      1.00      1.00         4
                specific_info       1.00      1.00      1.00        20
          sunlight_needs_info       1.00      1.00      1.00         4
temperature_requirements_info       1.00      1.00      1.00         2

                     accuracy                           1.00        65
                    macro avg       1.00      1.00      1.00        65
                 weighted avg       1.00      1.00      1.00        65



In [None]:
# Step 9: Save Model and Vectorizer
# Both fitted objects are pickled to the working directory, model first.
for artifact_path, artifact in (
    ('chatbot_model.pkl', model),
    ('vectorizer.pkl', vectorizer),
):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

In [None]:
import random

# Greeting vocabulary: messages treated as greetings (stored lower-case) ...
greeting_inputs = [
    'hi',
    'hello',
    'hey',
    'greetings',
    'good morning',
    'good evening',
]

# ... and the canned replies one of which is returned for them.
greeting_responses = [
    'Hello! How can I help you?',
    'Hi there! What can I do for you?',
    'Hey! How can I assist?',
    'Greetings! How can I be of service?',
]

def _normalize_greeting(text):
    """Lower-case ``text``, collapse whitespace runs and trim edge punctuation."""
    collapsed = ' '.join(text.lower().split())
    return collapsed.strip('!?.,;: ')


def get_response(user_input, similarity_threshold=0.5):
    """Return the chatbot's reply to a single user message.

    Processing order:
      1. Non-string or blank input gets a fixed "valid input" prompt.
      2. Greetings (matched against ``greeting_inputs``) get a random entry
         from ``greeting_responses``.
      3. Otherwise the message is cleaned with ``preprocess_text`` and
         vectorised with the fitted ``vectorizer``. If its best cosine
         similarity to ``X_train_vectorized`` is below
         ``similarity_threshold`` the bot says it does not understand.
      4. Otherwise ``model`` predicts the intent, and the ``Response`` of the
         row in ``data`` with that intent whose ``Processed_Input`` is most
         similar to the message is returned.

    Args:
        user_input: The raw message typed by the user.
        similarity_threshold: Minimum best cosine similarity to the training
            utterances required before an intent is predicted. Defaults to
            0.5, the value previously hard-coded in this function.

    Returns:
        The reply text: a greeting, one of the fixed fallback messages, or a
        value taken from the ``Response`` column of ``data``.
    """
    # Validate first so that None / non-text input cannot crash on .lower().
    if not isinstance(user_input, str) or not user_input.strip():
        return "Please provide a valid input."

    # Check for greetings. The exact lower-cased form is tried first (the
    # original rule); the normalised form additionally accepts inputs such
    # as "Hi!" or "  hello ".
    if (user_input.lower() in greeting_inputs
            or _normalize_greeting(user_input) in greeting_inputs):
        return random.choice(greeting_responses)

    processed_input = preprocess_text(user_input)
    vectorized_input = vectorizer.transform([processed_input])

    # Out-of-domain guard: compare against every training utterance and give
    # up when even the closest one is not similar enough.
    similarity_scores = cosine_similarity(vectorized_input, X_train_vectorized)
    if similarity_scores.max() < similarity_threshold:
        return "I'm sorry, I don't understand that."

    # Predict intent now that the message looks in-domain.
    intent = model.predict(vectorized_input)[0]

    # Candidate answers are all dataset rows labelled with the predicted intent.
    intent_data = data[data['Intent'] == intent]
    intent_responses = intent_data['Response'].values
    if len(intent_responses) == 0:
        return "I'm sorry, I don't have an answer for that."

    # Pick the candidate whose stored utterance is closest to the message.
    intent_inputs = vectorizer.transform(intent_data['Processed_Input'])
    intent_similarity_scores = cosine_similarity(vectorized_input, intent_inputs)
    best_response_idx = intent_similarity_scores.argmax()

    return intent_responses[best_response_idx]

# Interactive console loop: read a message, answer it, repeat until the user
# types one of the exit commands.
print("Chatbot is ready! Type 'quit' to exit.")
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: end the session
        # cleanly instead of leaving a traceback in the output.
        print()
        print("Goodbye! Have a nice day")
        break
    # Strip surrounding whitespace so "quit " or " exit" still end the session.
    if user_input.strip().lower() in ['quit', 'exit', 'thankyou']:
        print("Goodbye! Have a nice day")
        break
    print("Bot:", get_response(user_input))

Chatbot is ready! Type 'quit' to exit.
You: Hi
Bot: Hi there! What can I do for you?
You: How to plant a Banana?
Bot: To plant Banana, prepare the soil by loosening it, add compost, and sow the seeds or seedlings at the recommended depth and spacing. Water regularly and ensure proper sunlight.
You: What soil type should I use in Banana?
Bot: The soil type is: Loamy/Sandy Loam.
You: Harvest season of banana?
Bot: The harvest season is: March-May.
You: How many calories does banana have?
Bot: Banana contains 89 calories per 100g.
You: What are the health benefits of  Banana?
Bot: Banana is rich in nutrients and offers various health benefits.
You: How do I know when Banana is ready to harvest?
Bot: Banana is usually ready to harvest in 270-365 days.
You: What is airplane?
Bot: I'm sorry, I don't understand that.
You: Thankyou
Goodbye! Have a nice day
