In [27]:
!pip install nltk numpy scikit-learn pickle5



In [28]:
import nltk
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [29]:
# Download NLTK data files
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [30]:
# Sample data
data = [
    ("Hello, can you help me bot?", "Hello! How can I assist you today?"),
    ("Hi, I need some assistance.", "Hi there! What do you need help with?"),
    ("What is my name?", "Your name is Prem Radheshyam Mahajan."),
    ("Tell me my name.", "Your name is Prem Radheshyam Mahajan."),
    ("Thank you!", "You're welcome!"),
    ("Thanks a lot.", "You're welcome!"),
    ("What is my favorite way to spend time with friends?",
     "You enjoy spending time with friends by coding together, playing games, or just hanging out and chatting."),
    ("What is my favorite way to exercise?",
     "You prefer to exercise by playing sports like judo and staying active outdoors."),
    ("Which is my favorite memory?",
     "One of your favorite memories is winning the English Speaking Medal in college."),
    ("How can I relax?",
     "You can relax by listening to music, playing games, or going for walks in nature."),
    ("What is my favorite place to visit?",
     "You enjoy visiting places that are rich in history or have beautiful natural landscapes."),
    ("What is my favorite food to eat?",
     "You love dishes like shev bhaji, bhendi, and gobhi matar paneer."),
    ("What is my favorite way to spend my weekends?",
     "During weekends, you catch up on coding projects, hang out with friends, or watch movies."),
    ("Which is my favorite thing to do on a rainy day?",
     "On rainy days, you like to stay indoors, maybe code or watch movies."),
    ("What is my favorite book?",
     "Your favorite book is 'Sapiens: A Brief History of Humankind' by Yuval Noah Harari."),
    ("Tell me about my family.",
     "You have a loving family with supportive parents and a younger sister."),
    ("What is my dream job?",
     "Your dream job involves working as a software engineer at a company that values innovation."),
    ("Which is my favorite season?",
     "Your favorite season is winter because you enjoy the cold weather and festive atmosphere."),
    ("What is my pet's name?",
     "You have a pet cat named Muffin."),
    ("Tell me about my hometown.",
     "Your hometown is known for its rich cultural heritage and vibrant festivals.")
]

In [31]:
# Tokenization and Lemmatization setup
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
    return ' '.join(lemmatized_tokens)

In [32]:
# Preprocess data
preprocessed_data = [(preprocess_text(text), label) for text, label in data]

# Split data into train and test sets
texts, labels = zip(*preprocessed_data)
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Define the pipeline
pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())

# Train the model
pipeline.fit(X_train, y_train)

# Evaluate the model
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

Model Accuracy: 0.0


In [33]:
# Save the model to disk
with open('chatbot_model.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

# Load the model from disk
with open('chatbot_model.pkl', 'rb') as file:
    loaded_pipeline = pickle.load(file)

In [34]:
# Function to get response
def get_response(user_input):
    preprocessed_input = preprocess_text(user_input)
    return loaded_pipeline.predict([preprocessed_input])[0]

In [37]:
# Example usage
print("Enter Your Question Hear and get ans (create for my self use only): ")
print(get_response(input()))

Enter Your Question Hear and get ans (create for my self use only): 
What is my favorite memory?
One of your favorite memories is winning the English Speaking Medal in college.
