In [None]:
import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from nltk.tokenize import word_tokenize
import random

**Download necessary NLTK data**

In [None]:
# Resources needed by the cells below: the review corpus used for training and
# the Punkt sentence-tokenizer models that word_tokenize relies on.
# Recent NLTK releases load those models from the 'punkt_tab' package rather
# than the older pickled 'punkt' package, so both are requested to keep the
# notebook runnable on a fresh kernel regardless of the installed version.
for resource in ("movie_reviews", "punkt", "punkt_tab"):
    nltk.download(resource)

[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

**Define categories for the chatbot**

In [None]:
# Intent labels the chatbot is meant to distinguish.
# NOTE(review): no cell visible in this notebook reads this list; the model
# below is trained on the movie_reviews labels instead — confirm intended use.
categories = [
    "Energy Efficiency Tips",
    "Renewable Energy Options",
    "Energy Saving Programs",
    "Technical Support",
    "Event Information",
]

**Define the bag-of-words feature extractor (the dataset used below is simulated for demonstration; for a real scenario, use actual data)**

In [None]:
def extract_features(words):
    """Turn an iterable of tokens into a presence-feature dictionary.

    Each distinct token becomes a key mapped to ``True``; repeated tokens
    collapse into a single entry, so counts are not preserved.
    """
    return dict.fromkeys(words, True)

**Load and prepare the movie reviews dataset as a proxy**

In [None]:
# Build one (token list, category label) pair per review file, walking the
# corpus category by category in the order movie_reviews reports them.
documents = []
for category in movie_reviews.categories():
    for fileid in movie_reviews.fileids(category):
        review_tokens = list(movie_reviews.words(fileid))
        documents.append((review_tokens, category))

**Shuffle dataset**

In [None]:
# Shuffle with a fixed seed so the train/test split, and therefore the reported
# accuracy and informative features, are reproducible across Restart & Run All.
# A dedicated Random instance is used so the global RNG state is left untouched.
RANDOM_SEED = 42
random.Random(RANDOM_SEED).shuffle(documents)

**Extract features and labels**

In [None]:
# Pair each document's presence-feature dictionary with its label, keeping the
# (already shuffled) document order.
featuresets = []
for doc_tokens, doc_label in documents:
    featuresets.append((extract_features(doc_tokens), doc_label))

**Split data into training and testing sets**

In [None]:
# Fraction of the labelled examples used for training; the remainder is held
# out for evaluation. Named so the split ratio can be tuned in one place.
TRAIN_FRACTION = 0.8

train_size = int(len(featuresets) * TRAIN_FRACTION)
train_set = featuresets[:train_size]
test_set = featuresets[train_size:]

**Train Naive Bayes classifier**

In [None]:
# Fit the Naive Bayes model on the training portion of the feature sets.
classifier = NaiveBayesClassifier.train(labeled_featuresets=train_set)

**Evaluate the classifier**

In [None]:
# Score the trained model against the held-out test examples.
accuracy_score = accuracy(classifier, gold=test_set)
print(f"Accuracy: {accuracy_score:.2f}")

Accuracy: 0.73


**Display most informative features**

In [None]:
# show_most_informative_features prints its own "Most Informative Features"
# header, so the separate print() that preceded it only duplicated the title.
classifier.show_most_informative_features(10)

Most Informative Features:
Most Informative Features
               stupidity = True              neg : pos    =     19.8 : 1.0
                   anger = True              pos : neg    =     18.9 : 1.0
                   sucks = True              neg : pos    =     17.1 : 1.0
             outstanding = True              pos : neg    =     14.4 : 1.0
                 offbeat = True              pos : neg    =     11.6 : 1.0
               ludicrous = True              neg : pos    =     11.5 : 1.0
              astounding = True              pos : neg    =     10.9 : 1.0
                captures = True              pos : neg    =     10.9 : 1.0
               strongest = True              pos : neg    =     10.3 : 1.0
                  symbol = True              pos : neg    =     10.3 : 1.0


**Function to classify user queries**

In [None]:
def classify_query(query):
    """Classify a free-text query with the trained Naive Bayes model.

    The query is lower-cased before tokenisation. The feature names the model
    reports as most informative are all lower-case, so tokens such as "How"
    or "Tell" would otherwise never match a learned feature.

    Args:
        query: Raw text to classify.

    Returns:
        The label chosen by ``classifier.classify`` for the query's features.
        NOTE(review): the classifier in this notebook is trained on the
        movie_reviews categories, so the label is a review category
        ('pos'/'neg' in the recorded output), not one of the chatbot
        ``categories`` — retrain on intent-labelled data for real use.
    """
    tokens = word_tokenize(query.lower())
    features = extract_features(tokens)
    return classifier.classify(features)

In [None]:
# Smoke-test the query classifier on a handful of example chatbot questions.
sample_queries = [
    "How can I save energy at home?",
    "Tell me about upcoming events.",
    "What are the latest renewable energy options?",
    "I need help with technical support.",
]

for query in sample_queries:
    print(f"Query: '{query}'")
    predicted_label = classify_query(query)
    print(f"Classification: {predicted_label}")

Query: 'How can I save energy at home?'
Classification: neg
Query: 'Tell me about upcoming events.'
Classification: neg
Query: 'What are the latest renewable energy options?'
Classification: neg
Query: 'I need help with technical support.'
Classification: pos
