In [58]:
# Import necessary libraries (standard library first, then third-party)
import re

import nltk
import numpy as np
import pandas as pd
from nltk.tokenize import word_tokenize
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Ensure necessary NLTK data is downloaded
# nltk.download('punkt')

# Sentiment lexicons consulted by the scorer. The entries below are a small
# illustrative sample; substitute your full positive/negative word lists.
positive_terms = [
    'happy',
    'joy',
    'good',
    'excellent',
    'fortunate',
    'wonderful',
    'beautiful',
]
negative_terms = [
    'sad',
    'angry',
    'bad',
    'poor',
    'unfortunate',
    'upset',
]

# Sample sentences labelled as positive (25 hand-written entries); replace
# with an actual dataset for meaningful results.
# Note: several entries use the typographic apostrophe (’) rather than the
# ASCII one ('); both are non-word characters to the regexes used for scoring.
positive_text = [
    "I am feeling hot today", 
    "What a wonderful day", 
    "Life is beautiful",
    "I am extremely happy with the results",
    "This is the best experience I’ve ever had",
    "I love the way this turned out",
    "The atmosphere here is so joyful",
    "Everything is going perfectly well",
    "I am thrilled about the new opportunities",
    "This brings me immense joy",
    "Today is an excellent day",
    "Feeling grateful and blessed",
    "The weather is fantastic, I'm loving it",
    "She is so kind and generous",
    "I am fortunate to have such great friends",
    "The service was outstanding and delightful",
    "It’s been a great day filled with happiness",
    "I’m enjoying every moment of this",
    "They did a wonderful job with the project",
    "The scenery is stunning and breathtaking",
    "I couldn’t be happier with the results",
    "Everything about this place is positive and uplifting",
    "I’m feeling on top of the world today",
    "The event was well-organized and amazing",
    "I’m truly blessed to have experienced this"
]

# Sample sentences labelled as negative (25 hand-written entries, the same
# number as the positive list); replace with an actual dataset.
negative_text = [
    "I am feeling bad today", 
    "This is such a poor experience", 
    "I am upset with this",
    "I’m disappointed in the service I received",
    "This is one of the worst days ever",
    "I feel so sad and lonely right now",
    "The results are absolutely terrible",
    "I’m angry about how things turned out",
    "This place is dirty and uninviting",
    "I had a horrible time at the event",
    "The experience was nothing but a disappointment",
    "It’s frustrating to deal with these issues",
    "I regret coming here, it was a waste of time",
    "The service was bad, I am not satisfied",
    "Everything seems to be going wrong today",
    "I’m completely dissatisfied with the product",
    "The situation is unfortunate and upsetting",
    "The atmosphere here is depressing",
    "I am extremely unhappy with the results",
    "This has been a terrible experience overall",
    "Nothing good came out of this situation",
    "I can’t stand how awful the conditions are",
    "I’m feeling very stressed and anxious",
    "The entire experience left a bad taste",
    "This decision was a complete failure"
]


def simple_tokenize(sentence):
    """Break a sentence into lowercase word tokens.

    A token is a maximal run of word characters (letters, digits,
    underscore); everything else acts as a separator.

    Args:
        sentence: The text to split.

    Returns:
        A list of lowercase tokens in order of appearance (empty when the
        text contains no word characters).
    """
    lowered = sentence.lower()
    return [hit.group(0) for hit in re.finditer(r'\b\w+\b', lowered)]

# Function to compute sentiment score of a single sentence
def compute_sentiment_score(sentence, neg_terms=negative_terms, pos_terms=positive_terms):
    """Count negative and positive lexicon hits in one sentence.

    Args:
        sentence: Text to score.
        neg_terms: Collection of negative words to look for. Defaults to the
            module-level ``negative_terms`` list.
        pos_terms: Collection of positive words to look for. Defaults to the
            module-level ``positive_terms`` list.

    Returns:
        A three-item list ``[cleaned_sentence, neg_matches, pos_matches]``.
        ``cleaned_sentence`` is the lowercased text with punctuation and
        digits deleted (not the original string). The two counts are the
        number of tokens found in ``neg_terms`` and ``pos_terms``; a word
        that occurs twice is counted twice.
    """
    lowered = sentence.lower()

    # Text handed back to the caller: lowercase, punctuation and digits removed.
    cleaned = re.sub(r'\d+', '', re.sub(r'[^\w\s]', '', lowered))

    # Tokenise from the text that still contains punctuation, so punctuation
    # acts as a word boundary. Deleting it first would fuse neighbours such as
    # "good,bad" or "well-organized" into one token and hide lexicon hits.
    # Digits are still dropped before tokenising.
    words = simple_tokenize(re.sub(r'\d+', '', lowered))

    # Sets give constant-time membership tests instead of a list scan per word.
    neg_lookup = set(neg_terms)
    pos_lookup = set(pos_terms)

    # Count matches with positive and negative terms
    neg_matches = sum(1 for word in words if word in neg_lookup)
    pos_matches = sum(1 for word in words if word in pos_lookup)

    return [cleaned, neg_matches, pos_matches]

# Function to compute sentiment scores for a list of sentences
def compute_sentiment_scores(sentences):
    """Score every sentence and collect the results in a DataFrame.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A DataFrame with one row per sentence and the columns
        ``sentence``, ``neg`` and ``pos``, in input order.
    """
    column_names = ['sentence', 'neg', 'pos']
    rows = list(map(compute_sentiment_score, sentences))
    return pd.DataFrame(rows, columns=column_names)

# Score each corpus and tag every row with its class label
pos_results = compute_sentiment_scores(positive_text).assign(sentiment='positive')
neg_results = compute_sentiment_scores(negative_text).assign(sentiment='negative')

# Stack both frames under a fresh 0..n-1 index and store the label as a
# categorical column (the pandas counterpart of a factor in R)
total_results = (
    pd.concat([pos_results, neg_results], ignore_index=True)
    .assign(sentiment=lambda frame: frame['sentiment'].astype('category'))
)

# Features are the two match counts; the target is the sentiment label
X = total_results[['neg', 'pos']]
y = total_results['sentiment']

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# print(X_train)

In [59]:

# Initialize and train the Naive Bayes classifier on the (neg, pos) match counts
naive_bayes_classifier = MultinomialNB()
naive_bayes_classifier.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = naive_bayes_classifier.predict(X_test)

# Evaluate the model: fraction of test rows whose label was predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Display the confusion matrix (rows: true labels, columns: predicted labels,
# both in sorted label order)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Model Accuracy: 80.00%
Confusion Matrix:
[[5 3]
 [0 7]]
