In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk import NaiveBayesClassifier
from nltk.classify import accuracy

# Download the necessary resources
nltk.download('punkt')
nltk.download('stopwords')

# Preprocessing function
def preprocess(text):
    tokens = word_tokenize(text.lower())  # Tokenization and lowercase
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word.isalpha() and word not in stop_words]  # Removing stopwords and non-alphabetic tokens
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(word) for word in tokens]  # Stemming
    return dict([(token, True) for token in tokens])

# Read dataset from a text file
dataset_file = "dataset.txt"  # Path to your dataset file
dataset = []
with open(dataset_file, 'r') as file:
    for line in file:
        text, label = line.strip().split(",")
        dataset.append((text, label))

# Preprocess the dataset
preprocessed_dataset = [(preprocess(text), label) for text, label in dataset]

# Split data into training and testing sets
train_data = preprocessed_dataset[:90]
test_data = preprocessed_dataset[10:]

# Train the Naive Bayes Classifier
classifier = NaiveBayesClassifier.train(train_data)

# Test the classifier
print("Accuracy:", accuracy(classifier, test_data))

# Test a new text segment
text_to_classify = "The product exceeded my expectations"  # You can change this text to test different segments
preprocessed_text = preprocess(text_to_classify)
print("Classification:", classifier.classify(preprocessed_text))


Accuracy: 0.9333333333333333
Classification: positive


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
