In [6]:
import nltk
import os

In [7]:
def extract_words(document):
    """Return the distinct lowercased word tokens found in `document`.

    The text is split with `nltk.word_tokenize`; a token is kept only if at
    least one of its characters is alphabetic.

    Args:
        document: The text to tokenize.

    Returns:
        A set of lowercased tokens.
    """
    vocabulary = set()
    for token in nltk.word_tokenize(document):
        # Tokens without a single letter are dropped.
        if any(map(str.isalpha, token)):
            vocabulary.add(token.lower())
    return vocabulary

In [8]:
def load_data(directory, encoding=None):
    """Load the positive and negative corpora stored in `directory`.

    Reads "Positives.txt" and then "Negatives.txt" from `directory`. Each
    non-blank line is treated as one document and converted to a set of
    words with `extract_words`.

    Args:
        directory: Folder that contains both text files.
        encoding: Text encoding passed to `open`. The default, None, keeps
            the platform's default encoding; pass e.g. "utf-8" to make
            loading independent of the machine's locale.

    Returns:
        A two-element list `[positives, negatives]`; each element is a list
        of word sets, one per non-blank line of the corresponding file.

    Raises:
        OSError: If either file cannot be opened.
    """
    result = []
    for filename in ["Positives.txt", "Negatives.txt"]:
        path = os.path.join(directory, filename)
        with open(path, encoding=encoding) as f:
            lines = f.read().splitlines()
        # A blank line is not a document: keeping it would add an empty
        # word set that later becomes a labelled, all-False training sample.
        result.append([
            extract_words(line)
            for line in lines
            if line.strip()
        ])
    return result

In [9]:
def generate_features(documents, words, label):
    """Build labelled word-presence feature sets for a group of documents.

    Args:
        documents: Iterable of documents; each must support `word in document`.
        words: Iterable of the words used as feature names.
        label: Label attached to every generated feature set.

    Returns:
        A list with one `(features, label)` tuple per document, where
        `features` maps each word to True if the document contains it and
        False otherwise.
    """
    def presence_map(document):
        # One boolean per vocabulary word: is it present in this document?
        return dict((word, word in document) for word in words)

    return [(presence_map(document), label) for document in documents]

In [10]:

def classify(classifier, document, words):
    """Classify a raw text document with a trained classifier.

    The document is turned into a word set with `extract_words`, then into
    a mapping from every word in `words` to whether the document contains it.

    Args:
        classifier: Object exposing `prob_classify(features)`.
        document: The raw text to classify.
        words: Iterable of the words used as feature names.

    Returns:
        Whatever `classifier.prob_classify` returns for the feature mapping.
    """
    tokens = extract_words(document)
    features = {}
    for word in words:
        features[word] = word in tokens
    return classifier.prob_classify(features)

In [13]:

# Load both corpora; each is a list of word sets, one per document.
positives, negatives = load_data("./Data")

# The vocabulary is every word that occurs in any document of either corpus.
words = set()
for corpus in (positives, negatives):
    for document in corpus:
        words |= document

# Labelled training samples: positives first, then negatives.
training = (
    generate_features(positives, words, "Positive")
    + generate_features(negatives, words, "Negative")
)

classifier = nltk.NaiveBayesClassifier.train(training)

# Read one sentence from the user and print the probability of each label.
s = input("s: ")
result = classify(classifier, s, words)
for key in result.samples():
    print(f"{key}: {result.prob(key):.4f}")

Positive: 0.4980
Negative: 0.5020
