In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
# Fetch the NLTK data packages this script relies on. The names and their
# order match the original three explicit download calls.
for _resource in ("punkt", "stopwords", "wordnet"):
    nltk.download(_resource)

def preprocess_text(text):
    """Normalize *text* into a space-separated string of lemmas.

    The text is split with ``word_tokenize``; each token is lowercased,
    dropped if it is an English stop word, and otherwise passed through
    ``WordNetLemmatizer.lemmatize``. No other filtering is applied, so any
    non-stop-word token produced by the tokenizer is kept.

    Args:
        text: The raw sentence to normalize.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    tokens = word_tokenize(text)
    english_stops = set(stopwords.words("english"))
    wnl = WordNetLemmatizer()

    kept = []
    for token in tokens:
        lowered = token.lower()
        if lowered in english_stops:
            continue
        # lemmatize() is called without a part-of-speech argument, so the
        # lemmatizer's default is used.
        kept.append(wnl.lemmatize(lowered))

    return " ".join(kept)

def text_classification(sentences, labels, test_sentence):
    """Predict a label for *test_sentence* from labelled example sentences.

    Every sentence (training and test) is first normalized with
    ``preprocess_text``. A ``TfidfVectorizer`` is fitted on the training
    sentences only, a ``MultinomialNB`` classifier is trained on the
    resulting matrix, and the test sentence is transformed with that same
    fitted vectorizer before prediction.

    Args:
        sentences: Training sentences.
        labels: One label per training sentence, in the same order.
        test_sentence: The sentence to classify.

    Returns:
        The first (and only) element of the classifier's prediction for
        the test sentence.
    """
    train_docs = list(map(preprocess_text, sentences))
    query_doc = preprocess_text(test_sentence)

    # Fit the TF-IDF vocabulary on the training documents and train on it.
    tfidf = TfidfVectorizer()
    train_matrix = tfidf.fit_transform(train_docs)
    model = MultinomialNB()
    model.fit(train_matrix, labels)

    # Reuse the fitted vectorizer so the test vector shares the training
    # feature space; predict() returns one entry per input row.
    query_matrix = tfidf.transform([query_doc])
    return model.predict(query_matrix)[0]

def main():
    """Run a small demo: train on six labelled sentences and classify one."""
    # Each training example is kept next to its label, then split into the
    # two parallel lists that text_classification expects.
    training_examples = [
        ("I love to play football.", "sports"),
        ("She sings beautifully.", "music"),
        ("He is a great chef.", "food"),
        ("The movie was boring.", "movies"),
        ("They won the championship.", "sports"),
        ("The weather is nice today.", "weather"),
    ]
    sentences = [sentence for sentence, _ in training_examples]
    labels = [label for _, label in training_examples]

    test_sentence = "I am going to watch a movie tonight."

    result = text_classification(sentences, labels, test_sentence)
    print("Predicted label:", result)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/rohansridhar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rohansridhar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/rohansridhar/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Predicted label: sports
