<a href="https://colab.research.google.com/github/Flabert/Calculator/blob/main/Natural_Language.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Install libraries
!pip install nltk scikit-learn matplotlib wordcloud
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from wordcloud import WordCloud

#Download the NLTK data packages used below
for resource in ('punkt', 'movie_reviews'):
    nltk.download(resource)

#Load the movie reviews dataset
from nltk.corpus import movie_reviews

#Collect the file ids listed under 'pos' and under 'neg'
positive_reviews, negative_reviews = (
    movie_reviews.fileids(group) for group in ('pos', 'neg')
)

#Create a function to extract reviews from the corpus
def extract_reviews(fileids):
    """Return one string per file id: that file's corpus words joined by single spaces."""
    reviews = []
    for fileid in fileids:
        tokens = movie_reviews.words(fileid)
        reviews.append(" ".join(tokens))
    return reviews

#Turn each group of file ids into a list of review strings
positive_texts, negative_texts = map(extract_reviews, (positive_reviews, negative_reviews))

#Build the dataset: positive reviews first (label 1), then negative reviews (label 0)
texts = [*positive_texts, *negative_texts]
labels = [1 for _ in positive_texts] + [0 for _ in negative_texts]

#Split the data into training and testing sets (20% held out, fixed seed)
x_train, x_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

#Text classification pipeline: word counts fed into a multinomial Naive Bayes classifier
model = make_pipeline(CountVectorizer(), MultinomialNB())

#Train the model
model.fit(x_train, y_train)

#Evaluate model on the held-out test set
predicted = model.predict(x_test)
accuracy = accuracy_score(y_test, predicted)
#Name the classes in the report (label 0 = negative, label 1 = positive) so it
#reads the same way as the confusion matrix instead of showing raw 0/1 ids
report = classification_report(
    y_test,
    predicted,
    labels=[0, 1],
    target_names=['Negative', 'Positive'],
)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)

#Generate one word cloud per sentiment from the concatenated review texts
positive_words = " ".join(positive_texts)
negative_words = " ".join(negative_texts)

cloud_size = {"width": 800, "height": 400}
positive_wordcloud = WordCloud(background_color="white", **cloud_size).generate(positive_words)
negative_wordcloud = WordCloud(background_color="black", **cloud_size).generate(negative_words)

#Display the two clouds side by side in a single figure
plt.figure(figsize=(14, 7))

panels = (
    ("Positive Words", positive_wordcloud),
    ("Negative Words", negative_wordcloud),
)
for position, (heading, cloud) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(cloud, interpolation='bilinear')
    plt.title(heading)
    plt.axis("off")

plt.show()

#Compute the confusion matrix for the test predictions and plot it
cm = confusion_matrix(y_test, predicted)
matrix_display = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['Negative', 'Positive'],
)
matrix_display.plot()
plt.title("Confusion Matrix")
plt.show()

#Enable user input for sentiment analysis
def analyse_sentiment():
    """Interactively classify movie reviews typed by the user.

    Repeatedly prompts for a review, runs it through the module-level
    ``model`` and prints "Positive" when the predicted label is 1, otherwise
    "Negative". The loop ends when the user types 'quit' (any letter case,
    surrounding whitespace ignored) or when input is no longer available
    (end of input or an interrupt). Blank entries are skipped and the prompt
    is shown again.
    """
    while True:
        try:
            user_input = input("Enter a movie review (type 'quit' to exit): \n")
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the user interrupted: stop instead of
            # surfacing a traceback from inside the prompt loop
            break

        review = user_input.strip()
        if review.lower() == 'quit':
            break
        if not review:
            # Nothing to classify; ask again
            continue

        # predict() is given a one-element list, so take the single label
        # explicitly rather than comparing the whole result against 1
        prediction = model.predict([review])[0]
        sentiment = "Positive" if prediction == 1 else "Negative"
        print(f"Sentiment: {sentiment}")

#Test with user input: start the interactive prompt, which keeps asking for
#reviews until the user types 'quit'
analyse_sentiment()


