<a href="https://colab.research.google.com/github/FatimaKhallifa/NLP-Sentiment-and-Grammar-Parser/blob/main/Movie_review_sentiment_analysis_and_grammar_parsing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score
import nltk
from nltk import CFG

# Toy corpus of hand-labelled movie reviews.
# Each entry pairs the review text with its sentiment label
# (1 for the favourable reviews, 0 for the unfavourable ones).
_labelled_reviews = [
    ("The movie was fantastic and very enjoyable!", 1),
    ("I hated the film, it was a waste of time.", 0),
    ("A wonderful experience, truly moving.", 1),
    ("The acting was terrible and the plot was worse.", 0),
    ("An absolute masterpiece, highly recommend it!", 1),
]

# Column-oriented view of the same pairs, then the DataFrame built from it.
data = {
    "Review": [text for text, _label in _labelled_reviews],
    "Sentiment": [label for _text, label in _labelled_reviews],
}
df = pd.DataFrame(data)

# Features are the raw review strings; targets are the sentiment labels.
X, y = df["Review"], df["Sentiment"]

# Hold out part of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Bag-of-words features: single-word tokens with English stop words removed.
# The vocabulary is learned from the training reviews only and then reused
# unchanged to encode the held-out reviews.
cv = CountVectorizer(
    analyzer='word',
    ngram_range=(1, 1),
    stop_words='english',
)
X_train_cv = cv.fit_transform(X_train)
X_test_cv = cv.transform(X_test)

# Fit a multinomial Naive Bayes model on the training counts.
clf = MultinomialNB().fit(X_train_cv, y_train)

# Score the held-out predictions.
# NOTE(review): with single-label targets like these, micro-averaged F1 is
# numerically the same as accuracy -- confirm that is the metric intended.
y_pred = clf.predict(X_test_cv)
score = f1_score(y_true=y_test, y_pred=y_pred, average='micro')
print(f"F-1 Score: {score:.4f}")

# Small context-free grammar over a movie-themed vocabulary.
# A PP may attach to a noun phrase (NP -> Det N PP) or to a verb phrase
# (VP -> VP PP), so a sentence ending in a prepositional phrase can have
# more than one parse.
grammar = CFG.fromstring("""
  S -> NP VP
  NP -> Det N | Det N PP
  VP -> V NP | VP PP
  PP -> P NP
  Det -> 'a' | 'the'
  N -> 'movie' | 'plot' | 'character' | 'director'
  V -> 'enjoyed' | 'hated' | 'loved'
  P -> 'with' | 'by'
""")

# Whitespace-tokenised example sentence; every token is a terminal of the grammar.
sentence = "the director loved the plot with a character".split()

# Chart parser built from the grammar above.
parser = nltk.ChartParser(grammar)

# Show each parse twice: bracketed form first, then as an ASCII tree diagram.
print("\nParsing sentence:")
for tree in parser.parse(sentence):
    print(tree)
    tree.pretty_print()


F-1 Score: 0.5000

Parsing sentence:
(S
  (NP (Det the) (N director))
  (VP
    (VP (V loved) (NP (Det the) (N plot)))
    (PP (P with) (NP (Det a) (N character)))))
                        S                                 
      __________________|_______                           
     |                          VP                        
     |                   _______|_________                 
     |                  VP                PP              
     |              ____|___          ____|___             
     NP            |        NP       |        NP          
  ___|_____        |     ___|___     |     ___|______      
Det        N       V   Det      N    P   Det         N    
 |         |       |    |       |    |    |          |     
the     director loved the     plot with  a      character

(S
  (NP (Det the) (N director))
  (VP
    (V loved)
    (NP (Det the) (N plot) (PP (P with) (NP (Det a) (N character))))))
                        S                             
