## Exercise 1: Part-of-Speech Tagging with NLTK

In [None]:
import nltk
from nltk.tokenize import word_tokenize

# Exercise 1: tokenize a sentence and print the part-of-speech tag of each token.
#
# The tokenizer and tagger data were renamed between NLTK releases: newer
# releases look up 'punkt_tab' and 'averaged_perceptron_tagger_eng', older ones
# 'punkt' and 'averaged_perceptron_tagger'. Fetching both generations keeps the
# cell runnable regardless of the installed NLTK version (already-present
# packages are simply reported as up to date).
for resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

print("\n--- Exercise 1: Part-of-Speech Tagging with NLTK ---")

text5 = "The quick brown fox jumps over the lazy dog."
tokens5 = word_tokenize(text5)
# pos_tag returns a list of (token, tag) pairs.
pos_tags = nltk.pos_tag(tokens5)

print(f"Original text: '{text5}'")
print(f"Tokens: {tokens5}")
print("POS Tags:")
for word, tag in pos_tags:
    # Left-align the token in a 10-character column so the tags line up.
    print(f"{word:<10} {tag}")


## Exercise 2: Dependency Parsing with SpaCy

In [None]:
import spacy

# Exercise 2: run a sentence through spaCy and print, for every token, its
# coarse POS tag, its dependency label and the text of its syntactic head.
print("\n--- Exercise 2: Dependency Parsing with SpaCy ---")

# Get the small English pipeline; spacy.load raises OSError when the model
# package is not installed, in which case it is downloaded and loaded again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download

    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

text7 = "Apple is looking at buying U.K. startup for $1 billion."
doc = nlp(text7)

print(f"Original text: '{text7}'")
print("Dependency Parse:")
for token in doc:
    # One row per token; the columns are padded to fixed widths and joined by
    # print's default single-space separator.
    print(
        token.text.ljust(12),
        "POS=" + token.pos_.ljust(8),
        "DEP=" + token.dep_.ljust(15),
        "HEAD=" + token.head.text,
    )


## Exercise 3: Text Classification with scikit-learn

In [None]:
import nltk
# NOTE(review): the stopwords corpus is downloaded here but not referenced
# anywhere in this cell -- confirm whether it is still needed.
nltk.download('stopwords')

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Exercise 3: train a bag-of-words Naive Bayes sentiment classifier on a toy
# corpus, report how it does on the held-out sentences, then classify one new
# sentence.
print("\n--- Exercise 3: Text Classification with scikit-learn ---")

# Toy corpus: six short reviews.
texts = [
    "This movie is fantastic and I love it!",
    "What a terrible film, absolutely dreadful.",
    "The acting was good, but the plot was boring.",
    "A truly amazing experience, highly recommended.",
    "I hated every minute of this movie, so bad.",
    "It was an okay film, nothing special.",
]

# The reviews cycle positive / negative / neutral, twice over.
labels = ['positive', 'negative', 'neutral'] * 2

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.3,
    random_state=42,
)

# Token counts feed a multinomial Naive Bayes model; fit() returns the fitted
# pipeline, so construction and training are chained.
text_clf = Pipeline(
    steps=[
        ('vect', CountVectorizer()),
        ('clf', MultinomialNB()),
    ]
).fit(X_train, y_train)

# Evaluate on the held-out sentences.
predicted = text_clf.predict(X_test)

print("Test sentences:", X_test, sep="\n")
print("\nActual labels:", y_test, sep="\n")
print("\nPredicted labels:", predicted, sep="\n")

print("\nClassification Report:")
# zero_division=0 reports 0 (instead of warning) for labels with no predictions.
print(classification_report(y_test, predicted, zero_division=0))

# Classify a sentence that is not part of the corpus.
new_sentence = "I really enjoyed this production, very entertaining!"
prediction = text_clf.predict([new_sentence])

print(
    f"\nNew sentence: '{new_sentence}'\n"
    f"Predicted sentiment: {prediction[0]}"
)


## Challenge: Integrated NLP Pipeline

In [None]:
# Challenge: run one paragraph through all three techniques -- POS tagging,
# dependency parsing and sentence-level sentiment classification -- and combine
# the per-sentence predictions into one paragraph-level label.
#
# This cell depends on names created by the earlier exercise cells: `nltk` and
# `word_tokenize` (Exercise 1), `nlp` (Exercise 2) and `text_clf` (Exercise 3).
# Run those cells first.
from collections import Counter

from nltk.tokenize import sent_tokenize

paragraph = (
    "The film started with a strong opening and excellent visuals. "
    "However, the storyline quickly became predictable and dull. "
    "Despite a few good performances, the overall experience was disappointing."
)

print("\n--- Challenge: Integrated NLP Pipeline ---")
print(f"\nParagraph:\n{paragraph}")

# Step 1: word tokens and their part-of-speech tags.
print("\nTokenization & POS Tagging:")

tokens = word_tokenize(paragraph)
pos_tags = nltk.pos_tag(tokens)

for word, tag in pos_tags:
    print(f"{word:<12} {tag}")

# Step 2: dependency label and syntactic head of every token.
print("\nDependency Parsing:")

doc = nlp(paragraph)
for token in doc:
    print(f"{token.text:<12} POS={token.pos_:<8} DEP={token.dep_:<15} HEAD={token.head.text}")

# Step 3: classify each sentence separately with the Exercise 3 pipeline.
sentences = sent_tokenize(paragraph)
predictions = text_clf.predict(sentences)

print("\nSentence-level sentiment predictions:")
for sent, pred in zip(sentences, predictions):
    print(f"Sentence: {sent}")
    print(f"Predicted sentiment: {pred}\n")

# Majority vote for paragraph sentiment.
# most_common() sorts by count and keeps first-seen order among equal counts,
# so the first entry is the same label most_common(1) would return.
vote_ranking = Counter(predictions).most_common()
overall_sentiment, top_votes = vote_ranking[0]
tied_labels = [label for label, votes in vote_ranking if votes == top_votes]

print(f"Overall paragraph sentiment (majority vote): {overall_sentiment}")
if len(tied_labels) > 1:
    # With only a few sentences a tie is likely; say so instead of presenting
    # an arbitrary winner as a majority.
    print(
        f"Note: no clear majority - {', '.join(tied_labels)} tied with "
        f"{top_votes} vote(s) each; reporting the first label encountered."
    )


POS tagging labels each token with its grammatical category (noun, verb, adjective, and so on).

Dependency parsing shows how words relate syntactically.

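Text classification predicts sentiment using a trained Naive Bayes model. Because the model is trained on only a handful of example sentences, its predictions are illustrative rather than reliable.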

Because the classifier was trained on single sentences, the paragraph was split into sentences and each sentence was classified individually.

A majority vote was used to determine the overall sentiment.