In [1]:
# CISB5123 Text Analytics - Lab Assignment 2
# Name: MUHAMMAD IDLAN HAKIMI BIN MOHD AZIZI | ID: SN01082897
# Name: Thanes Selvam | ID: SN01082944

import pandas as pd
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Load and preprocess data
df = pd.read_csv("Reviews.csv")
df = df[['Score', 'Text']].dropna()
df = df[df['Score'] != 3]  # Remove neutral reviews
df['Label'] = df['Score'].apply(lambda x: 'positive' if x > 3 else 'negative')

# Sample only a small subset for quick demo
df = df.sample(n=300, random_state=42).reset_index(drop=True)

# Lexicon-based sentiment analysis using TextBlob
def get_textblob_sentiment(text):
    polarity = TextBlob(text).sentiment.polarity
    return 'positive' if polarity > 0 else 'negative'

df['TextBlob_Pred'] = df['Text'].apply(get_textblob_sentiment)

# Classification report for TextBlob
print("TextBlob Classification Report:")
print(classification_report(df['Label'], df['TextBlob_Pred'], target_names=['negative', 'positive']))

# Machine learning-based sentiment analysis using SVM
X = df['Text']
y = df['Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

svm = SVC(kernel='linear')
svm.fit(X_train_vec, y_train)

# Classification report for SVM
print("SVM Classification Report:")
print(classification_report(y_test, svm.predict(X_test_vec), target_names=['negative', 'positive']))

# Discussion
# Discussion
print("""
Discussion:
TextBlob is simple to use. It checks words against a list to see if they are positive or negative.

SVM is a type of machine learning that learns from examples. It usually works better because it adapts to the data.

Overall, SVM gave better results than TextBlob in our tests. This shows that learning from data can be more effective than just using a list of words.
""")


TextBlob Classification Report:
              precision    recall  f1-score   support

    negative       0.54      0.29      0.38        48
    positive       0.88      0.95      0.91       252

    accuracy                           0.85       300
   macro avg       0.71      0.62      0.65       300
weighted avg       0.82      0.85      0.83       300

SVM Classification Report:
              precision    recall  f1-score   support

    negative       0.42      0.38      0.40        13
    positive       0.90      0.91      0.90        77

    accuracy                           0.83        90
   macro avg       0.66      0.65      0.65        90
weighted avg       0.83      0.83      0.83        90


Discussion:
TextBlob is simple to use. It checks words against a list to see if they are positive or negative.

SVM is a type of machine learning that learns from examples. It usually works better because it adapts to the data.

Overall, SVM gave better results than TextBlob in our tes