In [11]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Fetch the NLTK data packages the preprocessing steps below rely on:
#   - 'punkt' / 'punkt_tab': sentence/word tokenizer models used by word_tokenize.
#     Recent NLTK releases (3.9+) load the tokenizer from 'punkt_tab' rather than
#     the older pickled 'punkt' package, so both are requested to keep the script
#     working across NLTK versions.
#   - 'wordnet' / 'omw-1.4': lexical data used by WordNetLemmatizer.
# nltk.download skips packages that are already up to date.
for resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(resource)


# Load the tab-separated review data; the steps below read its 'Review' and
# 'Score' columns.
df = pd.read_csv('musical1.tsv', sep='\t')

# Question 1: Tokenize the given dataset reviews
df['tokens'] = df['Review'].apply(word_tokenize)

# Question 2: Perform stemming for the tokens of the reviews
stemmer = PorterStemmer()


def _stem_tokens(tokens):
    """Return a list holding the Porter stem of each token in *tokens*."""
    return list(map(stemmer.stem, tokens))


df['stemmed'] = df['tokens'].apply(_stem_tokens)

# Question 3: Perform lemmatization for the stemmed tokens
lemmatizer = WordNetLemmatizer()


def _lemmatize_tokens(tokens):
    """Return a list holding the WordNet lemma of each token in *tokens*."""
    return list(map(lemmatizer.lemmatize, tokens))


df['lemmatized'] = df['stemmed'].apply(_lemmatize_tokens)

# Re-assemble each token list into one space-separated string so it can be
# passed to a text vectorizer.
df['processed_text'] = df['lemmatized'].apply(' '.join)


# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['processed_text'],
    df['Score'],
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts: the vocabulary is learned from the training text only and
# then reused, unchanged, to encode the test text.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Question 4: Build the Random Forest classifier
# fit() returns the fitted estimator, so construction and training are chained.
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train_vec, y_train)

# Predicted labels for the held-out reviews.
y_pred = rf_classifier.predict(X_test_vec)

# Question 5: Evaluate the model by finding its accuracy, precision, recall, and F1-score
# Every scorer is called as scorer(y_test, y_pred) with its default settings.
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Raw, unrounded metric values, one per line.
raw_report = (
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}\n",
)
print(*raw_report, sep="\n")

# Rounded values accompanied by a short plain-language interpretation.
summary_report = (
    "Model Evaluation Summary:",
    f"Accuracy: {accuracy:.3f} - This indicates that the model correctly predicts {accuracy * 100:.1f}% of the total cases. Accuracy is a good measure when the target classes are well balanced.\n",
    f"Precision: {precision:.3f} - This shows that when the model predicts a review to be positive, it is correct {precision * 100:.1f}% of the time. Precision is particularly important if the cost of false positives is high.\n",
    f"Recall: {recall:.3f} - This means that the model is able to identify {recall * 100:.1f}% of all actual positive cases. High recall is crucial when it's important to capture as many positives as possible.\n",
    f"F1 Score: {f1:.3f} - The F1 score combines precision and recall into a single metric by taking their harmonic mean. An F1 score of {f1:.3f} suggests a balanced performance between precision and recall, which is beneficial in cases where both false positives and false negatives are costly.",
)
print(*summary_report, sep="\n")


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/drmdshowkatkabir/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/drmdshowkatkabir/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/drmdshowkatkabir/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Accuracy: 0.765
Precision: 0.7913043478260869
Recall: 0.7982456140350878
F1 Score: 0.794759825327511

Model Evaluation Summary:
Accuracy: 0.765 - This indicates that the model correctly predicts 76.5% of the total cases. Accuracy is a good measure when the target classes are well balanced.

Precision: 0.791 - This shows that when the model predicts a review to be positive, it is correct 79.1% of the time. Precision is particularly important if the cost of false positives is high.

Recall: 0.798 - This means that the model is able to identify 79.8% of all actual positive cases. High recall is crucial when it's important to capture as many positives as possible.

F1 Score: 0.795 - The F1 score combines precision and recall into a single metric by taking their harmonic mean. An F1 score of 0.795 suggests a balanced performance between precision and recall, which is beneficial in cases where both false positives and false negatives are costly.
