In [1]:
# Step 1: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
# Step 2: Load dataset
# Tunable settings live in one place so the split and the model share a seed
# and none of the numbers below are anonymous.
DATA_PATH = 'dataset.csv'
TEXT_COL = 'Comment'
LABEL_COL = 'Sentiment'
SEED = 42
TEST_SIZE = 0.2
MAX_FEATURES = 5000
N_TREES = 100

df = pd.read_csv(DATA_PATH)

# Step 3: Check data (optional)
print(df.head())
print(df[LABEL_COL].value_counts())  # 0: Negative, 1: Neutral, 2: Positive

# Step 3b: Guard against incomplete rows
# TfidfVectorizer raises on NaN documents and scikit-learn classifiers reject
# NaN labels, so rows missing either field are removed before the split.
# The raw frame `df` is left untouched; the cleaned copy gets its own name.
df_clean = df.dropna(subset=[TEXT_COL, LABEL_COL])
n_dropped = len(df) - len(df_clean)
if n_dropped:
    # Only reported when something was actually removed, so a complete
    # dataset produces exactly the same output as before.
    print(f"Dropped {n_dropped} rows with missing {TEXT_COL!r} or {LABEL_COL!r}")

# Step 4: Feature and label separation
# astype(str) is a no-op for text cells and protects the vectorizer's
# lowercasing step from any non-string value that slipped through parsing.
X = df_clean[TEXT_COL].astype(str)
y = df_clean[LABEL_COL]

# Step 5: Split data into train and test sets
# Stratified so each sentiment class keeps its proportion in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, stratify=y, random_state=SEED)

# Step 6: TF-IDF vectorization
# The vocabulary is fitted on the training comments only; the test comments
# are merely transformed, so no test-set information leaks into the features.
tfidf = TfidfVectorizer(max_features=MAX_FEATURES)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Step 7: Initialize and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=N_TREES, random_state=SEED)
rf_model.fit(X_train_tfidf, y_train)

# Step 8: Predict and evaluate
y_pred = rf_model.predict(X_test_tfidf)

print("Random Forest Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

                                             Comment  Sentiment
0  Hopefully Batman shows up in this :fire::fire:...          2
1  Im making a solemn oath right now that Im not ...          1
2  Been a fan of his since euphoria dude is good ...          2
3    Every good story starts with a when i was a kid          2
4  Hunter Schafer? NEON produced horror film? Im ...          2
Sentiment
2    3110
1    2012
0    1840
Name: count, dtype: int64
Random Forest Accuracy: 0.6676238334529792
Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.52      0.58       368
           1       0.60      0.57      0.58       403
           2       0.70      0.82      0.76       622

    accuracy                           0.67      1393
   macro avg       0.66      0.64      0.64      1393
weighted avg       0.66      0.67      0.66      1393

Confusion Matrix:
 [[190  75 103]
 [ 60 230 113]
 [ 33  79 510]]
