In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Hand-labelled toy corpus: each entry pairs a sentence with its sentiment
# (1 = Positive, 0 = Negative). Keeping text and label side by side makes it
# hard for the two columns to drift out of alignment.
samples = [
    ("I love this movie, it was fantastic!", 1),
    ("What a horrible experience, I hated it.", 0),
    ("The product is great and works well.", 1),
    ("Terrible service, very disappointed.", 0),
    ("Excellent quality, totally worth the money!", 1),
    ("Worst app ever, do not download.", 0),
    ("I'm very happy with my purchase.", 1),
    ("It was a waste of time, awful.", 0),
    ("Such a good book, I enjoyed every page.", 1),
    ("This is bad, I don’t recommend it.", 0),
    ("I am extremely satisfied with the support.", 1),
    ("The food was disgusting and cold.", 0),
    ("Highly recommend this to everyone.", 1),
    ("The device stopped working after a day.", 0),
    ("Absolutely fantastic performance.", 1),
    ("Not worth the price at all.", 0),
    ("I feel great using this product.", 1),
    ("Such a terrible experience, never again.", 0),
    ("The package arrived earlier than expected.", 1),
    ("Disappointed with the quality.", 0),
]

# Column-oriented view of the same samples, in the original order.
data = {
    'text': [sentence for sentence, _ in samples],
    'label': [sentiment for _, sentiment in samples],
}

df = pd.DataFrame(data)

# Split the raw sentences BEFORE vectorizing. Fitting TF-IDF on the whole
# corpus would let the held-out sentences shape the vocabulary and IDF
# weights, leaking test information into training.
y = df['label']
text_train, text_test, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42, stratify=y
)

# Learn the vocabulary / IDF weights from the training sentences only, then
# apply that fitted transform unchanged to the held-out sentences.
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Whole-corpus matrix in the train-fitted feature space; kept only so that
# `X` stays defined for any later cell that reads it.
X = vectorizer.transform(df['text'])

# Fit a 100-tree random forest on the training split; the fixed seed keeps
# the run repeatable. `fit` returns the estimator itself, so it can be chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
# zero_division=0 reports 0 (without a warning) for a class that never gets predicted.
test_report = classification_report(y_test, y_pred, zero_division=0)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

# Hand-written sentences used as a quick sanity check of the trained model.
custom_sentences = [
    "This phone is amazing, I love it!",
    "Completely useless, I'm unhappy with it.",
    "Not bad, but could be better.",
    "The experience was okay, nothing special.",
]

# Reuse the already-fitted vectorizer (transform, not fit_transform) so the
# feature columns match the ones the model was trained on.
custom_vectors = vectorizer.transform(custom_sentences)
custom_preds = model.predict(custom_vectors)

print("\nCustom Input Predictions:")
for text, pred in zip(custom_sentences, custom_preds):
    # Label 1 is Positive; anything else is reported as Negative.
    if pred == 1:
        sentiment = 'Positive'
    else:
        sentiment = 'Negative'
    print(f"'{text}' => {sentiment}")


Accuracy: 0.5

Classification Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         2
           1       0.00      0.00      0.00         2

    accuracy                           0.50         4
   macro avg       0.25      0.50      0.33         4
weighted avg       0.25      0.50      0.33         4


Custom Input Predictions:
'This phone is amazing, I love it!' => Positive
'Completely useless, I'm unhappy with it.' => Negative
'Not bad, but could be better.' => Negative
'The experience was okay, nothing special.' => Negative
