In [None]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

# Hand-written toy corpus: 50 short positive review snippets (10 rows of 5).
# These are concatenated ahead of the negative snippets and labelled 'good'.
positive_reviews = [
    "Amazing movie!", "Loved it", "Great acting", "Fantastic story", "Wonderful experience",
    "Best film ever", "Absolutely brilliant", "Enjoyed every moment", "Highly recommended", "Very touching",
    "Heartwarming", "Beautifully made", "Excellent direction", "Perfectly executed", "A masterpiece",
    "Superb film", "Five stars", "Top notch!", "Really enjoyable", "Loved the cinematography",
    "Nice plot", "Great soundtrack", "Nice cast", "Truly inspiring", "Very entertaining",
    "Worth watching", "Perfect ending", "Engaging throughout", "Best performance", "Oscar-worthy",
    "Refreshing story", "Feel-good film", "Impressive work", "Emotional and deep", "Very clever",
    "Delightful experience", "Truly amazing", "Gripping and emotional", "Strong performance", "Loved the characters",
    "Top performance", "Uplifting movie", "Great visuals", "Brilliantly written", "Touching and beautiful",
    "Highly enjoyable", "Unforgettable experience", "Deep and meaningful", "Great entertainment", "Solid story"]

# Hand-written toy corpus: 50 short negative review snippets (10 rows of 5).
# These follow the positive snippets in the combined list and are labelled 'bad'.
negative_reviews = [
    "Terrible movie", "Did not like it", "Worst acting", "Very boring", "Awful plot",
    "Bad direction", "Poorly written", "Wasted time", "Not enjoyable", "Disappointing",
    "Horrible experience", "Too slow", "Unrealistic", "Messy script", "No chemistry",
    "Flat storyline", "Low quality", "Very predictable", "No emotions", "Bad performance",
    "Painful to watch", "Not recommended", "Forgettable", "Too long", "Terrible pacing",
    "Awful dialogue", "Very weak", "Trash movie", "Overhyped", "Worst film ever",
    "Poor acting", "Nothing good", "Ridiculous plot", "Zero stars", "Unbearable",
    "Clumsy and awkward", "Lacked emotion", "Too confusing", "Dull from start", "Not worth it",
    "Waste of money", "Bad cinematography", "Lame twist", "Cheap production", "Hated it",
    "Weak characters", "Sloppy editing", "Overacted", "Cliché scenes", "Total disappointment"]

# Single corpus: all positive snippets first, then all negative snippets.
reviews = positive_reviews + negative_reviews

# Derive the label counts from the lists themselves rather than a literal 50.
# With hard-coded counts, adding a review to one list and removing one from
# the other keeps the total length valid but silently mislabels a review.
labels = ['good'] * len(positive_reviews) + ['bad'] * len(negative_reviews)

# One row per review: raw text in 'Review', class name ('good'/'bad') in 'labels'.
df = pd.DataFrame({
    'Review': reviews,
    'labels': labels})


In [None]:
y = df['labels']

# Split the RAW text before any feature extraction. Fitting TF-IDF on the full
# corpus first would let test-set vocabulary and IDF statistics leak into
# training and inflate the reported metrics.
# stratify=y keeps the good/bad ratio the same in the train and test splits,
# which matters with only 25 held-out reviews.
train_texts, test_texts, y_train, y_test = train_test_split(
    df['Review'], y, test_size=0.25, random_state=42, stratify=y)

# NOTE(review): the built-in 'english' stop list appears to drop negations such
# as "not"/"no", which carry sentiment ("Not enjoyable" -> "enjoyable").
# Confirm this is intended before relying on the scores.
vectorizer = TfidfVectorizer(max_features=300, stop_words='english', lowercase=True)
X_train = vectorizer.fit_transform(train_texts)  # learn vocabulary/IDF on train only
X_test = vectorizer.transform(test_texts)        # reuse the train-fitted vocabulary

# Full-corpus matrix, kept so the name `X` stays defined for any later cell;
# it is encoded with the train-fitted vocabulary, not refitted.
X = vectorizer.transform(df['Review'])

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Metrics are reported for the 'good' class as the positive label.
precision = precision_score(y_test, y_pred, average='binary', pos_label='good')
recall = recall_score(y_test, y_pred, average='binary', pos_label='good')
f1 = f1_score(y_test, y_pred, average='binary', pos_label='good')

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Precision: 0.8
Recall: 0.5714285714285714
F1 Score: 0.6666666666666666


In [None]:
def text_preprocess_vectorize(texts, vectorizer):
    """Encode raw text with the given vectorizer.

    Only ``vectorizer.transform`` is called; nothing is fitted here, so the
    vectorizer is expected to have been fitted by the caller beforehand.

    Args:
        texts: An iterable of raw strings, or a single string, which is
            treated as a one-document batch.
        vectorizer: Any object exposing ``transform(iterable_of_str)``.

    Returns:
        Whatever ``vectorizer.transform`` returns for the batch.
    """
    # A bare string is itself an iterable of characters. Wrap it so it is
    # encoded as one document instead of being rejected or split per character.
    if isinstance(texts, str):
        texts = [texts]
    return vectorizer.transform(texts)

# Smoke test: run two unseen sentences through the vectorizer and classifier
# defined in the earlier cells and print the predicted class names.
sample_texts = [
    "I love this!",
    "It's awful.",
]
X_sample = text_preprocess_vectorize(texts=sample_texts, vectorizer=vectorizer)
prediction = model.predict(X_sample)
print(prediction)

['bad' 'bad']
