In [9]:
# Sentiment Analysis using Logistic Regression

# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [10]:
# Build the in-memory dataset: 74 short reviews with binary sentiment labels.
# Each tuple holds one positive review followed by one negative review, so the
# flattened list alternates positive, negative, positive, negative, ...
review_pairs = [
    ('I love this product!', 'This is the worst movie ever.'),
    ('Absolutely fantastic, would recommend.', 'Not good at all.'),
    ('An amazing experience!', 'Terrible, would not recommend.'),
    ('I enjoyed every moment of it.', 'It was disappointing.'),
    ('Best purchase I made this year.', 'Completely useless, broke after a week.'),
    ('Outstanding performance, highly recommended!', 'Awful service, never again.'),
    ('A masterpiece of storytelling.', 'Poor quality, I want a refund.'),
    ('Exceeded my expectations, great product!', 'Terrible acting, I walked out.'),
    ('It’s a must-watch!', 'I regret watching it, waste of time.'),
    ('Excellent features and easy to use.', 'The movie plot made no sense at all.'),
    ('Perfect in every way.', 'Worst experience of my life.'),
    ('Incredible value for the price.', 'I couldn’t even finish it, so boring.'),
    ('Loved the special effects, amazing job!', 'The food was inedible.'),
    ('The best vacation I’ve ever had.', 'I will never come back to this place.'),
    ('Top-notch customer service.', 'Disgusting hygiene standards, avoid at all costs.'),
    ('Beautifully designed and functional.', 'Completely disappointed, not worth it.'),
    ('A brilliant and moving performance.', 'The worst phone I’ve ever bought.'),
    ('Fantastic experience, five stars.', 'Would not recommend to anyone.'),
    ('A joy to watch, thoroughly entertaining.', 'This movie was a total disaster.'),
    ('The quality exceeded my expectations.', 'It felt like a waste of money.'),
    ('Wonderful product, I’m very satisfied.', 'Terrible product, stay away.'),
    ('This restaurant offers great food.', 'The service was horrendous.'),
    ('Superb acting, loved every minute.', 'Hated the experience from start to finish.'),
    ('This is the best thing I’ve bought.', 'Completely underwhelming, not worth it.'),
    ('Amazing craftsmanship, so happy with it.', 'The item arrived damaged and late.'),
    ('Incredible plot twists, kept me engaged.', 'Dull and lifeless performances.'),
    ('Best coffee I’ve ever tasted!', 'Tasted like burnt rubber, awful.'),
    ('I’d recommend it to all my friends.', 'Wouldn’t wish this on my worst enemy.'),
    ('Great vacation spot, had a blast!', 'Never staying here again, horrible place.'),
    ('Outstanding performance by the lead actor.', 'The supporting cast was dreadful.'),
    ('A heartwarming and uplifting story.', 'Boring and predictable, I fell asleep.'),
    ('Best decision I’ve ever made.', 'Worst service I’ve encountered.'),
    ('I loved how easy it was to use.', 'Impossible to figure out, terrible design.'),
    ('An unforgettable experience!', 'A forgettable movie, don’t waste your time.'),
    ('I would definitely buy it again.', 'I regret ever buying this.'),
    ('Brilliant cinematography, visually stunning.', 'The CGI looked so fake, terrible.'),
    ('I’m so happy with this purchase.', 'It broke after two uses, terrible quality.'),
]

# Column-oriented form consumed by pandas: 1 marks a positive review, 0 a negative one.
data = {
    'review': [text for pair in review_pairs for text in pair],
    'sentiment': [1, 0] * len(review_pairs),
}

df = pd.DataFrame(data)
df.head(10)  # Preview the first 10 rows of the dataset

Unnamed: 0,review,sentiment
0,I love this product!,1
1,This is the worst movie ever.,0
2,"Absolutely fantastic, would recommend.",1
3,Not good at all.,0
4,An amazing experience!,1
5,"Terrible, would not recommend.",0
6,I enjoyed every moment of it.,1
7,It was disappointing.,0
8,Best purchase I made this year.,1
9,"Completely useless, broke after a week.",0


In [11]:
# Preprocessing the data
# Separate the review text (features) from the sentiment labels (targets)
X, y = df['review'], df['sentiment']

# Hold out 20% of the reviews for evaluation; the fixed random_state keeps the
# split the same from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words encoding: the vocabulary is learned from the training reviews
# only and then reused, unchanged, to encode the held-out reviews
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Create the logistic regression classifier and fit it on the training counts
# (fit() returns the fitted estimator itself)
model = LogisticRegression().fit(X_train_vec, y_train)

# Predicted labels for the held-out reviews
y_pred = model.predict(X_test_vec)

# Report overall accuracy, then the per-class precision / recall / F1 table.
# sep="\n" reproduces the line breaks of three consecutive print() calls.
accuracy = accuracy_score(y_test, y_pred)
print(
    f"Accuracy: {accuracy * 100:.2f}%",
    "\nClassification Report:\n",
    classification_report(y_test, y_pred),
    sep="\n",
)

# Helper for classifying a single, user-supplied review
def predict_sentiment(review):
    """Return 'Positive' or 'Negative' for one review string.

    Uses the notebook-level ``vectorizer`` and ``model`` objects, so both must
    already exist (and be fitted) when this function is called.

    Parameters
    ----------
    review : str
        The review text to classify.

    Returns
    -------
    str
        'Positive' if the predicted label for the review equals 1,
        otherwise 'Negative'.
    """
    # The vectorizer is given a one-element list, so the model returns exactly
    # one prediction; take that single label.
    features = vectorizer.transform([review])
    predicted_label = model.predict(features)[0]

    if predicted_label == 1:
        return 'Positive'
    return 'Negative'

# Classify a hand-written review that negates a positive word.
# NOTE(review): the recorded run labels this sentence 'Positive'. Presumably the
# word-count features cannot represent the negation — confirm before relying on
# this model for negated text.
custom_review = "i don't love this product."
custom_sentiment = predict_sentiment(custom_review)
print(f"\nSentiment prediction for '{custom_review}': {custom_sentiment}")


Accuracy: 86.67%

Classification Report:

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         8
           1       1.00      0.71      0.83         7

    accuracy                           0.87        15
   macro avg       0.90      0.86      0.86        15
weighted avg       0.89      0.87      0.86        15


Sentiment prediction for 'i don't love this product.': Positive
