# Sentiment Analysis with NLP
This notebook performs sentiment analysis on a small synthetic dataset of product reviews using TF-IDF vectorization and Logistic Regression.

In [29]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import random

# Pools of hand-written review snippets the toy dataset is sampled from:
# ten positive and ten negative sentences.
positive_reviews = [
    "I love this product!", "Excellent quality and fast delivery.",
    "Absolutely wonderful experience.", "Exceeded my expectations!",
    "Great value for the price.", "Highly recommended!",
    "Superb service and support.", "Very satisfied with my purchase.",
    "Top-notch item.", "Five stars, would buy again!"
]

negative_reviews = [
    "Terrible experience.", "Worst purchase I've ever made.",
    "Not worth the money at all.", "Very disappointed.",
    "The quality is awful.", "Bad customer service.",
    "Product broke after one use.", "Horrible, don’t buy.",
    "Waste of money.", "I regret buying this."
]

# Configuration for the synthetic dataset.
SEED = 42          # fixed seed so sampling and shuffling are reproducible
N_PER_CLASS = 100  # number of sampled reviews per sentiment class

# A dedicated, seeded generator makes every run produce the same dataset
# without touching the global `random` state other code may rely on.
rng = random.Random(SEED)

# Draw N_PER_CLASS reviews per class (with replacement) from each pool;
# label 1 = positive, 0 = negative.
reviews = [rng.choice(positive_reviews) for _ in range(N_PER_CLASS)] + \
          [rng.choice(negative_reviews) for _ in range(N_PER_CLASS)]
sentiments = [1] * N_PER_CLASS + [0] * N_PER_CLASS

# Shuffle text/label pairs together so the classes are interleaved while
# each review keeps its own label.
combined = list(zip(reviews, sentiments))
rng.shuffle(combined)
reviews, sentiments = zip(*combined)

# Two-column frame: the review text and its 0/1 sentiment label.
df = pd.DataFrame({"review": reviews, "sentiment": sentiments})




# Target labels (0 = negative, 1 = positive).
y = df['sentiment']

# Train-test split on the RAW text, done before any vectorization so that
# the vocabulary and IDF weights are learned from training documents only.
# Stratifying on y keeps the class ratio the same in both splits.
text_train, text_test, y_train, y_test = train_test_split(
    df['review'], y, test_size=0.25, random_state=42, stratify=y
)

# Vectorization: fit TF-IDF on the training text, then apply the fitted
# transform to the held-out text. Fitting on the full corpus would leak
# test-set statistics into the features.
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-corpus feature matrix, kept under its original name for any later
# cell that uses `X`; it is built with the train-fitted vectorizer.
X = vectorizer.transform(df['review'])

# Model training
model = LogisticRegression()
model.fit(X_train, y_train)

# Prediction and evaluation
# NOTE: the reviews are sampled with replacement from a pool of only 20
# distinct sentences, so test texts also occur verbatim in the training
# split. The scores below therefore measure recall of seen sentences, not
# generalization to unseen reviews.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.94
Classification Report:
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        25
           1       1.00      0.88      0.94        25

    accuracy                           0.94        50
   macro avg       0.95      0.94      0.94        50
weighted avg       0.95      0.94      0.94        50

