In [1]:
# ------------------------------------------
# 📘 Sentiment Analysis on Text Data (NLP)
# ------------------------------------------

# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 2: Load dataset
data = pd.read_csv("sentiment_data.csv")

# Step 3: Explore data (optional)
print("🔹 First 5 rows:")
print(data.head(), "\n")

print("🔹 Sentiment distribution:")
print(data['Sentiment'].value_counts(), "\n")

# Step 4: Separate features (X) and target (y)
X = data['Text']
y = data['Sentiment']

# Step 5: Convert text to numerical features using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = vectorizer.fit_transform(X)

# Step 6: Split into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf, y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Initialize and train the model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 8: Make predictions
y_pred = model.predict(X_test)

# Step 9: Evaluate the model
print("✅ Model Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(
    y_test, y_pred, labels=['Positive', 'Negative', 'Neutral']
))
print("\nClassification Report:\n", classification_report(
    y_test, y_pred, labels=['Positive', 'Negative', 'Neutral'], zero_division=0
))

# Step 10: Test with new text inputs
new_texts = [
    "I really love this app!",
    "This is terrible, I hate it.",
    "It's fine, nothing special.",
    "Absolutely fantastic experience!",
    "Worst thing I've ever bought."
]

# Convert new texts to TF-IDF
new_tfidf = vectorizer.transform(new_texts)

# Predict sentiments
predictions = model.predict(new_tfidf)

# Display predictions
print("\n📘 Predictions on New Text:")
for text, pred in zip(new_texts, predictions):
    print(f"Text: \"{text}\" → Sentiment: {pred}")


First 5 rows:
                                      Text Sentiment
0       I love this product, it's amazing!  Positive
1       This is the worst experience ever.  Negative
2              It's okay, nothing special.   Neutral
3  Absolutely fantastic! Highly recommend.  Positive
4         I hate the quality of this item.  Negative 

Value Counts:
Sentiment
Positive    5
Negative    5
Neutral     5
Name: count, dtype: int64 

Model Evaluation:
Accuracy: 0.6666666666666666

Confusion Matrix:
 [[0 1 0]
 [0 1 0]
 [0 0 1]]

Classification Report:
               precision    recall  f1-score   support

    Negative       0.50      1.00      0.67         1
     Neutral       1.00      1.00      1.00         1
    Positive       0.00      0.00      0.00         1

    accuracy                           0.67         3
   macro avg       0.50      0.67      0.56         3
weighted avg       0.50      0.67      0.56         3


Predictions on New Text:
Text: I really love this app! → Sentiment: Po

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
