In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
'''1. Load Data'''
# Small in-memory corpus: the i-th review text pairs with the i-th sentiment label.
data = {
    'review': [
        "This product is amazing! I love it.",
        "Absolutely terrible experience, very disappointed.",
        "It's okay, not great but not bad either.",
        "Highly recommend this, works perfectly.",
        "Worst purchase ever, completely useless.",
        "Good value for money, satisfied with the quality.",
        "The service was slow and unhelpful.",
        "Fantastic! Exceeded my expectations.",
        "Could be better, some features are missing.",
        "Very happy with my new gadget, simple to use.",
    ],
    'sentiment': [
        'positive', 'negative', 'neutral', 'positive', 'negative',
        'positive', 'negative', 'positive', 'neutral', 'positive',
    ],
}
df = pd.DataFrame(data)

# One print call, newline-separated: header, first rows, row count, label counts.
print(
    "--- Dataset Overview ---",
    "Sample Customer Reviews:",
    df.head(),
    f"\nTotal reviews: {len(df)}",
    "\nSentiment Distribution:",
    df['sentiment'].value_counts(),
    sep="\n",
)

# Integer codes follow the listed order: negative -> 0, neutral -> 1, positive -> 2.
sentiment_mapping = {
    label: code for code, label in enumerate(['negative', 'neutral', 'positive'])
}
df['sentiment_encoded'] = df['sentiment'].map(sentiment_mapping)

# Model inputs (raw review text) and targets (integer-encoded sentiment).
X, y = df['review'], df['sentiment_encoded']

# Split the raw review text BEFORE vectorizing. Fitting TF-IDF on the full
# corpus would let the vocabulary and IDF weights see the test reviews
# (data leakage). The same test_size / random_state / stratify arguments are
# used, so the same rows end up in each partition as when splitting the matrix.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Learn vocabulary and IDF weights from the training reviews only, then apply
# that fitted transform unchanged to the held-out reviews.
tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X_train = tfidf_vectorizer.fit_transform(X_train_text)
X_test = tfidf_vectorizer.transform(X_test_text)

# Whole corpus projected into the train-fitted feature space. Kept so the
# name X_vectorized remains defined; it is not used to fit anything.
X_vectorized = tfidf_vectorizer.transform(X)

print("\n--- TF-IDF Vectorization ---")
print(f"Shape of vectorized data: {X_vectorized.shape}")
print(f"Number of unique words (features) extracted: {len(tfidf_vectorizer.get_feature_names_out())}")

print("\n--- Data Split ---")
print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

In [None]:
'''4. Train Logistic Regression Model'''
print("\n--- Model Training ---")

# fit() hands back the estimator it was called on, so building and training
# the classifier can be written as a single assignment.
model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Confirmation line followed by the full hyperparameter dictionary.
print(
    "Logistic Regression Model Trained Successfully!",
    f"Model parameters: {model.get_params()}",
    sep="\n",
)

In [None]:
'''5. Make Predictions'''
# Predicted integer class for every held-out review.
y_pred = model.predict(X_test)

# Inverse lookup: integer code -> sentiment label.
reverse_sentiment_mapping = dict(
    zip(sentiment_mapping.values(), sentiment_mapping.keys())
)

# Side-by-side table of true and predicted codes; the true labels' original
# row index is dropped so the table is numbered 0..n-1.
actual_codes = y_test.reset_index(drop=True)
predictions_df = pd.DataFrame(
    {'Actual_Encoded': actual_codes, 'Predicted_Encoded': y_pred}
)

print("\n--- Predictions ---")
print("Sample Predictions vs. Actual values (encoded):")
print(predictions_df)

# Add human-readable label columns next to the integer codes.
predictions_df['Actual_Sentiment'] = (
    predictions_df['Actual_Encoded'].map(reverse_sentiment_mapping)
)
predictions_df['Predicted_Sentiment'] = (
    predictions_df['Predicted_Encoded'].map(reverse_sentiment_mapping)
)

label_columns = ['Actual_Sentiment', 'Predicted_Sentiment']
print("\nSample Predictions vs. Actual values (Sentiment Labels):")
print(predictions_df[label_columns])

In [None]:
'''6. Evaluate Model'''
print("\n--- Model Evaluation ---")

# Derive the class order from the encoding itself (sorted by integer code)
# rather than relying on dict insertion order, and keep codes and names as
# parallel lists so report rows and matrix axes are labelled consistently.
ordered_classes = sorted(sentiment_mapping.items(), key=lambda item: item[1])
class_names = [name for name, _ in ordered_classes]
class_labels = [code for _, code in ordered_classes]

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# labels= pins the report to every known class. Without it the classes are
# inferred from y_test/y_pred, and classification_report raises ValueError
# when that inferred count differs from len(target_names) (e.g. a class that
# is absent from a small test split and never predicted).
# zero_division=0 reports 0.0 for a class with no predictions or no true
# samples without emitting an undefined-metric warning.
class_report = classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=class_names,
    zero_division=0,
)
print("\nClassification Report:\n", class_report)

# labels= keeps the matrix one row/column per known class, in the order
# printed in the interpretation below, even if a class is missing from the data.
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
print("Confusion Matrix:\n", conf_matrix)
print("\nInterpretation of Confusion Matrix:")
print(f"Rows: Actual classes ({', '.join(class_names)})")
print(f"Columns: Predicted classes ({', '.join(class_names)})")
