In [2]:
# ✅ Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ✅ Set random seed
# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(seed=42)

# ✅ Read the review CSV into a DataFrame
# The path is relative, so the file must sit in the current working directory.
df = pd.read_csv(filepath_or_buffer="flipkart_reviews.csv")
# ✅ Map star ratings to sentiment labels
# 1-2 = negative, 3 = neutral, 4-5 = positive
def map_sentiment(rating):
    """Map a numeric star rating to a sentiment label.

    A rating of 2 or below is 'negative', a rating equal to 3 is 'neutral',
    and any other rating is 'positive'.

    Args:
        rating: Numeric star rating for one review.

    Returns:
        One of 'negative', 'neutral' or 'positive'.

    Raises:
        ValueError: If ``rating`` is missing (None, NaN, pd.NA). Without this
            guard a NaN fails both comparisons below and would be silently
            labelled 'positive'.
    """
    if pd.isna(rating):
        raise ValueError("rating is missing; cannot derive a sentiment label")
    if rating <= 2:
        return 'negative'
    if rating == 3:
        return 'neutral'
    return 'positive'

# ✅ Drop rows that cannot be labelled or vectorized
# A missing rating has no sentiment, and TfidfVectorizer raises on NaN
# documents. `.copy()` keeps the column assignments below from touching a
# view of the original frame.
df = df.dropna(subset=['rating', 'review']).copy()

df['Sentiment'] = df['rating'].apply(map_sentiment)

# ✅ Encode Sentiment into numeric labels
label_encoder = LabelEncoder()
df['Sentiment_Label'] = label_encoder.fit_transform(df['Sentiment'])

# Raw review text; cast to str so stray non-string cells do not break the
# vectorizer's text preprocessing.
reviews = df['review'].astype(str)
y = df['Sentiment_Label']

# ✅ Train-test split
# Split the raw text BEFORE fitting TF-IDF so the vocabulary and IDF weights
# are learned from training reviews only (no test-set leakage).
reviews_train, reviews_test, y_train, y_test = train_test_split(
    reviews, y, test_size=0.2, random_state=42
)

# ✅ Convert review text into numeric features using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(reviews_train)  # fit on training text only
X_test = vectorizer.transform(reviews_test)        # reuse the fitted vocabulary

# Feature matrix for every row, built with the train-fitted vectorizer; kept
# so any later code that expects `X` alongside `y` still works.
X = vectorizer.transform(reviews)

# ✅ Train the SVM model
svm_model = LinearSVC(max_iter=10000)
svm_model.fit(X_train, y_train)

# ✅ Predict and evaluate
y_pred = svm_model.predict(X_test)

# Pass explicit label ids and names so the report shows sentiment names and
# keeps a fixed row/column order even if a class is absent from the test split.
class_ids = np.arange(len(label_encoder.classes_))
class_names = list(label_encoder.classes_)

print("🔍 SVM Accuracy Score:", accuracy_score(y_test, y_pred))
print(
    "📊 Classification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=class_names),
)
print("📉 Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))


# Output recorded when the cell above was run (the CSV was not found in the working directory):
# FileNotFoundError: [Errno 2] No such file or directory: 'flipkart_reviews.csv'