In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Sentiment classification pipeline: load labelled reviews, build a
# bag-of-words representation, train a Multinomial Naive Bayes classifier,
# report hold-out metrics, and classify a couple of ad-hoc reviews.

# Step 1: Load the dataset
# Assume 'women_clothing_reviews.csv' contains columns 'Review' and 'Sentiment'
data = pd.read_csv('women_clothing_reviews.csv')

# Display the first few rows of the dataset
print("Dataset Preview:")
print(data.head())

# Step 2: Split the data into features and target labels
# Rows with a missing review or a missing label cannot be used:
# CountVectorizer raises on NaN documents and NaN labels cannot be trained
# on. They are dropped into a separate frame so `data` keeps the raw file
# contents.
labelled_rows = data.dropna(subset=['Review', 'Sentiment'])

# Coerce to str so a review that was parsed as a number is still tokenizable;
# this is a no-op for values that are already strings.
X = labelled_rows['Review'].astype(str)
y = labelled_rows['Sentiment']

# Step 3: Split the data into training and testing sets
# 80/20 split with a fixed seed so the run is reproducible.
# NOTE(review): on a very small dataset the 20% hold-out can be a single row,
# which makes the reported metrics uninformative.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Convert text to a matrix of token counts
# The vocabulary is learned from the training split only and then reused for
# the test split, so no test-set tokens leak into the fitted vocabulary.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Step 5: Train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train_counts, y_train)

# Step 6: Make predictions on the test set
y_pred = model.predict(X_test_counts)

# Step 7: Display accuracy
print("\nAccuracy of the Model:")
print(accuracy_score(y_test, y_pred))

# Step 8: Display detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 9: Test with new data (Optional)
# New text must go through the same fitted vectorizer so its columns line up
# with the features the model was trained on.
new_reviews = ["This dress is fantastic! I love it.", "The material is terrible and it doesn't fit well."]
new_reviews_counts = vectorizer.transform(new_reviews)
predictions = model.predict(new_reviews_counts)

print("\nPredictions for New Reviews:")
print(predictions)


Dataset Preview:
                                              Review Sentiment
0  Absolutely wonderful - silky and sexy and comf...  positive
1  I had such high hopes for this dress and reall...  negative
2  This dress is pretty but the material feels cheap  negative
3     Love this dress! It's perfect for a summer day  positive
4          The color is beautiful but the fit is off  negative

Accuracy of the Model:
1.0

Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Predictions for New Reviews:
['positive' 'negative']
