Import necessary libraries.

In [1]:
import pandas as pd
from transformers import pipeline
import matplotlib.pyplot as plt
import seaborn as sns


Load the car review dataset.

In [2]:
# Read the customer reviews CSV into a DataFrame, then print its first rows
csv_path = 'car_reviews.csv'
reviews = pd.read_csv(csv_path)
preview = reviews.head()
print(preview)

                                              Review
0  Bought 2017 Optima Hybrid in November 17. It w...
1   You get a lot for your money and great perfor...
2   This car is amazing and have no complaints. Y...
3  At 11k now in a lease for 39 months and it onl...
4  I've owned BMW, Lexus, Mercedes-Benz in the la...


Classify Reviews into Categories

In [None]:
# Build a zero-shot classifier backed by the facebook/bart-large-mnli checkpoint
zero_shot_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Topics a review may be assigned to
candidate_labels = [
    "driving experience",
    "features",
    "value for money",
    "issues",
    "other",
]


def _top_label(review_text):
    """Classify one review and return the first entry of the result's 'labels' list.

    NOTE(review): the first label is presumably the highest-scoring one;
    confirm against the transformers zero-shot pipeline documentation.
    """
    prediction = zero_shot_classifier(review_text, candidate_labels)
    return prediction['labels'][0]


# Store the selected label for every review in a new 'talks_about' column
reviews['talks_about'] = reviews['Review'].apply(_top_label)

Perform Sentiment Analysis

In [None]:
# Initialize the sentiment analysis pipeline with the DistilBERT SST-2 checkpoint
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Analyze sentiment for each review and keep only the predicted label.
# truncation=True is forwarded to the tokenizer so that a review longer than
# the model's maximum input length is cut off instead of raising a
# tensor-size error inside the model and aborting the whole apply().
reviews['sentiment'] = reviews['Review'].apply(
    lambda x: sentiment_analyzer(x, truncation=True)[0]['label']
)

Visualize Sentiment Distribution

In [None]:
# Bar chart counting how many reviews fall under each sentiment label.
# NOTE(review): seaborn 0.13+ is documented to warn when `palette` is passed
# without `hue`; revisit this call once the target seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=reviews, x='sentiment', palette='viridis', ax=ax)
ax.set(
    title='Sentiment Distribution of Reviews',
    xlabel='Sentiment',
    ylabel='Count',
)
plt.show()

Visualize Review Category Distribution

In [None]:
# Bar chart counting how many reviews were assigned to each category.
# NOTE(review): seaborn 0.13+ is documented to warn when `palette` is passed
# without `hue`; revisit this call once the target seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=reviews, x='talks_about', palette='plasma', ax=ax)
ax.set(
    title='Distribution of Review Categories',
    xlabel='Review Category',
    ylabel='Count',
)
# Tilt the category names so the longer labels do not overlap
ax.tick_params(axis='x', labelrotation=45)
plt.show()

Save Updated Dataset


In [None]:
# Write the DataFrame, including any prediction columns added above, to a new
# CSV file; index=False leaves the row index out of the file.
output_path = 'updated_car_reviews.csv'
reviews.to_csv(output_path, index=False)