In [None]:
# Install necessary libraries
# !pip install transformers pandas matplotlib

# Step 1: Load the CSV dataset
import pandas as pd

# Read the wine reviews CSV into a DataFrame.
dataset = pd.read_csv("wine_reviews.csv")
print("Dataset Loaded Successfully!")
# A bare `dataset.head()` is only rendered when it is the final expression of
# a notebook cell; here more statements follow, so print the preview explicitly.
print(dataset.head())

# Step 2: Import HuggingFace Pipeline and Select the Zero-Shot Classification Model
from transformers import pipeline

# Build the zero-shot classification pipeline backed by the BART MNLI checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
print("Model Loaded: facebook/bart-large-mnli")

# Step 3: Define Candidate Labels
# Labels offered to the zero-shot classifier; "other" doubles as the fallback
# value used for reviews that have no text.
candidate_labels = [
    "talks about food combinations",
    "talks about taste",
    "talks about value for money",
    "other",
]

# Step 4: Classify Reviews and Add Predicted Labels
def classify_review(review):
    """Return the best-matching candidate label for a single review.

    Args:
        review: Review text taken from the dataset's ``review`` column. May be
            a missing value (``None``/``NaN``) or a blank string.

    Returns:
        The first entry of the classifier's ``labels`` result (the label with
        the highest score), or ``"other"`` when there is no text to classify.
    """
    # Missing values have nothing to classify.
    if pd.isna(review):
        return "other"
    # Blank or whitespace-only text carries no signal; skip the model call
    # rather than record whatever label happens to rank first for it.
    if isinstance(review, str) and not review.strip():
        return "other"
    result = classifier(review, candidate_labels)
    return result['labels'][0]  # Labels come back ordered best-first

# Label every review and store the result in a new 'talks_about' column.
review_texts = dataset['review']
predicted_labels = review_texts.apply(classify_review)
dataset['talks_about'] = predicted_labels

# Persist the labelled dataset (optional), omitting the index column.
output_path = "classified_wine_reviews.csv"
dataset.to_csv(output_path, index=False)
print("Classification Completed and Saved to classified_wine_reviews.csv")

# Step 5: Visualize the Spread of Categories
import matplotlib.pyplot as plt

# Tally how many reviews fall under each predicted label.
category_counts = dataset['talks_about'].value_counts()

# Draw the tallies as a bar chart using an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 6))
category_counts.plot(kind='bar', color='skyblue', edgecolor='black', ax=ax)
ax.set_title("Spread of Review Categories")
ax.set_xlabel("Categories")
ax.set_ylabel("Number of Reviews")
# Tilt the category names so they stay readable.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
fig.tight_layout()
plt.show()


Dataset Loaded Successfully!
