In [25]:
import pandas as pd
from transformers import pipeline

# Load the dataset
wine_data = pd.read_csv('WineDataset.csv')

# Set SAMPLE_SIZE to a small integer (e.g. 10) for a quick trial run on the
# first rows only; leave it as None to process the whole dataset.
SAMPLE_SIZE = None
if SAMPLE_SIZE is not None:
    wine_data = wine_data.head(SAMPLE_SIZE)

# Initialize the zero-shot classification pipeline using the Hugging Face model.
# The task is named explicitly instead of being inferred from the model, so the
# kind of pipeline created here is visible in the code itself.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Define the candidate dish categories for pairing
candidate_labels = ["meat", "poultry", "fish", "cheese", "vegetables"]



In [26]:
def classify_wine(description):
    """Score a wine description against every candidate dish category.

    Runs the module-level zero-shot ``classifier`` on ``description`` and
    returns its scores keyed by label.

    Args:
        description: Free-text wine description. Non-string values (for
            example the NaN pandas uses for an empty cell) and blank strings
            are treated as "no description".

    Returns:
        dict: Maps each label in ``candidate_labels`` to its score, with keys
        in ``candidate_labels`` order. Every score is 0.0 when there is no
        description to classify.
    """
    # A few rows have no usable description; there is nothing to classify, so
    # skip the model call and report a neutral score for every label.
    if not isinstance(description, str) or not description.strip():
        return {label: 0.0 for label in candidate_labels}

    result = classifier(description, candidate_labels=candidate_labels)

    # The classifier lists labels ranked by score, so their order changes from
    # one description to the next. Re-key in candidate_labels order so every
    # row produces the same key order (and a stable column order downstream).
    score_by_label = dict(zip(result['labels'], result['scores']))
    return {label: score_by_label[label] for label in candidate_labels}

In [28]:
# Apply the classification function to the 'Description' column.
# This returns a Series of dictionaries with dish scores for each row.
classification_results = wine_data['Description'].apply(classify_wine)

# Convert the Series of dictionaries into a DataFrame where each dish category
# becomes a column. Reusing wine_data's index keeps every score row attached to
# the wine it was computed from even if that index is not a plain 0..n-1 range,
# and passing `columns` fixes the column order regardless of per-row key order.
classification_df = pd.DataFrame(
    classification_results.tolist(),
    index=wine_data.index,
    columns=candidate_labels,
)

# Concatenate the original DataFrame with the classification scores.
# Score columns left over from an earlier run of this cell are dropped first:
# otherwise re-running would duplicate them, and to_json(orient='records')
# rejects frames whose column names are not unique.
wine_data = pd.concat(
    [wine_data.drop(columns=candidate_labels, errors='ignore'), classification_df],
    axis=1,
)

# Save the modified dataset with classification scores to a JSON file
wine_data.to_json('WineDataset_with_Dishes.json', orient='records')

{'sequence': "We asked some of our most prized winemakers working in Spain to make the best wines possible – no rules, no restrictions, no red tape. The Guv'nor collection was their answer. Made from Tempranillo grapes picked from their favourite vineyards across Spain, it’s a crowd-pleasing wine designed to be shared with good food – and even better company. It’s bold. It’s fruity. It’s a modern Spanish red that goes with everything, especially spicy barbecue meats or anything chargrilled.", 'labels': ['meat', 'vegetables', 'poultry', 'cheese', 'fish'], 'scores': [0.9287120699882507, 0.025151513516902924, 0.017997141927480698, 0.014859834685921669, 0.013279418461024761]}


KeyboardInterrupt: 