In [None]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def _encode_in_place(frame, column):
    """Label-encode ``frame[column]`` in place and return the fitted encoder.

    Values are cast to ``str`` before fitting, so the encoder's classes are
    strings.
    """
    encoder = LabelEncoder()
    frame[column] = encoder.fit_transform(frame[column].astype(str))
    return encoder


# Read the raw pairing table.
df = pd.read_csv("alcohol_snack_pairing_final.csv")

# Capture the distinct human-readable names while the columns still hold text.
original_snacks = df["Snack Name"].unique().tolist()
original_drinks = df["Beverage Name"].unique().tolist()

# Keep the free-text notes in a (beverage, snack) -> note lookup, then remove
# the column so it does not end up among the model features.
pairing_notes = {}
if "Pairing Notes" in df.columns:
    pair_keys = zip(df["Beverage Name"], df["Snack Name"])
    pairing_notes = dict(zip(pair_keys, df["Pairing Notes"]))
    df = df.drop(columns=["Pairing Notes"])

# Feature columns that are label-encoded (the two target columns are handled
# separately below).
categorical_columns = [
    "Type",
    "Flavor Profile",
    "Acidity Level",
    "Sweetness Level",
    "Tannin Level",
    "Serving Temperature",
    "Cuisine Type",
    "Snack Flavor Profile",
    "Texture",
    "Fat Content",
    "Spice Level",
]

# One fitted encoder per feature column, keyed by column name.
encoders = {}
for feature in categorical_columns:
    encoders[feature] = _encode_in_place(df, feature)

# The target columns get their own dedicated encoders.
snack_encoder = _encode_in_place(df, "Snack Name")
drink_encoder = _encode_in_place(df, "Beverage Name")

# Write the fully encoded table to disk so it can be inspected when debugging.
df.to_csv("processed_dataset.csv", index=False)

# Persist every artifact, in this order, under its file name.
artifacts = {
    "feature_encoders.pkl": encoders,
    "snack_encoder.pkl": snack_encoder,
    "drink_encoder.pkl": drink_encoder,
    "pairing_notes.pkl": pairing_notes,
    "original_snacks.pkl": original_snacks,
    "original_drinks.pkl": original_drinks,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

def _fit_forest(features, target, label, model_path):
    """Train, score and save one random-forest classifier.

    Splits ``features``/``target`` 80/20 with a fixed seed, fits a
    100-tree ``RandomForestClassifier`` on the training part, prints the
    accuracy on the held-out part (prefixed with ``label``), and dumps the
    fitted model to ``model_path``.

    Returns:
        ``(model, predictions, (X_train, X_test, y_train, y_test))`` where
        ``predictions`` are the model's outputs for ``X_test``.
    """
    split = train_test_split(features, target, test_size=0.2, random_state=42)
    train_features, test_features, train_target, test_target = split

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(train_features, train_target)

    predictions = model.predict(test_features)
    accuracy_pct = accuracy_score(test_target, predictions) * 100
    print(f"✅ {label} Model Accuracy: {accuracy_pct:.2f}%")

    joblib.dump(model, model_path)
    return model, predictions, split


# Drink model: predict the encoded beverage from every other column.
X = df.drop(columns=["Beverage Name"])
y = df["Beverage Name"]
drink_model, y_pred, (X_train, X_test, y_train, y_test) = _fit_forest(
    X, y, "Drink", "drink_recommendation_model.pkl"
)

# Snack model: predict the encoded snack from every other column.
# Note: the split variables are rebound here, so after this cell they refer
# to the snack split, not the drink split.
X_snack = df.drop(columns=["Snack Name"])
y_snack = df["Snack Name"]
snack_model, y_pred_snack, (X_train, X_test, y_train, y_test) = _fit_forest(
    X_snack, y_snack, "Snack", "snack_recommendation_model.pkl"
)

print("✅ Models Trained and Saved Successfully!")


In [57]:
import joblib

# Reload the two target encoders and the pre-encoding name lists from disk.
snack_encoder = joblib.load("snack_encoder.pkl")
drink_encoder = joblib.load("drink_encoder.pkl")
original_snacks = joblib.load("original_snacks.pkl")
original_drinks = joblib.load("original_drinks.pkl")

# Print each encoder's classes next to the saved name list for a visual
# sanity check: both are expected to show real names rather than numbers.
inspection_rows = (
    ("Encoded Snacks:", list(snack_encoder.classes_)),
    ("Original Snacks:", original_snacks),
    ("Encoded Drinks:", list(drink_encoder.classes_)),
    ("Original Drinks:", original_drinks),
)
for heading, values in inspection_rows:
    print(heading, values)


Encoded Snacks: ['Barbecue Wings', 'Bhujia', 'Buffalo Wings', 'Chakli', 'Cheese Platter', 'Chips & Guacamole', 'Dark Chocolate', 'Dhokla', 'French Fries', 'Grilled Chicken', 'Jalebi', 'Masala Peanuts', 'Paneer Tikka', 'Pani Puri', 'Peanut Butter Cookies', 'Popcorn', 'Pretzels', 'Salted Nuts', 'Samosa', 'Spicy Nachos']
Original Snacks: ['Barbecue Wings', 'Grilled Chicken', 'Pretzels', 'Chakli', 'Chips & Guacamole', 'Barbecue Wings', 'French Fries', 'Dark Chocolate', 'Masala Peanuts', 'Spicy Nachos', 'Grilled Chicken', 'Salted Nuts', 'Pani Puri', 'Barbecue Wings', 'Paneer Tikka', 'Samosa', 'Buffalo Wings', 'Salted Nuts', 'Jalebi', 'Dhokla', 'Paneer Tikka', 'Salted Nuts', 'Grilled Chicken', 'Chakli', 'Paneer Tikka', 'Dhokla', 'Salted Nuts', 'Chakli', 'Paneer Tikka', 'French Fries', 'Samosa', 'Pretzels', 'Bhujia', 'Jalebi', 'Cheese Platter', 'Peanut Butter Cookies', 'Samosa', 'Salted Nuts', 'Barbecue Wings', 'Grilled Chicken', 'Popcorn', 'Salted Nuts', 'Paneer Tikka', 'Jalebi', 'Jalebi', '

In [33]:
from xgboost import XGBClassifier

# Hyperparameters shared by both gradient-boosted classifiers.
xgb_params = {"n_estimators": 300, "learning_rate": 0.05, "max_depth": 5}

# Fresh, unfitted estimators; these rebind the names drink_model / snack_model.
drink_model = XGBClassifier(**xgb_params)
snack_model = XGBClassifier(**xgb_params)
