### Data Preprocessing

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Toy fixtures for the recommender: one row per user and one row per drink.
# Rows are written record-wise; the column order is given by `columns=`.
user_preferences = pd.DataFrame(
    [
        (1, 1, 0, 1, 1, 0, 1, 1),
        (2, 0, 1, 1, 0, 1, 0, 1),
    ],
    columns=[
        'user_id', 'fruity', 'milky', 'with_tea',
        'refreshing', 'fragrant', 'adventurous', 'cold',
    ],
)

# The last two columns are distance (in miles) and a popularity score.
bubble_tea_drinks = pd.DataFrame(
    [
        (101, 1, 0, 1, 1, 0, 1, 0.5, 80),
        (102, 0, 1, 1, 0, 1, 1, 1.2, 60),
    ],
    columns=[
        'drink_id', 'fruity', 'milky', 'with_tea',
        'refreshing', 'fragrant', 'cold', 'distance', 'popularity',
    ],
)

# No one-hot encoding is applied here: every feature in this example is already numeric.
# String-valued (categorical) features would first need encoding, e.g. with OneHotEncoder.

# Merging user preferences with bubble tea drinks
def merge_preferences_and_drinks(user_prefs, drinks, *, suffixes=None):
    """Pair every user with every drink, producing one row per pair.

    Rows are read with ``to_dict('records')`` rather than ``iterrows()``:
    ``iterrows()`` converts each row to a single-dtype Series, so a drink row
    mixing ints and floats would come back with float ids and float flags.

    Args:
        user_prefs: DataFrame with a ``user_id`` column plus preference columns.
        drinks: DataFrame with a ``drink_id`` column plus drink attributes.
        suffixes: Optional ``(user_suffix, drink_suffix)`` pair. When omitted,
            a column present in both frames keeps a single entry holding the
            drink's value (the user's value is overwritten). When given, each
            such column is emitted twice, once per side, with the matching
            suffix appended to its name. ``user_id`` and ``drink_id`` are
            never suffixed.

    Returns:
        DataFrame with ``len(user_prefs) * len(drinks)`` rows, ordered by user
        and then by drink. It is empty if either input has no rows.

    Raises:
        KeyError: If at least one pair exists and ``user_prefs`` lacks
            ``user_id`` or ``drinks`` lacks ``drink_id``.
    """
    user_rows = user_prefs.to_dict('records')
    drink_rows = drinks.to_dict('records')

    if suffixes is not None:
        user_suffix, drink_suffix = suffixes
        # Only names present on both sides need disambiguating.
        clashing = (set(user_prefs.columns) & set(drinks.columns)) - {'user_id', 'drink_id'}
        user_rows = [
            {(f'{key}{user_suffix}' if key in clashing else key): value
             for key, value in row.items()}
            for row in user_rows
        ]
        drink_rows = [
            {(f'{key}{drink_suffix}' if key in clashing else key): value
             for key, value in row.items()}
            for row in drink_rows
        ]

    merged_rows = []
    for user_row in user_rows:
        for drink_row in drink_rows:
            # Later keys win, so drink values replace same-named user values.
            combined = {**user_row, **drink_row}
            # Pin each id to its own side even if the other frame has that column.
            combined['user_id'] = user_row['user_id']
            combined['drink_id'] = drink_row['drink_id']
            merged_rows.append(combined)
    return pd.DataFrame(merged_rows)

# Build the (user, drink) pair table.
merged_data = merge_preferences_and_drinks(user_preferences, bubble_tea_drinks)

# Standardize the two continuous columns; the binary flags are left untouched.
# The fitted `scaler` is kept at module level after this step.
scaler = StandardScaler()
scaler.fit(merged_data[['distance', 'popularity']])
merged_data[['distance', 'popularity']] = scaler.transform(
    merged_data[['distance', 'popularity']]
)

print(merged_data)


### Model Training

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Assuming 'merged_data' is the preprocessed dataframe, one row per
# (user, drink) pair, with 'distance' and 'popularity' already standardized.
# Example labels (1 for liked/recommended, 0 for not liked/not recommended)
merged_data['label'] = [1, 0, 0, 1]  # Add your actual labels here

# Features and labels: the id columns identify rows and are not model inputs.
X = merged_data.drop(['user_id', 'drink_id', 'label'], axis=1)
y = merged_data['label']

# Train a classifier; a fixed seed makes the fitted forest reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Predict for a new user preference, given in RAW (unscaled) units.
new_user_pref = pd.DataFrame({
    'fruity': [1],
    'milky': [0],
    'with_tea': [1],
    'refreshing': [1],
    'fragrant': [0],
    'adventurous': [1],
    'cold': [1],
    'distance': [0.4],  # Raw distance in miles
    'popularity': [75]  # Raw popularity score
})

# The model was trained on standardized 'distance'/'popularity', so the new
# sample must go through the same already-fitted scaler (transform only,
# never re-fit on prediction data).
scaled_columns = ['distance', 'popularity']
new_user_pref[scaled_columns] = scaler.transform(new_user_pref[scaled_columns])

# Present the features in exactly the order used for training; a missing
# column fails here with a KeyError instead of inside predict().
new_user_pref = new_user_pref[X.columns]

prediction = model.predict(new_user_pref)
print(prediction)
