# In [5]:
import os
import requests_cache
import pandas as pd
import requests
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import seaborn as sns
from pytrends.request import TrendReq
import datetime
from ipywidgets import Text, Button, Output, VBox

# Opt in to pandas' future (non-silent) downcasting behaviour to avoid
# FutureWarnings. The option does not exist in every pandas release; pandas
# signals an unknown option with OptionError, which is documented as being
# compatible with KeyError checks, so a missing option is simply skipped.
try:
    pd.set_option('future.no_silent_downcasting', True)
except KeyError:
    pass

# API Setup
# The key may be supplied through the COCKTAILDB_API_KEY environment variable;
# the literal below is kept as the fallback so existing runs behave the same.
# NOTE(review): a credential committed to source should eventually be removed.
api_key = os.environ.get('COCKTAILDB_API_KEY', '9973533')
base_url = 'https://www.thecocktaildb.com/api/json/v2/'  # Updated to v2

# Initialize Google Trends API
pytrends = TrendReq(hl='en-US', tz=360)

# Function to fetch cocktails by ingredient
def get_cocktails_by_ingredient(ingredient, api_key):
    """Query the ``filter.php`` endpoint for cocktails using *ingredient*.

    Args:
        ingredient: Ingredient name to filter by.
        api_key: API key inserted into the request path after ``base_url``.

    Returns:
        The decoded JSON payload. When the response body is not valid JSON,
        ``{'drinks': None}`` is returned so callers can treat it as "no match".

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = f'{base_url}{api_key}/filter.php'
    # Let requests build the query string so spaces and special characters in
    # the ingredient are encoded, and bound the wait so the UI cannot hang.
    response = requests.get(url, params={'i': ingredient}, timeout=10)
    response.raise_for_status()
    try:
        return response.json()
    except ValueError:
        # A successful response without a JSON body carries no cocktails.
        return {'drinks': None}

# Function to fetch detailed information for a cocktail
def get_cocktail_details(cocktail_id, api_key):
    """Look up one cocktail through the ``lookup.php`` endpoint.

    Args:
        cocktail_id: Identifier passed as the ``i`` query parameter.
        api_key: API key inserted into the request path after ``base_url``.

    Returns:
        The first entry of the ``'drinks'`` list in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        LookupError: If the response contains no drink for *cocktail_id*.
    """
    url = f'{base_url}{api_key}/lookup.php'
    response = requests.get(url, params={'i': cocktail_id}, timeout=10)
    response.raise_for_status()

    payload = response.json()
    drinks = payload.get('drinks') if isinstance(payload, dict) else None
    # Fail with an explicit message instead of an opaque TypeError/IndexError
    # when the lookup yields nothing usable.
    if not isinstance(drinks, list) or not drinks:
        raise LookupError(f'No cocktail found for id {cocktail_id!r}')
    return drinks[0]  # Return the first item from the 'drinks' list

# Function to fetch cocktail data for multiple ingredients
def fetch_cocktail_data(ingredients, api_key):
    """Collect detailed cocktail records for every ingredient in *ingredients*.

    Each ingredient is stripped and lower-cased, then used to list matching
    cocktails; the details of each listed cocktail are fetched and gathered
    into one DataFrame (one row per listing, so a cocktail matched by several
    ingredients appears several times).

    Args:
        ingredients: Iterable of ingredient names.
        api_key: API key forwarded to the request helpers.

    Returns:
        A DataFrame built from the collected detail records; empty when
        nothing matched.
    """
    details_by_id = {}  # each cocktail's details are downloaded only once
    cocktail_data = []
    for ingredient in ingredients:
        ingredient = ingredient.strip().lower()  # Strip spaces and convert to lowercase
        if not ingredient:
            # Blank entries (e.g. from a trailing comma) are not worth a request.
            continue

        cocktails = get_cocktails_by_ingredient(ingredient, api_key)
        drinks = cocktails.get('drinks') if isinstance(cocktails, dict) else None
        if not isinstance(drinks, list):
            # Missing, null or non-list 'drinks' means there is nothing to iterate.
            continue

        for cocktail in drinks:
            cocktail_id = cocktail['idDrink']
            if cocktail_id not in details_by_id:
                details_by_id[cocktail_id] = get_cocktail_details(cocktail_id, api_key)
            cocktail_data.append(details_by_id[cocktail_id])
    return pd.DataFrame(cocktail_data)

# Function to clean and preprocess the data
def clean_data(cocktail_df):
    """Clean a cocktail DataFrame in place and return it.

    Steps:
      * drop rows that are completely empty;
      * drop rows that name no ingredient at all (when ingredient columns exist);
      * strip and lower-case every ``strIngredient*`` value;
      * drop exact duplicate rows.

    Rows are NOT dropped merely because some field is missing: a record with
    unused ingredient slots still describes a valid cocktail, and requiring
    every column to be filled would discard such rows.

    Args:
        cocktail_df: DataFrame of cocktail records; modified in place.

    Returns:
        The same DataFrame object, cleaned.
    """
    ingredient_columns = [col for col in cocktail_df.columns if 'strIngredient' in col]

    def _normalize(value):
        # Leave missing / non-text cells untouched.
        return value.strip().lower() if isinstance(value, str) else value

    # Drop rows with no data at all
    cocktail_df.dropna(how='all', inplace=True)

    # Drop rows that do not list a single ingredient
    if ingredient_columns:
        cocktail_df.dropna(subset=ingredient_columns, how='all', inplace=True)

    # Normalize ingredient names before de-duplicating so that rows differing
    # only in case or padding collapse into one.
    for col in ingredient_columns:
        cocktail_df[col] = cocktail_df[col].map(_normalize)

    # Remove duplicate rows
    cocktail_df.drop_duplicates(inplace=True)

    return cocktail_df

# Function to visualize ingredient popularity and combinations
def visualize_ingredient_popularity(cocktail_df, selected_ingredients):
    """Plot how often each selected ingredient occurs in the matching cocktails.

    Args:
        cocktail_df: DataFrame whose ``strIngredient*`` columns hold ingredient
            names (compared against lower-cased, stripped selections).
        selected_ingredients: Iterable of ingredient names chosen by the user.

    Returns:
        None. Prints a message and returns early when no row contains any of
        the selected ingredients; otherwise shows a bar chart.
    """
    # Normalize the selection; blank entries are dropped so they do not end up
    # as empty-label bars, and repeats are collapsed while keeping order.
    normalized = (ingredient.strip().lower() for ingredient in selected_ingredients)
    selected_ingredients = list(dict.fromkeys(name for name in normalized if name))
    ingredient_columns = [col for col in cocktail_df.columns if 'strIngredient' in col]

    # Filter rows that contain any of the selected ingredients (vectorized
    # membership test instead of a Python-level apply over every row)
    mask = cocktail_df[ingredient_columns].isin(selected_ingredients).any(axis=1)
    filtered_df = cocktail_df[mask]

    if filtered_df.empty:
        print("No cocktails found with the selected ingredients.")
        return

    # Calculate frequency of selected ingredients
    matched = filtered_df[ingredient_columns]
    ingredient_counts = pd.Series(
        {ingredient: (matched == ingredient).sum().sum() for ingredient in selected_ingredients}
    )

    # Plot ingredient popularity (horizontal bar chart)
    plt.figure(figsize=(10, 6))
    sns.barplot(x=ingredient_counts.values, y=ingredient_counts.index, palette="viridis")
    plt.title('Popularity of Selected Ingredients in Cocktails')
    plt.xlabel('Frequency')
    plt.ylabel('Ingredients')
    plt.show()

# Function to apply clustering and analyze clusters
def cluster_and_analyze(cocktail_df):
    """Cluster cocktails by their ingredients and report cluster quality.

    The ``strIngredient*`` columns hold text, which KMeans cannot consume, so
    each cocktail is first encoded as a 0/1 vector with one position per
    distinct ingredient. The resulting labels are stored in a new ``'cluster'``
    column of *cocktail_df* (the frame is modified in place).

    Args:
        cocktail_df: DataFrame with ``strIngredient*`` columns.

    Returns:
        None. Prints a message and returns early when there are fewer than two
        rows or no ingredient values; otherwise shows a plot and prints the
        silhouette score (or a note when it is undefined).
    """
    ingredient_columns = [col for col in cocktail_df.columns if 'strIngredient' in col]
    ingredients = cocktail_df[ingredient_columns]

    # Distinct ingredient names; missing cells (None/NaN) and empty strings are ignored.
    vocabulary = sorted({
        value
        for value in ingredients.to_numpy().ravel()
        if isinstance(value, str) and value
    })

    n_samples = len(cocktail_df)
    if n_samples < 2 or not vocabulary:
        print("Not enough ingredient data to cluster.")
        return

    # One column per ingredient: 1.0 where the cocktail uses it, else 0.0.
    features = np.column_stack([
        (ingredients == name).any(axis=1).to_numpy(dtype=float)
        for name in vocabulary
    ])

    # Apply KMeans clustering. Never ask for more clusters than there are
    # distinct ingredient vectors, otherwise some clusters would stay empty.
    n_distinct = len(np.unique(features, axis=0))
    n_clusters = min(5, n_distinct)  # Adjust the upper bound if needed
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(features)
    cocktail_df['cluster'] = labels

    # Analyze clusters
    plt.figure(figsize=(10, 6))
    if {'Gin_trend', 'Vodka_trend'}.issubset(cocktail_df.columns):
        # Trend columns are only plotted when some earlier step supplied them.
        sns.scatterplot(x='Gin_trend', y='Vodka_trend', hue='cluster', data=cocktail_df, palette="Set2")
        plt.title('Cocktail Clusters by Trends')
        plt.xlabel('Gin Trend')
        plt.ylabel('Vodka Trend')
    else:
        # Without trend data, show how many cocktails landed in each cluster.
        cluster_sizes = cocktail_df['cluster'].value_counts().sort_index()
        sns.barplot(x=cluster_sizes.index, y=cluster_sizes.values)
        plt.title('Cocktails per Cluster')
        plt.xlabel('Cluster')
        plt.ylabel('Number of Cocktails')
    plt.show()

    # Display silhouette score; it is only defined when the number of distinct
    # labels lies between 2 and n_samples - 1.
    n_labels = len(set(labels))
    if 2 <= n_labels <= n_samples - 1:
        score = silhouette_score(features, labels)
        print(f'Silhouette Score: {score:.2f}')
    else:
        print('Silhouette Score: n/a (requires between 2 and n_samples - 1 clusters)')

# Main function to fetch data, process it, and display visualizations
def fetch_data_for_input(ingredients_input):
    """Run the full pipeline for a comma-separated ingredient string.

    Fetches cocktails for the listed ingredients, cleans the data, plots
    ingredient popularity and finally clusters the cocktails.

    Args:
        ingredients_input: Text such as ``'Gin,Vodka'``.

    Returns:
        None. Prints a message and stops early when the input names no
        ingredient, when nothing was fetched, or when cleaning leaves no rows.
    """
    # Ignore blank entries (empty input, doubled or trailing commas) so no
    # request is made for an empty ingredient name.
    ingredients = [name.strip() for name in ingredients_input.split(',') if name.strip()]
    if not ingredients:
        print("Please enter at least one ingredient.")
        return

    cocktail_df = fetch_cocktail_data(ingredients, api_key)

    if cocktail_df.empty:
        print("No cocktails found for the selected ingredients.")
        return

    # Clean the data
    cocktail_df = clean_data(cocktail_df)

    # Cleaning may remove every row; the later steps need data to work with.
    if cocktail_df.empty:
        print("No usable cocktail data left after cleaning.")
        return

    # Visualize ingredient popularity and combinations
    visualize_ingredient_popularity(cocktail_df, ingredients)

    # Apply clustering and analyze clusters
    cluster_and_analyze(cocktail_df)

# Input widget for user to enter ingredients
# Text box pre-filled with 'Gin,Vodka'; its value is read by the click handler
# and passed to fetch_data_for_input as a comma-separated string.
ingredients_input = Text(value='Gin,Vodka', description='Ingredients:', placeholder='Enter ingredients separated by commas')
# Button that triggers the fetch when clicked.
fetch_button = Button(description='Fetch Cocktails', style={'button_color': 'lightblue'})
# Output area that captures everything printed or plotted by the handler.
output = Output()

def on_button_clicked(b):
    """Handle a click on the fetch button.

    Everything produced while handling the click is routed into the
    ``output`` widget, after wiping what a previous click left there.

    Args:
        b: The object passed by the click callback; not used.
    """
    with output:
        # Start from a clean output area on every click.
        output.clear_output()
        requested = ingredients_input.value
        fetch_data_for_input(requested)

# Register the click handler on the fetch button.
fetch_button.on_click(on_button_clicked)

# Display input and output widgets
# NOTE(review): `display` is not imported in this cell; presumably it is the
# notebook-provided built-in. Confirm before running outside Jupyter.
display(VBox([ingredients_input, fetch_button, output]))

# Out: VBox(children=(Text(value='Gin,Vodka', description='Ingredients:', placeholder='Enter ingredients separated by…