In [10]:
# Imports
import pandas as pd
import numpy as np

In [11]:
# Load the recipe/ingredient table, using the first CSV column as the row index
data = pd.read_csv(
    '../recipe_ingredients_dataset/ingredient_df.csv',
    index_col=0,
)

In [12]:
# Preview the loaded frame (the notebook display truncates rows and columns)
data

Unnamed: 0_level_0,cuisine,alexia waffle fries,perciatelli,sweet and sour mix,corn bread crumbs,black cardamom pods,tumeric,fresh marjoram,sweet soy sauce,johnsonville hot & spicy breakfast links,...,extra virgin coconut oil,chive blossoms,satsuma imo,globe eggplant,frangipane,bee pollen,low sodium turkey breast,nonfat yogurt plain,oysters,nakano seasoned rice vinegar
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,spanish,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,mexican,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,french,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,chinese,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,italian,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49713,southern_us,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49714,irish,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49715,filipino,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49716,indian,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Build a recipe name from the cuisine plus the chosen proteins and other ingredients
def try_create_name(row, proteins, other_ingredients):
    '''Build a recipe name from a cuisine, proteins and other ingredients.

    The name has the form "<cuisine> Style <proteins> <other ingredients>".
    Multiple proteins are joined with " and "; all other parts are separated
    by exactly one space.

    Parameters
    ----------
    row : mapping or pandas.Series
        Recipe record; only ``row['cuisine']`` is read.
    proteins : list of str
        Protein ingredients to mention (may be empty).
    other_ingredients : list of str or None
        Non-protein ingredients to append (may be empty or None).

    Returns
    -------
    str
        The assembled recipe name.
    '''
    # Naming convention of the recipe: cuisine first
    cuisine_style = f"{row['cuisine']} Style"

    # ' and '.join covers every case: '' for no protein, the bare name for
    # one protein, "a and b" for several
    protein_part = ' and '.join(proteins)

    # Drop empty parts before joining; otherwise a recipe without proteins
    # ends up with a double space between "Style" and the first ingredient.
    name_parts = [cuisine_style, protein_part, *(other_ingredients or [])]
    return ' '.join(part for part in name_parts if part).strip()

In [16]:
# Function that generates a unique name for each recipe
def generate_recipe_name(row, ingredient_cols, protein_list, existing_names):
    '''Generate a recipe name that is not yet in ``existing_names``.

    The first candidate uses up to two proteins plus enough other ingredients
    to reach three ingredients in total. If that name is taken, other
    combinations of the non-protein ingredients (still at most three
    ingredients overall) are tried. If every candidate is taken, a
    " Variant <n>" suffix is appended to the first candidate.

    Parameters
    ----------
    row : pandas.Series or mapping
        Recipe record; ``row[col] == 1`` marks ingredient ``col`` as present.
        It is also passed on to ``try_create_name``.
    ingredient_cols : iterable of str
        Ingredient column names to inspect in ``row``.
    protein_list : collection of str
        Ingredient names treated as proteins (matched by exact equality).
    existing_names : set of str
        Names already handed out; the returned name is added to it in place.

    Returns
    -------
    str
        A name that was not in ``existing_names`` when the call started.
    '''
    # Function-scope import so this cell stays runnable on its own
    from itertools import combinations

    max_name_ingredients = 3

    # Identify which ingredients are present in the recipe
    ingredients = [col for col in ingredient_cols if row[col] == 1]
    proteins = [ing for ing in ingredients if ing in protein_list]
    other_ingredients = [ing for ing in ingredients if ing not in protein_list]

    # Up to two proteins; the remaining slots go to non-protein ingredients
    selected_proteins = proteins[:2]
    max_other_ingredients = max_name_ingredients - len(selected_proteins)
    selected_other_ingredients = other_ingredients[:max_other_ingredients]

    full_name = try_create_name(row, selected_proteins, selected_other_ingredients)

    # Check for uniqueness
    if full_name not in existing_names:
        existing_names.add(full_name)
        return full_name

    # Not unique: try other combinations of the non-protein ingredients.
    # Only combination sizes that fit in the remaining slots are generated;
    # enumerating larger ones just to skip them is exponential in the number
    # of ingredients and can never produce a name.
    largest_combo = min(len(other_ingredients), max_other_ingredients)
    for num_others in range(1, largest_combo + 1):
        for combo in combinations(other_ingredients, num_others):
            new_name = try_create_name(row, selected_proteins, list(combo))
            if new_name not in existing_names:
                existing_names.add(new_name)
                return new_name

    # Every combination is taken: add a variant number, bumping it until the
    # resulting name is really unused.
    variant = len(existing_names)
    new_name = f"{full_name} Variant {variant}"
    while new_name in existing_names:
        variant += 1
        new_name = f"{full_name} Variant {variant}"
    existing_names.add(new_name)
    return new_name

In [18]:
# Common list of proteins (compared with ingredient column names by exact equality)
common_proteins = ['chicken', 'beef', 'pork', 'fish', 'shrimp', 'tofu', 'turkey', 'ham']

# Names of the numeric columns only, i.e. excluding non-numeric columns.
# Taking `.columns` avoids holding a full copy of the frame just to iterate
# over its labels.
ingredient_columns = data.select_dtypes(include=[np.number]).columns.tolist()

# Initialize an empty set to store existing recipe names
used_names = set()

# Apply the naming function to each row; `used_names` is filled as a side
# effect so later rows can avoid names already handed out
data['recipename'] = data.apply(
    lambda row: generate_recipe_name(row, ingredient_columns, common_proteins, used_names),
    axis=1,
)

In [19]:
# Check number of unique names; if it equals the number of rows in `data`,
# every recipe received its own name
data['recipename'].nunique()

49718

In [20]:
# Inspect the frame again, now with the generated `recipename` column appended
data

Unnamed: 0_level_0,cuisine,alexia waffle fries,perciatelli,sweet and sour mix,corn bread crumbs,black cardamom pods,tumeric,fresh marjoram,sweet soy sauce,johnsonville hot & spicy breakfast links,...,chive blossoms,satsuma imo,globe eggplant,frangipane,bee pollen,low sodium turkey breast,nonfat yogurt plain,oysters,nakano seasoned rice vinegar,recipename
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,spanish,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,spanish Style minced onion garlic cloves arbo...
1,mexican,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,mexican Style yellow onion ground chipotle ch...
2,french,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,french Style egg yolks butter unflavored gelatin
3,chinese,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,chinese Style large eggs carrots sesame oil
4,italian,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,italian Style vanilla ice cream orange peel g...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49713,southern_us,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,southern_us Style onion gravy
49714,irish,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,irish Style bay leaf chopped fresh sage onions
49715,filipino,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,filipino Style flour tortillas crumbled blue ...
49716,indian,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,indian Style ginger mint leaves grated carrot


In [21]:
# Write the frame, including the new `recipename` column, to a CSV file
data.to_csv('../recipe_ingredients_dataset/ingredient_df_with_recipenames.csv')

In [22]:
# Read the saved file back in.
# NOTE(review): unlike the initial load, no index_col is passed here, so the
# row index written by to_csv presumably comes back as a regular column;
# confirm this is intended.
df = pd.read_csv('../recipe_ingredients_dataset/ingredient_df_with_recipenames.csv')

In [None]:
# Display the re-loaded frame
df