In [1]:
import pandas as pd
import ast

In [7]:
df = pd.read_csv("app/data/recipe.csv")

In [8]:
df = df.drop(columns=['recipe_id', 'review_nums', 'aver_rate', 'reviews', 'cooking_directions'])
df.category = df.category.replace('main-dish', 'main')
df['name'] = df['recipe_name']
df = df.drop(columns=['recipe_name'])
df["ingredients"] = df["ingredients"].astype(str).str.replace(r"\^", ", ", regex=True)
df = df.drop_duplicates().dropna(how='all')


In [9]:
## nutritions

nutrients_to_keep = ["calories", "protein", "fat", "carbohydrates", "sugars", "fiber"]

def filter_nutritions(nutri_str):
    try:
        full_dict = ast.literal_eval(nutri_str)
        return {
            key: float(full_dict[key]["amount"])
            for key in nutrients_to_keep
            if key in full_dict and "amount" in full_dict[key]
        }
    except (ValueError, SyntaxError, TypeError):
        return {}

# Appliquer à la colonne
df["nutritions"] = df["nutritions"].apply(filter_nutritions)


In [10]:
## TAGS
def parse_tags_column(df, tag_column='tags'):
    # Définir les catégories de tags à extraire
    prep_time_keywords = [
        '15-minutes-or-less', '30-minutes-or-less', '60-minutes-or-less', '4-hours-or-less'
    ]
    diet_keywords = [
        'low-carb', 'low-sodium', 'low-cholesterol', 'low-in-something', 'very-low-carbs',
        'healthy', 'healthy-2', 'high-protein', 'free-of-something'
    ]
    dish_keywords = [
        'main-dish', 'breads', 'soups-stews', 'desserts', 'quick-breads', 'cookies-and-brownies',
        'pasta', 'poultry', 'meat', 'vegetables', 'fruit'
    ]
    seasonal_keywords = [
        'winter', 'fall', 'spring', 'summer', 'holiday-event', 'christmas'
    ]

    # Fonctions de sélection de tags
    def extract_first_match(tags, keywords):
        return next((tag for tag in tags if tag in keywords), None)

    def extract_all_matches(tags, keywords):
        return [tag for tag in tags if tag in keywords]

    # Parsing
    def process_tags(row):
        tags = str(row).split(';') if pd.notnull(row) else []

        return pd.Series({
            'prep_time': extract_first_match(tags, prep_time_keywords),
            'diet_type': ', '.join(extract_all_matches(tags, diet_keywords)) or None,
            'dish_type': ', '.join(extract_all_matches(tags, dish_keywords)) or None,
            'seasonal': ', '.join(extract_all_matches(tags, seasonal_keywords)) or None,
        })

    # Appliquer le parsing à chaque ligne
    tag_data = df[tag_column].apply(process_tags)
    
    # Fusionner avec le DataFrame initial
    df = pd.concat([df, tag_data], axis=1)
    
    return df


In [11]:
#pd.set_option('display.max_colwidth', None)  # Affiche le contenu complet d'une cellule
df = parse_tags_column(df)
df[['prep_time', 'diet_type', 'dish_type', 'seasonal']].head()


Unnamed: 0,prep_time,diet_type,dish_type,seasonal
0,4-hours-or-less,,"breads, quick-breads",
1,4-hours-or-less,"low-sodium, low-in-something","breads, fruit, vegetables, quick-breads",
2,4-hours-or-less,"healthy, low-cholesterol, healthy-2, low-in-so...",breads,
3,4-hours-or-less,"healthy, free-of-something",breads,
4,4-hours-or-less,,"breads, quick-breads","fall, winter"


In [13]:
df.head()

Unnamed: 0,category,image_url,ingredients,nutritions,tags,name,prep_time,diet_type,dish_type,seasonal
0,appetizer,https://images.media-allrecipes.com/userphotos...,"all-purpose flour, salt, baking soda, baking p...","{'calories': 255.1692, 'protein': 3.268513, 'f...",north-american;breads;easy;beginner-cook;inexp...,Mom's Zucchini Bread,4-hours-or-less,,"breads, quick-breads",
1,appetizer,http://images.media-allrecipes.com/userphotos/...,"chopped walnuts, eggs, white sugar, vegetable ...","{'calories': 276.0908, 'protein': 3.977222, 'f...",weeknight;breads;fruit;vegetables;kid-friendly...,Zucchini Walnut Bread,4-hours-or-less,"low-sodium, low-in-something","breads, fruit, vegetables, quick-breads",
2,appetizer,http://images.media-allrecipes.com/userphotos/...,"rapid rise yeast, white sugar, warm water (110...","{'calories': 156.4551, 'protein': 4.62651, 'fa...",healthy;breads;dietary;low-cholesterol;healthy...,Honey Wheat Bread I,4-hours-or-less,"healthy, low-cholesterol, healthy-2, low-in-so...",breads,
3,dessert,https://images.media-allrecipes.com/userphotos...,"eggs, white sugar, vegetable oil, vanilla extr...","{'calories': 280.3475, 'protein': 3.563895, 'f...",for-large-groups;healthy;breads;kid-friendly;d...,Chocolate Chip Orange Zucchini Bread,4-hours-or-less,"healthy, free-of-something",breads,
4,dessert,https://images.media-allrecipes.com/userphotos...,"white sugar, pumpkin puree, vegetable oil, wat...","{'calories': 210.1581, 'protein': 2.556006, 'f...",breads;fall;winter;seasonal;quick-breads;4-hou...,Chocolate Chip Pumpkin Bread,4-hours-or-less,,"breads, quick-breads","fall, winter"


In [2]:
df = pd.read_csv('app/data/recipes_clean.csv')

In [10]:
pd.options.display.max_colwidth(width=4000)
df.nutritions.head(1)

TypeError: 'int' object is not callable