In [3]:
from datetime import datetime
import uuid
import pandas as pd
from sqlalchemy.orm import sessionmaker
from sqlalchemy import MetaData
import numpy as np
# from app.database.queries import insert_data
from typing import List, Dict
import logging

In [5]:
logger = logging.getLogger(__name__)  # Use a named logger

In [4]:
logging.basicConfig(level=logging.INFO)
RECIPE = "../../app/data/processed/recipe_cleaned.json"
INGREDIENTS = "../../app/data/processed/ingredients_AI.json"
MEAL = "../../app/data/processed/meal_cleaned.json"


In [6]:
logger.info("PREPARING DATA")
data: pd.DataFrame = pd.read_json(INGREDIENTS)
data = data.replace({np.nan: None, '': None, ' ': None})
data.head()

INFO:__main__:PREPARING DATA


Unnamed: 0,main_ingredient,quantity,weight,ingredient_id,alternative_ingredient,measurement
0,plantains,3.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,
1,milk,2.5,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,plant-based milk,cups
2,lime,1.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,half
3,vanilla bean paste,1.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,vanilla extract,tsp
4,all purpose flour,2.5,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,


In [7]:
unique_ingredients = data[["main_ingredient"]].drop_duplicates()
unique_ingredients["ingredientId"] = range(12, len(unique_ingredients) + 12)
recipe_to_ingredients = data.merge(unique_ingredients, on="main_ingredient", how="left")
recipe_to_ingredients.head(3)

Unnamed: 0,main_ingredient,quantity,weight,ingredient_id,alternative_ingredient,measurement,ingredientId
0,plantains,3.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,,12
1,milk,2.5,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,plant-based milk,cups,13
2,lime,1.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,half,14


In [8]:
alternative_ingredient = data[["alternative_ingredient"]].drop_duplicates()
alternative_ingredient["alternativeOfId"] = range(1, len(alternative_ingredient) + 1)
recipe_to_ingredients = recipe_to_ingredients.merge(alternative_ingredient, on="alternative_ingredient", how="left")
recipe_to_ingredients.head(3)

Unnamed: 0,main_ingredient,quantity,weight,ingredient_id,alternative_ingredient,measurement,ingredientId,alternativeOfId
0,plantains,3.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,,12,1
1,milk,2.5,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,plant-based milk,cups,13,2
2,lime,1.0,,2c2703b4-ed39-468d-a09b-4dda480e3bc7,,half,14,1


In [7]:
# alt_ingredient = data[["alternative_ingredient"]].drop_duplicates()
# alt_ingredient["alternative_ingredient"] = alt_ingredient[alt_ingredient["alternative_ingredient"] != None]
# alt_ingredient["alternativeOfId"] = range(1, len(alt_ingredient)+1)
# recipe_to_ingredients = recipe_to_ingredients.merge(alt_ingredient, on="alternative_ingredient", how="left")
# recipe_to_ingredients.head(3)

In [9]:

measurements = data[["measurement"]].drop_duplicates().reset_index(drop=True)
# measurement_AI = pd.read_json("../../app/data/processed/measurements_AI.json")
measurements["measurementId"] = range(6, len(measurements) + 6)
recipe_to_ingredients = recipe_to_ingredients.merge(measurements, on="measurement", how="left")
len(recipe_to_ingredients)

735

In [10]:
recipe_to_ingredients = recipe_to_ingredients.rename(columns={"ingredient_id": "recipe_id"})
recipe_to_ingredients.drop(columns=['main_ingredient', "measurement", "weight", "alternative_ingredient"], inplace=True)
data_dict: List[Dict] = recipe_to_ingredients.to_dict(orient='records')  # type: ignore
print(data_dict)

[{'quantity': '3', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 12, 'alternativeOfId': 1, 'measurementId': 6}, {'quantity': '2.5', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 13, 'alternativeOfId': 2, 'measurementId': 7}, {'quantity': '1', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 14, 'alternativeOfId': 1, 'measurementId': 8}, {'quantity': '1', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 15, 'alternativeOfId': 3, 'measurementId': 9}, {'quantity': '2.5', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 16, 'alternativeOfId': 1, 'measurementId': 6}, {'quantity': '½', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 17, 'alternativeOfId': 1, 'measurementId': 6}, {'quantity': '2', 'recipe_id': '2c2703b4-ed39-468d-a09b-4dda480e3bc7', 'ingredientId': 18, 'alternativeOfId': 1, 'measurementId': 9}, {'quantity': '½', 'recipe_id': '2c2703b4-ed39-468d-a09b-4

In [11]:
import unicodedata
from fractions import Fraction

unicodes = recipe_to_ingredients[["quantity"]]
unicodes["id"] = range(1, len(unicodes)+1)

unicode_list = unicodes["quantity"]
unicode_id = unicodes['id']

unicode_json = []

def contains_unicode(input_str):
    return any(ord(char) > 127 for char in input_str)

for unicode, id in zip(unicode_list, unicode_id):
    full = 0
    description = ""
    unicode_dict = {}
    if unicode is not None:    
        if contains_unicode(unicode) or "/" in unicode:
            # print(unicode)
            unicode_str = unicodedata.normalize("NFKC", unicode).replace("⁄", "/")
            if ' ' in unicode_str:
                if "-" in unicode_str:
                    parts = unicode_str.split("-")
                    full = float(Fraction(parts[0].strip()))
                else:
                    parts = unicode_str.split()
                    whole_number = int(parts[0])
                    fraction_part = Fraction(parts[1])
                    full = whole_number + float(fraction_part)
            else:
                full = float(Fraction(unicode_str))
        else:
            try:
                full = float(unicode)
            except Exception as e:
                # print(e)
                if isinstance(unicode, str):
                    full = -1
                    description = str(unicode)
                    print(description)
        unicode_dict["id"] = id
        unicode_dict["value"] = full
        unicode_dict["description"] = description
    else:
        full = -1
        description = "None"
        unicode_dict["id"] = id
        unicode_dict["value"] = full
    
    unicode_json.append(unicode_dict.copy())
    
# print(unicode_json)

to taste
to taste
to taste
to taste
a few
15-20
3-4
20-25
to taste
small thumb size
to taste
few
15-20
to taste
3-4
20-25
a small handful
15-20


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unicodes["id"] = range(1, len(unicodes)+1)


In [12]:
quantity_details = pd.DataFrame(unicode_json)
rti_copy = recipe_to_ingredients.copy()

rti_copy["quantity"] = quantity_details["value"]
rti_copy["quantityDescription"] = quantity_details["description"]
rti_copy

Unnamed: 0,quantity,recipe_id,ingredientId,alternativeOfId,measurementId,quantityDescription
0,3.0,2c2703b4-ed39-468d-a09b-4dda480e3bc7,12,1,6,
1,2.5,2c2703b4-ed39-468d-a09b-4dda480e3bc7,13,2,7,
2,1.0,2c2703b4-ed39-468d-a09b-4dda480e3bc7,14,1,8,
3,1.0,2c2703b4-ed39-468d-a09b-4dda480e3bc7,15,3,9,
4,2.5,2c2703b4-ed39-468d-a09b-4dda480e3bc7,16,1,6,
...,...,...,...,...,...,...
730,3.0,4b9172ed-03d7-46f3-a207-8eb557f94f00,43,1,16,
731,1.0,4b9172ed-03d7-46f3-a207-8eb557f94f00,61,1,21,
732,2.0,4b9172ed-03d7-46f3-a207-8eb557f94f00,48,1,44,
733,1.0,9250ea88-fa19-4c76-8b33-45aac57863e4,12,1,6,


In [13]:
rti_copy = rti_copy.rename(columns={"recipe_id": "recipeId"})
rti_copy["id"] = range(22, len(rti_copy)+22)
rti_copy = rti_copy[["id","quantity", "measurementId", "ingredientId", "alternativeOfId", "recipeId", "quantityDescription"]]
rti_copy

Unnamed: 0,id,quantity,measurementId,ingredientId,alternativeOfId,recipeId,quantityDescription
0,22,3.0,6,12,1,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
1,23,2.5,7,13,2,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
2,24,1.0,8,14,1,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
3,25,1.0,9,15,3,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
4,26,2.5,6,16,1,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
...,...,...,...,...,...,...,...
730,752,3.0,16,43,1,4b9172ed-03d7-46f3-a207-8eb557f94f00,
731,753,1.0,21,61,1,4b9172ed-03d7-46f3-a207-8eb557f94f00,
732,754,2.0,44,48,1,4b9172ed-03d7-46f3-a207-8eb557f94f00,
733,755,1.0,6,12,1,9250ea88-fa19-4c76-8b33-45aac57863e4,


In [None]:
data_dict = rti_copy.to_dict(orient="records")
# insert_data(data=data_dict, schema="RecipeIngredient")

In [15]:
# u_ingredients = unique_ingredients.rename(columns={"ingredientId" : "id", "main_ingredient": "name"})
# u_ingredients = u_ingredients[["id", "name"]]
# u_ingdict = u_ingredients.to_dict(orient="records")
# insert_data(data=u_ingdict, schema="Ingredient")

In [15]:
rti_copy

Unnamed: 0,id,quantity,measurementId,ingredientId,alternativeOfId,recipeId,quantityDescription
0,22,3.0,6,12,1,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
1,23,2.5,7,13,2,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
2,24,1.0,8,14,1,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
3,25,1.0,9,15,3,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
4,26,2.5,6,16,1,2c2703b4-ed39-468d-a09b-4dda480e3bc7,
...,...,...,...,...,...,...,...
730,752,3.0,16,43,1,4b9172ed-03d7-46f3-a207-8eb557f94f00,
731,753,1.0,21,61,1,4b9172ed-03d7-46f3-a207-8eb557f94f00,
732,754,2.0,44,48,1,4b9172ed-03d7-46f3-a207-8eb557f94f00,
733,755,1.0,6,12,1,9250ea88-fa19-4c76-8b33-45aac57863e4,


In [8]:
logger.info("PREPARING DATA")
data: pd.DataFrame = pd.read_json(RECIPE)
data = data.replace({np.nan: None, '': None, ' ': None})
data.head()

INFO:__main__:PREPARING DATA


Unnamed: 0,recipe_id,course,cuisine,servings,calories,prep time,cook time,total time,author,keyword,soaking time,marinating time,cooling time,resting time,wait time
0,2c2703b4-ed39-468d-a09b-4dda480e3bc7,Breakfast,West African,6.0,455.0,15,20,35,,,0,0,0,0,0
1,099498aa-d5d1-4be5-be81-8525014cd5da,"Main Course, Side Dish","Cameroonian, Nigerian",8.0,362.0,20,60,80,Yummy Medley,,0,0,0,0,0
2,9534b335-0657-4da0-abc7-c2a58272f25a,Breakfast,"Nigerian, Senegalese, West African",8.0,197.0,5,35,0,,,0,0,0,0,0
3,8d378ba0-3b46-4a23-b349-a1a183bbc4ec,"Dinner, Main Course, seafood","Nigerian, Tropical",6.0,428.3,20,30,50,Yummy Medley,,0,0,0,0,0
4,ac3857b2-7a42-4e88-af04-35361b550f2b,"Appetizer, Main Course",Nigerian,6.0,175.3,20,45,0,,,0,0,0,0,0
