In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd

df = pd.read_json('../data/final_ingredient_data.json')

In [2]:
import sys
from pathlib import Path

# in jupyter (lab / notebook), based on notebook path
module_path = str(Path.cwd().parents[0])

if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
import re
from dictionary.regex_util import number_with_special_character_regex
from dictionary.protein_carbo import protein_carbo_list
from dictionary.ingredient_merg import ingredient_merg
from dictionary.calories_list import calories_list
from dictionary.ingredient_dict import ingredient_dict
from dictionary.unit_dict import measure, amount_for_uncount_unit

In [4]:
def set_amount(ingredient):
  if ingredient['unit'] in measure:
    return ingredient['amount']*measure[ingredient['unit']] 
  elif ingredient['unit'] in amount_for_uncount_unit[ingredient['ingredientName']]:
    return ingredient['amount']*amount_for_uncount_unit[ingredient['ingredientName']][ingredient['unit']]
  if ingredient['amount'] > 0:
    return ingredient['amount']
  else: return 0

In [5]:
def merge_food_ingredient(ingredients):
  new_ingre = {}
  for ingre in ingredients:
    for m in ingredient_merg:
      if ingre['ingredientName'] in ingredient_merg[m]:
        if m not in new_ingre:
          new_ingre[m] = 0
        new_ingre[m]+= set_amount(ingre)
  return new_ingre

In [6]:
df['ingredients'][0]

[{'ingredientName': 'วุ้นเส้น', 'amount': 1, 'unit': 'ซอง'},
 {'ingredientName': 'หมูกรอบ', 'amount': 1, 'unit': 'cup'},
 {'ingredientName': 'ใบกะเพรา', 'amount': 1, 'unit': 'cup'},
 {'ingredientName': 'พริก', 'amount': 4, 'unit': 'เม็ด'},
 {'ingredientName': 'กระเทียม', 'amount': 4, 'unit': 'lobe'},
 {'ingredientName': 'ซีอิ๊วขาว', 'amount': 0.5, 'unit': 'table_spoon'},
 {'ingredientName': 'น้ำตาล', 'amount': 1, 'unit': 'table_spoon'},
 {'ingredientName': 'ผงปรุงรส', 'amount': 0.5, 'unit': 'table_spoon'},
 {'ingredientName': 'น้ำเปล่า', 'amount': 2.125, 'unit': 'table_spoon'},
 {'ingredientName': 'น้ำมัน', 'amount': 1, 'unit': 'table_spoon'}]

In [7]:
df['mergeIngredients'] = df['ingredients'].apply(merge_food_ingredient)

In [8]:
def calulate_ingredient_cal(ingredients):
  cal = 0
  for ingre in ingredients:
    if ingre in calories_list:
      cal += calories_list[ingre]* ingredients[ingre]
  return cal

In [9]:
df['calories'] = df['mergeIngredients'].apply(calulate_ingredient_cal)

In [10]:
df[df['calories'] > 1500]

Unnamed: 0,title,url,allTimeScore,view,totalLike,photos,ingredients,tags,is_has_amoungs,mergeIngredients,calories
15,ก๋วยเตี๋ยวไก่ตุ๋น,https://www.wongnai.com/recipes/ugc/0724e57bd8...,2027,1897,4,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อไก่', 'amount': 6, '...","[{'externalId': 'chicken-recipes', 'primaryNam...",1.000000,"{'chicken': 600.0, 'offal': 400.0, 'vegetable'...",2369.5000
31,ยำหมูยอไข่แดงเค็ม,https://www.wongnai.com/recipes/ugc/fddf94d916...,1118,1118,0,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'หมูยอ', 'amount': 10, 'un...","[{'externalId': 'main-dish-recipes', 'primaryN...",1.000000,"{'pork': 1000.0, 'egg': 750.0, 'smell_flavor':...",3821.5000
36,หมูปิ้งสูตรนุ๊มนุ่ม,https://www.wongnai.com/recipes/ugc/535006fbd1...,101866,101376,37,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อหมู', 'amount': 1, '...","[{'externalId': 'pork-recipes', 'primaryName':...",1.000000,"{'pork': 1000, 'salty_flavor': 115.0, 'milk': ...",2552.0000
43,หมี่เหลืองผัดซีอิ๊ว,https://www.wongnai.com/recipes/ugc/32277f99cd...,68578,68238,11,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'บะหมี่', 'amount': 0.5, '...","[{'externalId': 'pork-recipes', 'primaryName':...",1.000000,"{'noodle': 500.0, 'pork': 500.0, 'vegetable': ...",2498.8000
47,ผัดซีอิ๊วเส้นเหลืองหมู,https://www.wongnai.com/recipes/ugc/87725ee94e...,1312,1182,4,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'บะหมี่', 'amount': 1, 'un...","[{'externalId': 'pork-recipes', 'primaryName':...",1.000000,"{'noodle': 1000, 'pork': 400, 'vegetable': 300...",2963.0000
...,...,...,...,...,...,...,...,...,...,...,...
2775,ข้าวต้มแห้งหมูสับ,https://www.wongnai.com/recipes/ugc/9861529d06...,4699,4599,4,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อหมู', 'amount': 3, '...","[{'externalId': 'pork-recipes', 'primaryName':...",0.555556,"{'pork': 800.0, 'rice': 500.0}",2586.0000
2776,สปาเกตตี้มิลานเนส,https://www.wongnai.com/recipes/ugc/6752503858...,504,494,1,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เส้นสปาเก็ตตี้', 'amount'...","[{'externalId': 'pork-recipes', 'primaryName':...",0.545455,"{'noodle': 700.0, 'pork': 220, 'salty_flavor':...",1820.1875
2778,ข้าวราดแกงกะหรี่ญี่ปุ่น,https://www.wongnai.com/recipes/ugc/b951b197dd...,4323,3923,11,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อหมู', 'amount': 1, '...","[{'externalId': 'pork-recipes', 'primaryName':...",0.545455,"{'pork': 1000, 'vegetable': 279.0, 'smell_flav...",3063.3500
2780,ข้าวไรซ์เบอรรี่คลุกกะปิ,https://www.wongnai.com/recipes/ugc/9cb3c02985...,879,839,4,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'ข้าวไรซ์เบอรี่', 'amount'...","[{'externalId': 'main-dish-recipes', 'primaryN...",0.545455,"{'rice': 660, 'salty_flavor': 15, 'smell_flavo...",2263.8000


In [11]:
df.iloc[2719]['url']

'https://www.wongnai.com/recipes/ugc/36cb20f8ab6d42bc98e1703335f1d1a4'

In [12]:
df_yield = pd.read_json('../data/0.recipe_data_ugc.json')

In [13]:
def remove_leading_zeros(s: str):
    parts = s.split('.')
    if len(parts) == 2:
        left, right = parts
        if left == '0':
            left = ''
        return f"{left}.{right}"
    else:
        return s.lstrip('0') or '0'

def recur_change_to_number(number_list):
  if not number_list:
    return 0
  if '-' in number_list:
    dash_index = number_list.index('-')
    return (recur_change_to_number(number_list[:dash_index]) + recur_change_to_number(number_list[dash_index+1:]))/2
  number_list2 = []
  for num in number_list:
    try:
      number_list2.append(remove_leading_zeros(str(eval(num))))
    except:
      number_list2.append(num.strip())
  try:
    return eval(''.join(number_list2))
  except:
    # print(number_list2)


    if '+' == number_list2[-1]:
      recur_change_to_number(number_list2[:-1])
    else:
      # print('+'.join(number_list2))
      return eval('+'.join(number_list2))

def check_recur(num_list):
  a = recur_change_to_number(num_list)
  if a:
    return a
  return 1


In [14]:
df_yield['yield_num'] = df_yield['yield'].apply(lambda x: check_recur(re.findall(number_with_special_character_regex,x)))

In [15]:
df_yield['url'] = df_yield['url'].apply(lambda x: 'https://www.wongnai.com/'+ x)

In [16]:
df_yield = df_yield[['url','yield_num']]

In [17]:
merged_df = pd.merge(df, df_yield, on='url')

In [18]:
merged_df['calories2'] = merged_df['calories']/ merged_df['yield_num']

In [19]:
def merge_food_ingredient(ingredients):
  new_ingre = {}
  for ingre in ingredients:
    for m in ingredient_merg:
      if ingre['ingredientName'] in ingredient_merg[m]:
        if m not in new_ingre:
          new_ingre[m] = 0
        new_ingre[m]+= set_amount(ingre)
  return new_ingre

In [20]:
def merge_food_nutrients(ingredients):
  new_ingre = {}
  for ingre in ingredients:
    for m in protein_carbo_list:
      if ingre in protein_carbo_list[m]:
        if m not in new_ingre:
          new_ingre[m] = 0
        new_ingre[m]+= ingredients[ingre]
  return new_ingre

In [21]:
def cal_nutrients_percen(di):
  d = {**di}
  sumary = sum(d.values())
  if sumary:
    for x in d:
      d[x] = d[x]/sumary
  return d

In [22]:
merged_df['nutrients'] = merged_df['mergeIngredients'].apply(merge_food_nutrients)
merged_df['sum_nutrients'] = merged_df['nutrients'].apply(lambda x: sum(x.values()))
merged_df['nutrients_percen'] = merged_df['nutrients'].apply(cal_nutrients_percen)

In [23]:
merged_df.iloc[1]['ingredients']

[{'ingredientName': 'ไข่ไก่', 'amount': 2, 'unit': 'buble'},
 {'ingredientName': 'บะหมี่กึ่งสำเร็จรูป', 'amount': 1, 'unit': 'ก้อน'},
 {'ingredientName': 'นม', 'amount': 0.5, 'unit': 'กล่อง'},
 {'ingredientName': 'แครอท', 'amount': -1, 'unit': 'avg'},
 {'ingredientName': 'กระดูกหมู', 'amount': -1, 'unit': 'avg'},
 {'ingredientName': 'น้ำปลา', 'amount': 2, 'unit': 'tea_spoon'},
 {'ingredientName': 'พริกไทย', 'amount': 1, 'unit': 'tea_spoon'}]

In [24]:
def cal_spicy(x):
  if 'spicy_flavor' in x:
    return 3
  if 'mediumspicy_flavor' in x:
    return 2
  if 'little_spicy' in x:
    return 1
  return 0

In [25]:
merged_df['spicyLevel'] = merged_df['mergeIngredients'].apply(cal_spicy)

In [26]:
merged_df[merged_df['spicyLevel'] ==1].sample(3)

Unnamed: 0,title,url,allTimeScore,view,totalLike,photos,ingredients,tags,is_has_amoungs,mergeIngredients,calories,yield_num,calories2,nutrients,sum_nutrients,nutrients_percen,spicyLevel
1701,ต้นกระเทียมผัดหมูสามชั้น,https://www.wongnai.com/recipes/ugc/d266ae762d...,673,663,1,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อหมู', 'amount': 300,...","[{'externalId': 'pork-recipes', 'primaryName':...",1.0,"{'pork': 300, 'salty_flavor': 22.5, 'vegetable...",726.0,1.0,726.0,"{'protein': 300, 'mineral': 0}",300.0,"{'protein': 1.0, 'mineral': 0.0}",1
205,สุกี้อกไก่,https://www.wongnai.com/recipes/ugc/e475eefa44...,1633,1493,5,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อไก่', 'amount': 200,...","[{'externalId': 'chicken-recipes', 'primaryNam...",1.0,"{'chicken': 200, 'vegetable': 155.0, 'salty_fl...",656.25,2.0,328.125,"{'protein': 250, 'mineral': 155.0}",405.0,"{'protein': 0.6172839506172839, 'mineral': 0.3...",1
1271,อกไก่ทอดกรอบ,https://www.wongnai.com/recipes/ugc/d8f22083de...,8558,8478,8,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เนื้อไก่', 'amount': 200,...","[{'externalId': 'chicken-recipes', 'primaryNam...",1.0,"{'chicken': 200, 'salty_flavor': 5, 'little_sp...",478.0,1.0,478.0,{'protein': 200},200.0,{'protein': 1.0},1


In [27]:
merged_df['calories'] = merged_df['calories2']
merged_df.drop('calories2',axis=1)

Unnamed: 0,title,url,allTimeScore,view,totalLike,photos,ingredients,tags,is_has_amoungs,mergeIngredients,calories,yield_num,nutrients,sum_nutrients,nutrients_percen,spicyLevel
0,ผัดวุ้นเส้นกระเพราหมูกรอบ,https://www.wongnai.com/recipes/ugc/273249a788...,1939,1939,0,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'วุ้นเส้น', 'amount': 1, '...","[{'externalId': 'pork-recipes', 'primaryName':...",1.000000,"{'noodle': 50.0, 'pork': 220, 'smell_flavor': ...",622.400000,1.0,"{'carbo': 50.0, 'protein': 220}",270.0,"{'carbo': 0.18518518518518517, 'protein': 0.81...",3
1,ไข่ตุ๋นมาม่า,https://www.wongnai.com/recipes/ugc/d2ca1bdc51...,1519,1179,5,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'ไข่ไก่', 'amount': 2, 'un...","[{'externalId': 'egg-recipes', 'primaryName': ...",1.000000,"{'egg': 100, 'noodle': 60.0, 'milk': 50.0, 've...",293.000000,1.0,"{'protein': 100, 'carbo': 110.0, 'mineral': 0}",210.0,"{'protein': 0.47619047619047616, 'carbo': 0.52...",1
2,เกี่ยมอี๋คั่วไส้หมูก้อน,https://www.wongnai.com/recipes/ugc/ffbd2984f0...,946,826,6,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เกี่ยมอี๋ลวก', 'amount': ...","[{'externalId': 'main-dish-recipes', 'primaryN...",1.000000,"{'pork': 200.0, 'egg': 100, 'smell_flavor': 39...",639.000000,1.0,{'protein': 300.0},300.0,{'protein': 1.0},0
3,ออรินจิคั่วเกลือพริกไทย,https://www.wongnai.com/recipes/ugc/66e7abffe9...,1862,1792,4,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เห็ด', 'amount': 3, 'unit...","[{'externalId': 'chicken-recipes', 'primaryNam...",1.000000,"{'vegetable': 300, 'chicken': 200, 'salty_flav...",336.500000,2.0,"{'mineral': 300, 'protein': 200}",500.0,"{'mineral': 0.6, 'protein': 0.4}",1
4,กระหล่ำปลีทอดน้ำปลา,https://www.wongnai.com/recipes/ugc/f0e49ced08...,1365,1305,3,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'กะหล่ำปลี', 'amount': 10,...","[{'externalId': 'vegetable-recipes', 'primaryN...",1.000000,"{'vegetable': 100.0, 'salty_flavor': 20, 'smel...",32.500000,2.0,{'mineral': 100.0},100.0,{'mineral': 1.0},0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2782,สุกี้น้ำกระดูกอ่อน,https://www.wongnai.com/recipes/ugc/7087159c34...,628,398,8,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'กระดูกอ่อน', 'amount': 0....","[{'externalId': 'vegetable-recipes', 'primaryN...",0.545455,"{'vegetable': 61.0, 'egg': 50, 'smell_flavor':...",117.150000,1.0,"{'mineral': 61.0, 'protein': 50}",111.0,"{'mineral': 0.5495495495495496, 'protein': 0.4...",0
2783,ผัดผักรวมมิตร,https://www.wongnai.com/recipes/ugc/5887aa7205...,987,947,1,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'ดอกกะหล่ำ', 'amount': 1, ...","[{'externalId': 'vegetable-recipes', 'primaryN...",0.538462,"{'vegetable': 793.0, 'bean': 150.0, 'process_m...",810.633333,1.5,"{'mineral': 793.0, 'carbo': 150.0, 'protein': ...",1033.0,"{'mineral': 0.7676669893514037, 'carbo': 0.145...",0
2784,ผัดมาม่าต้มยำ,https://www.wongnai.com/recipes/ugc/b30648906c...,397,377,2,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'กุ้ง', 'amount': 6, 'unit...","[{'externalId': 'noodle-recipes', 'primaryName...",0.538462,"{'shrimp': 300.0, 'egg': 50, 'noodle': 60.0, '...",482.500000,1.0,"{'protein': 350.0, 'carbo': 60.0, 'mineral': 0}",410.0,"{'protein': 0.8536585365853658, 'carbo': 0.146...",0
2785,สปาเก็ตตี้ขี้เมาเห็ด,https://www.wongnai.com/recipes/ugc/95804aa0df...,877,817,6,[{'contentUrl': 'https://img.wongnai.com/p/196...,"[{'ingredientName': 'เส้นสปาเก็ตตี้', 'amount'...","[{'externalId': 'main-dish-recipes', 'primaryN...",0.533333,"{'noodle': 2000, 'vegetable': 5600.0, 'smell_f...",265.533333,30.0,"{'carbo': 2000, 'mineral': 5600.0, 'protein': ...",7900.0,"{'carbo': 0.25316455696202533, 'mineral': 0.70...",0


In [28]:
db_df = merged_df.copy(deep=False)

In [31]:
db_df = db_df[['url','calories','spicyLevel']]

In [32]:
with open('../data/addition_data.json','w',encoding='utf8')as file:
  db_df.to_json(file,orient='records', force_ascii=False)

In [33]:
merged_df[['title','tags','mergeIngredients','nutrients_percen','spicyLevel']]

Unnamed: 0,title,tags,mergeIngredients,nutrients_percen,spicyLevel
0,ผัดวุ้นเส้นกระเพราหมูกรอบ,"[{'externalId': 'pork-recipes', 'primaryName':...","{'noodle': 50.0, 'pork': 220, 'smell_flavor': ...","{'carbo': 0.18518518518518517, 'protein': 0.81...",3
1,ไข่ตุ๋นมาม่า,"[{'externalId': 'egg-recipes', 'primaryName': ...","{'egg': 100, 'noodle': 60.0, 'milk': 50.0, 've...","{'protein': 0.47619047619047616, 'carbo': 0.52...",1
2,เกี่ยมอี๋คั่วไส้หมูก้อน,"[{'externalId': 'main-dish-recipes', 'primaryN...","{'pork': 200.0, 'egg': 100, 'smell_flavor': 39...",{'protein': 1.0},0
3,ออรินจิคั่วเกลือพริกไทย,"[{'externalId': 'chicken-recipes', 'primaryNam...","{'vegetable': 300, 'chicken': 200, 'salty_flav...","{'mineral': 0.6, 'protein': 0.4}",1
4,กระหล่ำปลีทอดน้ำปลา,"[{'externalId': 'vegetable-recipes', 'primaryN...","{'vegetable': 100.0, 'salty_flavor': 20, 'smel...",{'mineral': 1.0},0
...,...,...,...,...,...
2782,สุกี้น้ำกระดูกอ่อน,"[{'externalId': 'vegetable-recipes', 'primaryN...","{'vegetable': 61.0, 'egg': 50, 'smell_flavor':...","{'mineral': 0.5495495495495496, 'protein': 0.4...",0
2783,ผัดผักรวมมิตร,"[{'externalId': 'vegetable-recipes', 'primaryN...","{'vegetable': 793.0, 'bean': 150.0, 'process_m...","{'mineral': 0.7676669893514037, 'carbo': 0.145...",0
2784,ผัดมาม่าต้มยำ,"[{'externalId': 'noodle-recipes', 'primaryName...","{'shrimp': 300.0, 'egg': 50, 'noodle': 60.0, '...","{'protein': 0.8536585365853658, 'carbo': 0.146...",0
2785,สปาเก็ตตี้ขี้เมาเห็ด,"[{'externalId': 'main-dish-recipes', 'primaryN...","{'noodle': 2000, 'vegetable': 5600.0, 'smell_f...","{'carbo': 0.25316455696202533, 'mineral': 0.70...",0


In [34]:
model_df = merged_df[['title','tags','mergeIngredients','nutrients_percen','spicyLevel']]

In [35]:
from dictionary.tag_category import tag_category

In [36]:
tc = tag_category
del tc['ประเภทอาหาร']

In [37]:
all_tag = sum(list(tc.values()),[])

In [38]:
def is_external_id_in_list(external_ids, check_external_id):
  if(check_external_id in external_ids):
    return 1
  else:
    return 0

for external_id in all_tag:
  model_df[external_id] = model_df['tags'].apply(lambda tags : is_external_id_in_list([tag['externalId'] for tag in tags] ,external_id))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df[external_id] = model_df['tags'].apply(lambda tags : is_external_id_in_list([tag['externalId'] for tag in tags] ,external_id))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df[external_id] = model_df['tags'].apply(lambda tags : is_external_id_in_list([tag['externalId'] for tag in tags] ,external_id))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [39]:
for ingre in ingredient_merg:
  model_df[ingre] = model_df['mergeIngredients'].apply(lambda mergeIngredients : is_external_id_in_list(list(mergeIngredients.keys()) ,ingre))

In [40]:
def is_in_dict(n,k):
  if k in n:
    return n[k]
  else :
    return 0

In [41]:
for protein_carbo in protein_carbo_list:
  model_df[protein_carbo] = model_df['nutrients_percen'].apply(lambda nutrients : is_in_dict(nutrients,protein_carbo))

In [47]:
model_df = model_df.drop(['tags','mergeIngredients','nutrients_percen'],axis=1)

In [48]:
model_df.to_csv('../data/data_for_model2.csv')