Generate a JSON file of recipes from a CSV file

In [14]:
import pandas as pd
import numpy as np
import json
import simplejson
import csv
import os
import re
from bs4 import BeautifulSoup
import requests
from urllib.parse import urlparse
from numpyencoder import NumpyEncoder

In [5]:
# Load the flat CSV (one row per recipe ingredient) into a DataFrame.
# The path is relative to the notebook's working directory.
path = '../input/kiwi.csv'
df = pd.read_csv(path)

In [6]:
# Count the missing values in each column.
df.isnull().sum()

recipe_name                 0
recipe_url                  0
course                      0
cusine                      0
image_path                  0
website_name                0
serving                     0
ingredient_name             0
ingredient_standard_name    0
ingredient_desc             0
quantity                    0
quantity_in_gram            0
serving_unit                0
dtype: int64

In [17]:
# Collects the finished recipe dicts; filled by the grouping loop in this cell.
recipes = []

def na_to_empty_str(item):
    """Map missing-value markers to an empty string.

    A value counts as missing when it is NaN (NaN is the only value that is
    unequal to itself) or when its string form, stripped of surrounding
    whitespace, is a lone dash ``'-'``.

    Args:
        item: A single cell value.

    Returns:
        ``''`` for a missing value, otherwise ``item`` unchanged.
    """
    is_nan = item != item
    if is_nan or str(item).strip() == '-':
        return ''
    return item

def na_to_null(item):
    """Convert a cell value to ``int``, or to NaN when the value is missing.

    Args:
        item: A single cell value: a number, numeric text, NaN, or the
            dash placeholder ``'-'`` (surrounding whitespace is ignored).

    Returns:
        ``np.nan`` when ``item`` is NaN or a lone dash; otherwise the value
        as an ``int`` (fractional parts are truncated).

    Raises:
        ValueError: If ``item`` is text that is not numeric.
    """
    # NaN is the only value unequal to itself; int(NaN) would raise, so treat
    # it the same way as the dash placeholder.
    if item != item or str(item).strip() == '-':
        return np.nan
    try:
        return int(item)
    except ValueError:
        # int() rejects decimal text such as '12.5'; parse it as a float
        # first so it truncates the same way a float cell would.
        return int(float(item))

def null_to_zero(item):
    """Convert a cell value to a number, mapping the dash placeholder to 0.

    Args:
        item: A single cell value.

    Returns:
        The integer ``0`` when the stripped string form of ``item`` is a lone
        dash ``'-'``; otherwise ``float(item)``. A NaN input is not replaced:
        ``float`` passes it through as NaN.
    """
    text = str(item).strip()
    return 0 if text == '-' else float(item)

def course_check(row):
    """Return the cleaned ``course`` value of a one-row frame.

    Args:
        row: DataFrame slice; only its first row is read.

    Returns:
        The ``course`` cell passed through ``na_to_empty_str``, or ``''``
        when the frame has no ``course`` column.
    """
    if 'course' not in row.columns:
        return ''
    return na_to_empty_str(row['course'].values[0])

def serving_check(row):
    """Return the serving count of a one-row frame as integer text.

    Args:
        row: DataFrame slice with a ``serving`` column; only its first row
            is read.

    Returns:
        ``''`` when the cell is NaN or a lone dash ``'-'`` (ignoring
        surrounding whitespace); otherwise the cell converted with ``int``
        and rendered as a string, e.g. ``4.0`` becomes ``'4'``.
    """
    value = row['serving'].values[0]
    # NaN is the only value that is unequal to itself.
    if value != value:
        return ''
    if str(value).strip() == '-':
        return ''
    return str(int(value))

def cusine_check(row):
    """Return the cleaned cuisine value of a one-row frame.

    The column is looked up under the spelling ``cusine`` first and
    ``cuisine`` second; the first one present wins.

    Args:
        row: DataFrame slice; only its first row is read.

    Returns:
        The matching cell passed through ``na_to_empty_str``, or ``''`` when
        neither column exists.
    """
    for column in ('cusine', 'cuisine'):
        if column in row.columns:
            return na_to_empty_str(row[column].values[0])
    return ''

def quantity_in_gram_check(row):
    """Return the gram quantity of a one-row frame.

    The column is looked up as ``quantity_in_gram`` first and
    ``quantity_in_grams`` second; the first one present wins.

    Args:
        row: DataFrame slice; only its first row is read.

    Returns:
        The matching cell converted by ``na_to_null``, or ``np.nan`` when
        neither column exists.
    """
    for column in ('quantity_in_gram', 'quantity_in_grams'):
        if column in row.columns:
            return na_to_null(row[column].values[0])
    return np.nan

def create_instance(row):
    """Build a new recipe record from the first row belonging to a recipe.

    Args:
        row: One-row DataFrame slice holding the recipe-level columns
            (``recipe_name``, ``image_path``, ``recipe_url``,
            ``website_name``, ``serving``, and optionally course/cuisine)
            together with that row's ingredient columns.

    Returns:
        A dict with the recipe-level fields and an ``ingredients`` list
        seeded with the ingredient described by ``row``. ``course`` and
        ``cusine`` are each a one-element list of ``{"title": ...}`` dicts.
    """
    return {
        'recipe_name': row['recipe_name'].values[0],
        'image_path': na_to_empty_str(row['image_path'].values[0]),
        'course': [{"title": f"{course_check(row)}"}],
        'cusine': [{"title": f"{cusine_check(row)}"}],
        'recipe_url': row['recipe_url'].values[0],
        'website_name': row['website_name'].values[0],
        'serving': serving_check(row),
        # Delegate to the shared builder so the first ingredient always has
        # exactly the same shape as the ones appended afterwards.
        'ingredients': [fetch_ingredient_values(row)],
    }

def fetch_ingredient_values(row):
    """Build the ingredient dict described by a one-row frame.

    Args:
        row: DataFrame slice with the ingredient columns; only its first row
            is read.

    Returns:
        A dict with the keys ``ingredient_name``,
        ``ingredient_standard_name``, ``ingredient_desc``, ``quantity``,
        ``quantity_in_gram`` and ``serving_unit``. Description and unit are
        cleaned with ``na_to_empty_str``, quantity with ``null_to_zero``.
    """
    def cell(column):
        # First (and only expected) value of the given column.
        return row[column].values[0]

    return {
        'ingredient_name': cell('ingredient_name'),
        'ingredient_standard_name': cell('ingredient_standard_name'),
        'ingredient_desc': na_to_empty_str(cell('ingredient_desc')),
        'quantity': null_to_zero(cell('quantity')),
        'quantity_in_gram': quantity_in_gram_check(row),
        'serving_unit': na_to_empty_str(cell('serving_unit')),
    }



# Fold the flat ingredient rows into one record per recipe. Rows of the same
# recipe are expected to be adjacent: a change in recipe_name closes the
# current record and opens a new one.
if len(df) > 0:  # an empty frame has no first row to seed the first record
    row = prev = df.iloc[:1, :]
    instance = create_instance(row)
    for i in range(1, len(df)):
        # Slice (not .iloc[i]) so the helpers keep receiving a one-row frame.
        row = df.iloc[i:i + 1, :]
        if row['recipe_name'].values[0] == prev['recipe_name'].values[0]:
            ingredient = fetch_ingredient_values(row)
            instance['ingredients'].append(ingredient)
        else:
            recipes.append(instance)
            instance = create_instance(row)
            prev = row

    # The loop only flushes on a name change, so the last record is pending.
    recipes.append(instance)



In [18]:
# Number of recipe records that were assembled.
len(recipes)

1

In [19]:
# Write the recipe records as pretty-printed UTF-8 JSON.
path = '../input/kiwi.json'
with open(path, mode='w', encoding='utf-8') as out_file:
    simplejson.dump(
        recipes,
        out_file,
        ignore_nan=True,      # NaN values are written as JSON null
        ensure_ascii=False,   # keep non-ASCII characters unescaped
        indent=4,
    )