In [1]:
import os
import numpy as np
import json
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from pprint import pprint

In [2]:
path = "valori_alimenti"

In [3]:
names = np.array([
    name.split(".json")[0] for path, dirs, names in os.walk(path)
    for name in names if name.endswith(".json")
])

In [4]:
with open("{path}/{name}.json".format(path=path, name=names[0]), "r") as f:
    dic = json.load(f)
dic

{'name': 'riso,_chicchi_lunghi,_parboiled,_cotto',
 'Principali': {'Calorie': {'unit': 'kj', 'amount': '513'},
  'Grassi': {'unit': 'g', 'amount': '0.37'},
  'Carboidrati': {'unit': 'g', 'amount': '26.05'},
  'Proteine': {'unit': 'g', 'amount': '2.91'},
  'Fibre': {'unit': 'g', 'amount': '0.9'},
  'Zuccheri': {'unit': 'g', 'amount': '0.11'},
  'Acqua': {'unit': 'g', 'amount': '70.36'},
  'Ceneri': {'unit': 'g', 'amount': '0.3'}},
 'Minerali': {'Calcio': {'unit': 'mg', 'amount': '19'},
  'Sodio': {'unit': 'mg', 'amount': '2'},
  'Fosforo': {'unit': 'mg', 'amount': '55'},
  'Potassio': {'unit': 'mg', 'amount': '56'},
  'Ferro': {'unit': 'mg', 'amount': '1.81'},
  'Magnesio': {'unit': 'mg', 'amount': '9'},
  'Zinco': {'unit': 'mg', 'amount': '0.37'},
  'Rame': {'unit': 'mg', 'amount': '0.07'},
  'Manganese': {'unit': 'mg', 'amount': '0.354'},
  'Selenio': {'unit': 'mcg', 'amount': '9.3'}},
 'Vitamine': {'Retinolo (Vit. A)': {'unit': 'mcg', 'amount': '0'},
  'Vitamina A, IU': {'unit': 'IU'

In [5]:
sezioni_uniche = set()

for name in tqdm(names):
    
    with open("{path}/{name}.json".format(path=path, name=names[0]), "r") as f:
        dic = json.load(f)
        
    sezioni_uniche |= set(dic.keys())
    
sezioni_uniche -= set(["name","meta-data"])
    
print(sezioni_uniche)

HBox(children=(IntProgress(value=0, max=3263), HTML(value='')))


{'Grassi monoinsaturi', 'Aminoacidi', 'Zuccheri', 'Lipidi', 'Minerali', 'Grassi saturi', 'Principali', 'Grassi polinsaturi', 'Vitamine'}


In [6]:
chiavi_uniche = set()

for name in tqdm(names):
    
    with open("{path}/{name}.json".format(path=path, name=names[0]), "r") as f:
        dic = json.load(f)
        
    for sezione in sezioni_uniche:
        chiavi_uniche |= set(dic[sezione].keys())

chiavi_uniche |= set(["name"])
print(chiavi_uniche)

HBox(children=(IntProgress(value=0, max=3263), HTML(value='')))


{'Grassi Saturi 10:0', 'Fruttosio', '22:5 n-3', 'Maltosio', 'Leucina', 'Vitamina A, RAE', 'Grassi Monoinsaturi 20:1', 'Fillochinone (Vit. K)', 'Tiamina (Vit. B1)', 'Fenilalanina', 'Grassi Monoinsaturi 14:1', 'Tirosina', 'Alpha-tocoferolo (Vit. E)', 'Acidi grassi, polinsaturi', 'Treonina', 'Grassi Saturi 22:0', 'Vitamina E, aggiunta', 'Colecalcifenolo (Vit. D)', 'Acidi grassi, saturi', 'Grassi', '22:6 n-3', 'Calcio', 'Carotene, beta', 'Selenio', 'Cistina', 'Folato, DFE', 'Serina', 'Grassi Saturi 20:0', '8:0', 'Calorie', 'Isoleucina', 'Grassi Saturi 17:0', 'Grassi Saturi 16:0', 'Riboflavina (Vit. B2)', 'Grassi Saturi 18:0', 'Acido glutammico', 'Criptoxantina, beta', 'Prolina', 'Grassi Saturi 12:0', 'Grassi Polinsaturi 20:4 indifferenziato', 'Colina totale (Vit. J)', 'name', 'Grassi Polinsaturi 18:3 indifferenziato', 'Carotene, alfa', 'Valina', '6:0', 'Magnesio', 'Manganese', 'Acqua', 'Sodio', '4:0', 'Grassi Polinsaturi 18:4', 'Acido aspartico', 'Folati, totali', 'Niacina (Vit. B3)', 'Pr

In [7]:
[x for x in chiavi_uniche if "," in x]

['Vitamina A, RAE',
 'Acidi grassi, polinsaturi',
 'Vitamina E, aggiunta',
 'Acidi grassi, saturi',
 'Carotene, beta',
 'Folato, DFE',
 'Criptoxantina, beta',
 'Carotene, alfa',
 'Folati, totali',
 'Vitamina A, IU',
 'Acidi grassi, monoinsaturi',
 'Vitamina B-12, aggiunta']

In [8]:
merged = pd.DataFrame()

for name in tqdm(names):
    df = pd.DataFrame()
    
    with open("{path}/{name}.json".format(path=path, name=name), "r") as f:
        dic = json.load(f)
        
    for sezione in sezioni_uniche:
        sub_section = dic.get(sezione,None)
        
        if sub_section:
            for sub_key,value in sub_section.items():
                unit = value["unit"]
                amount = float(value["amount"])
                
                if unit == "kj":
                    unit = "kcal"
                    amount *= 0.239006
                    amount = round(amount,2)
                
                if sezione == "Grassi polinsaturi":
                    if not sub_key.startswith("Grassi polinsaturi"):
                        sub_key = "Grassi polinsaturi " + sub_key
                        
                if sezione == "Grassi saturi":
                    if not sub_key.startswith("Grassi saturi"):
                        sub_key = "Grassi saturi " + sub_key
                        
                        
                df[sub_key + " | " + unit] = [amount]
                
    df["name"] = [dic["name"].replace("_"," ").lower().strip()]
    
    merged = merged.append(df,sort=True)
    
merged = merged.set_index("name") # set name as the index of the DataFrame

HBox(children=(IntProgress(value=0, max=3263), HTML(value='')))




In [9]:
merged

Unnamed: 0_level_0,"Acidi grassi, monoinsaturi | g","Acidi grassi, polinsaturi | g","Acidi grassi, saturi | g","Acidi grassi, trans | g","Acidi grassi, trans-monoenoici | g","Acidi grassi, trans-polienoico | g",Acido Pantotenico (Vit. B5) | mg,Acido ascorbico (Vit. C) | mg,Acido aspartico | g,Acido folico (Vit. B9 o M o Folacina) | mcg,...,Valina | g,"Vitamina A, IU | IU","Vitamina A, RAE | mcg_RAE","Vitamina B-12, aggiunta | mcg",Vitamina D (D2+D3) | mcg,Vitamina D2 | mcg,Vitamina D3 | mcg,"Vitamina E, aggiunta | mg",Zinco | mg,Zuccheri | g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"riso, chicchi lunghi, parboiled, cotto",0.074,0.091,0.074000,,,,0.323,0.0,0.308,79.0,...,0.182,0.0,0.0,0.0,0.0,,,0.0,0.37,0.110000
knorr vellutata di carciofi di gerusalemme,,,,,,,,,,,...,,,,,,,,,,
astice,0.174,0.293,0.159000,,,,0.546,1.2,1.648,0.0,...,0.749,53.0,16.0,0.0,0.0,,0.0,0.0,1.30,0.000000
spezzatino in scatola,2.543,0.251,2.185000,,,,0.120,0.7,0.382,0.0,...,0.182,223.0,11.0,0.0,0.0,,,0.0,1.00,1.760000
"crescione, lesso",0.205,0.196,0.020000,,,,0.163,23.0,,0.0,...,,4649.0,232.0,0.0,0.0,,,0.0,0.15,3.110000
knorr vellutata di verdure dolci,,,,,,,,,,,...,,,,,,,,,,
nestle - motta kids - liquì,,,,,,,,,,,...,,,,,,,,,,
loacker cremkakao classic 45 grammi,,,,,,,,,,,...,,,,,,,,,,
ferrero kinder colazione più - pezzo singolo,,,10.800000,,,,,,,,...,,,,,,,,,,21.500000
nestle - buitoni buitoni bella napoli le microonde pizza margherita,,,4.300000,,,,,,,,...,,,,,,,,,,1.500000


In [10]:
[x for x in merged.columns if x.startswith("Calorie")]

['Calorie | kcal']

In [11]:
pprint(list(merged.columns))

['Acidi grassi, monoinsaturi | g',
 'Acidi grassi, polinsaturi | g',
 'Acidi grassi, saturi | g',
 'Acidi grassi, trans | g',
 'Acidi grassi, trans-monoenoici | g',
 'Acidi grassi, trans-polienoico | g',
 'Acido Pantotenico (Vit. B5) | mg',
 'Acido ascorbico (Vit. C) | mg',
 'Acido aspartico | g',
 'Acido folico (Vit. B9 o M o Folacina) | mcg',
 'Acido glutammico | g',
 'Acqua | g',
 'Alanina | g',
 'Alpha-tocoferolo (Vit. E) | mg',
 'Amido | g',
 'Arginina | g',
 'Beta-sistosterolo | mg',
 'Betaina | mg',
 'Calcio | mg',
 'Calorie | kcal',
 'Campesterolo | mg',
 'Carboidrati | g',
 'Carotene, alfa | mcg',
 'Carotene, beta | mcg',
 'Ceneri | g',
 'Cistina | g',
 'Cobalamina (Vit. B12) | mcg',
 'Colecalcifenolo (Vit. D) | IU',
 'Colesterolo | mg',
 'Colina totale (Vit. J) | mg',
 'Criptoxantina, beta | mcg',
 'Destrosio | g',
 'Fenilalanina | g',
 'Ferro | mg',
 'Fibre | g',
 'Fillochinone (Vit. K) | mcg',
 'Fitosteroli | mg',
 'Fluoro | mcg',
 'Folati, totali | mcg',
 'Folato alimentar

In [12]:
merged.to_csv("csv/valori_alimentari.csv")