In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [3]:
data = pd.read_csv('../Data/nutrition.csv', index_col=0)
data.set_index('name', inplace=True)

In [4]:
data[~data.isnull().any(axis=1)].loc['Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round roast, round'].head(60)

serving_size              100 g
calories                    121
total_fat                    3g
saturated_fat              1.1g
cholesterol                60mg
sodium                 53.00 mg
choline                 64.2 mg
folate                 4.00 mcg
folic_acid             0.00 mcg
niacin                 6.720 mg
pantothenic_acid       0.355 mg
riboflavin             0.184 mg
thiamin                0.063 mg
vitamin_a               4.00 IU
vitamin_a_rae          1.00 mcg
carotene_alpha         0.00 mcg
carotene_beta          0.00 mcg
cryptoxanthin_beta     0.00 mcg
lutein_zeaxanthin      0.00 mcg
lucopene                      0
vitamin_b12            1.84 mcg
vitamin_b6             0.644 mg
vitamin_c                0.0 mg
vitamin_d               1.00 IU
vitamin_e               0.24 mg
tocopherol_alpha        0.24 mg
vitamin_k               1.5 mcg
calcium                13.00 mg
copper                 0.042 mg
irom                    1.45 mg
magnesium              12.00 mg
manganes

In [5]:
units = data.loc['Sherbet, orange'].str.extract(r"([a-z]+)")

In [6]:
combine_columns = []
for i in range(len(units)):
    combined = str(data.columns[i]) + ' [' + str(units[0][i]) + ']'
    combine_columns.append(combined)
combine_columns

['serving_size [g]',
 'calories [nan]',
 'total_fat [g]',
 'saturated_fat [g]',
 'cholesterol [mg]',
 'sodium [mg]',
 'choline [mg]',
 'folate [mcg]',
 'folic_acid [mcg]',
 'niacin [mg]',
 'pantothenic_acid [mg]',
 'riboflavin [mg]',
 'thiamin [mg]',
 'vitamin_a [nan]',
 'vitamin_a_rae [mcg]',
 'carotene_alpha [mcg]',
 'carotene_beta [mcg]',
 'cryptoxanthin_beta [mcg]',
 'lutein_zeaxanthin [mcg]',
 'lucopene [nan]',
 'vitamin_b12 [mcg]',
 'vitamin_b6 [mg]',
 'vitamin_c [mg]',
 'vitamin_d [nan]',
 'vitamin_e [mg]',
 'tocopherol_alpha [mg]',
 'vitamin_k [mcg]',
 'calcium [mg]',
 'copper [mg]',
 'irom [mg]',
 'magnesium [mg]',
 'manganese [mg]',
 'phosphorous [mg]',
 'potassium [mg]',
 'selenium [mcg]',
 'zink [mg]',
 'protein [g]',
 'alanine [nan]',
 'arginine [nan]',
 'aspartic_acid [nan]',
 'cystine [nan]',
 'glutamic_acid [nan]',
 'glycine [nan]',
 'histidine [nan]',
 'hydroxyproline [nan]',
 'isoleucine [nan]',
 'leucine [nan]',
 'lysine [nan]',
 'methionine [nan]',
 'phenylalanine [

In [7]:
data.columns

Index(['serving_size', 'calories', 'total_fat', 'saturated_fat', 'cholesterol',
       'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'monounsatur

In [8]:
data['polyunsaturated_fatty_acids'] = data['polyunsaturated_fatty_acids'].str.extract('([-+]?\d*\.?\d+)')

In [9]:
data['polyunsaturated_fatty_acids']

name
Cornstarch                                                                                             0.025
Nuts, pecans                                                                                          21.614
Eggplant, raw                                                                                          0.076
Teff, uncooked                                                                                         1.071
Sherbet, orange                                                                                        0.080
                                                                                                       ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round     0.244
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand     0.520
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand     0.980
Beef, raw, all

In [10]:
data.fillna('NaN', inplace=True)

In [11]:
data['lucopene'] = data['lucopene'].astype(str)
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Data columns (total 75 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   serving_size                 8789 non-null   object
 1   calories                     8789 non-null   int64 
 2   total_fat                    8789 non-null   object
 3   saturated_fat                8789 non-null   object
 4   cholesterol                  8789 non-null   object
 5   sodium                       8789 non-null   object
 6   choline                      8789 non-null   object
 7   folate                       8789 non-null   object
 8   folic_acid                   8789 non-null   object
 9   niacin                       8789 non-null   object
 10  pantothenic_acid             8789 non-null   object
 11  riboflavin                   8789 non-null   obje

In [12]:
mixed = ['serving_size','total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'monounsaturated_fatty_acids',
       'polyunsaturated_fatty_acids', 'fatty_acids_total_trans', 'alcohol',
       'ash', 'caffeine', 'theobromine', 'water']
for column in mixed:
#     if type(data[column][0]) ==str:
        data[column] = data[column].str.extract('([-+]?\d*\.?\d+)')
        print(column)

serving_size
total_fat
saturated_fat
cholesterol
sodium
choline
folate
folic_acid
niacin
pantothenic_acid
riboflavin
thiamin
vitamin_a
vitamin_a_rae
carotene_alpha
carotene_beta
cryptoxanthin_beta
lutein_zeaxanthin
lucopene
vitamin_b12
vitamin_b6
vitamin_c
vitamin_d
vitamin_e
tocopherol_alpha
vitamin_k
calcium
copper
irom
magnesium
manganese
phosphorous
potassium
selenium
zink
protein
alanine
arginine
aspartic_acid
cystine
glutamic_acid
glycine
histidine
hydroxyproline
isoleucine
leucine
lysine
methionine
phenylalanine
proline
serine
threonine
tryptophan
tyrosine
valine
carbohydrate
fiber
sugars
fructose
galactose
glucose
lactose
maltose
sucrose
fat
saturated_fatty_acids
monounsaturated_fatty_acids
polyunsaturated_fatty_acids
fatty_acids_total_trans
alcohol
ash
caffeine
theobromine
water


In [13]:
data

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100,381,0.1,,0,9.00,0.4,0.00,0.00,0.000,...,0.05,0.009,0.016,0.025,0.00,0.0,0.09,0.00,0.00,8.32
"Nuts, pecans",100,691,72,6.2,0,0.00,40.5,22.00,0.00,1.167,...,71.97,6.180,40.801,21.614,0.00,0.0,1.49,0.00,0.00,3.52
"Eggplant, raw",100,25,0.2,,0,2.00,6.9,22.00,0.00,0.649,...,0.18,0.034,0.016,0.076,0.00,0.0,0.66,0.00,0.00,92.30
"Teff, uncooked",100,367,2.4,0.4,0,12.00,13.1,0,0,3.363,...,2.38,0.449,0.589,1.071,0,0,2.37,0,0,8.82
"Sherbet, orange",100,144,2,1.2,1,46.00,7.7,4.00,0.00,0.063,...,2.00,1.160,0.530,0.080,1.00,0.0,0.40,0.00,0.00,66.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100,125,3.5,1.4,62,54.00,64.5,4.00,0.00,6.422,...,3.50,1.353,1.554,0.244,62.00,0.0,1.11,0.00,0.00,72.51
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100,206,8.9,3.9,109,50.00,0,0.00,0.00,7.680,...,8.86,3.860,3.480,0.520,109.00,0,1.60,0,0,59.95
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100,277,23,12,78,39.00,0,1.00,0.00,6.550,...,22.74,11.570,8.720,0.980,78.00,0,0.92,0,0,59.80
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",100,121,3,1.1,60,53.00,64.2,4.00,0.00,6.720,...,3.04,1.086,1.266,0.233,60.00,0.0,1.10,0.00,0.00,73.43


In [14]:
combine_columns

['serving_size [g]',
 'calories [nan]',
 'total_fat [g]',
 'saturated_fat [g]',
 'cholesterol [mg]',
 'sodium [mg]',
 'choline [mg]',
 'folate [mcg]',
 'folic_acid [mcg]',
 'niacin [mg]',
 'pantothenic_acid [mg]',
 'riboflavin [mg]',
 'thiamin [mg]',
 'vitamin_a [nan]',
 'vitamin_a_rae [mcg]',
 'carotene_alpha [mcg]',
 'carotene_beta [mcg]',
 'cryptoxanthin_beta [mcg]',
 'lutein_zeaxanthin [mcg]',
 'lucopene [nan]',
 'vitamin_b12 [mcg]',
 'vitamin_b6 [mg]',
 'vitamin_c [mg]',
 'vitamin_d [nan]',
 'vitamin_e [mg]',
 'tocopherol_alpha [mg]',
 'vitamin_k [mcg]',
 'calcium [mg]',
 'copper [mg]',
 'irom [mg]',
 'magnesium [mg]',
 'manganese [mg]',
 'phosphorous [mg]',
 'potassium [mg]',
 'selenium [mcg]',
 'zink [mg]',
 'protein [g]',
 'alanine [nan]',
 'arginine [nan]',
 'aspartic_acid [nan]',
 'cystine [nan]',
 'glutamic_acid [nan]',
 'glycine [nan]',
 'histidine [nan]',
 'hydroxyproline [nan]',
 'isoleucine [nan]',
 'leucine [nan]',
 'lysine [nan]',
 'methionine [nan]',
 'phenylalanine [

In [15]:
data.columns = combine_columns

In [16]:
# data.to_excel('Nutrition.xlsx')

In [17]:
# ! pip install googletrans

In [18]:
data.reset_index(inplace=True)

In [19]:
data.name

0                                              Cornstarch
1                                            Nuts, pecans
2                                           Eggplant, raw
3                                          Teff, uncooked
4                                         Sherbet, orange
                              ...                        
8784    Beef, raw, all grades, trimmed to 0" fat, sepa...
8785    Lamb, cooked, separable lean only, composite o...
8786    Lamb, raw, separable lean and fat, composite o...
8787    Beef, raw, all grades, trimmed to 0" fat, sepa...
8788    Beef, raw, all grades, trimmed to 0" fat, sepa...
Name: name, Length: 8789, dtype: object

In [None]:
# !pip uninstall googletrans
# !pip install googletrans==4.0.0rc1

In [22]:
from googletrans import Translator
translator = Translator()
ar = translator.translate('مرحبا').
print(ar)

Welcome


In [24]:
from googletrans import Translator

translator = Translator()
data1 = data.name[:20].apply(lambda x: translator.translate(x, dest='de').text)
print(data1)

0                            Maisstärke
1                     Nüsse, Pekannüsse
2                        Aubergine, roh
3                       Teff, ungekocht
4                       Sherbet, Orange
5                       Blumenkohl, roh
6                      Taroblätter, roh
7                   Lamm, roh, gemahlen
8                       Käse, Camembert
9                   Vegetarische Filets
10                Tempo, Picante -Sauce
11             Goji -Beeren, getrocknet
12                 Mango -Nektar, Dosen
13                  Cracks, Reis -Toast
14                  Huhn, gekocht, Füße
15            Wachtel, roh, nur Fleisch
16    Kuchen, Zitrone, gebratene Kuchen
17               Paprika, roh, Jalapeno
18        Geflügelte Bohnenknollen, roh
19              Salami, Türkei, gekocht
Name: name, dtype: object
