In [4]:
# Standard library
import os
import warnings

# Third-party packages
import earthpy as et
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.axes._axes import _log as matplotlib_axes_logger
from pandas.plotting import register_matplotlib_converters

# Register the pandas <-> matplotlib date/time converters
register_matplotlib_converters()

# Only let ERROR-level (and above) messages through from matplotlib's axes
# logger, and hide every Python warning for the rest of the session.
matplotlib_axes_logger.setLevel('ERROR')
warnings.filterwarnings('ignore')

# Notebook-wide seaborn theme: larger fonts on a white grid
sns.set(font_scale=1.5, style="whitegrid")

# Work from <home>/earth-analytics/food-database-uk so the data file below
# can be opened by its bare file name.
os.chdir(os.path.join(et.io.HOME, "earth-analytics", "food-database-uk"))

# Inorganics table, relative to the working directory set above
fname = "1.4_inorganics.csv"

# Display every column and every row when a frame or series is rendered
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Read the CSV with pandas defaults and preview the first rows
inorganics_fooddb_uk = pd.read_csv(fname)
inorganics_fooddb_uk.head()

Unnamed: 0,Food Code,Food Name,Description,Group,Previous,Main data references,Footnote,Sodium (mg),Potassium (mg),Calcium (mg),Magnesium (mg),Phosphorus (mg),Iron (mg),Copper (mg),Zinc (mg),Chloride (mg),Manganese (mg),Selenium (µg),Iodine (µg)
0,,,,,,,,,K,CA,MG,P,FE,CU,ZN,CL,MN,SE,I
1,,,,,,,,Sodium,Potassium,Calcium,Magnesium,Phosphorus,Iron,Copper,Zinc,Chloride,Manganese,Selenium,Iodine
2,13-145,"Ackee, canned, drained",8 cans,DG,554.0,"MW4, 1978; and Vegetables, Herbs and Spices Su...",,240,270,35,40,47,0.70,0.27,0.6,340,N,N,Tr
3,13-146,"Agar, dried",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,110,110,760,620,50,20.60,N,14.4,N,4.30,N,N
4,13-147,"Agar, dried, soaked and drained",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,10,20,110,75,8,3.50,N,2.5,N,0.40,N,N


In [5]:
# Keep only rows that have a Food Code. The first two rows of the loaded
# table hold nutrient codes / short names instead of a food record, so they
# have no Food Code and are removed here.
inorganics_fooddb_uk = inorganics_fooddb_uk.dropna(
    axis="index",
    how="any",
    subset=["Food Code"],
)
inorganics_fooddb_uk.head()

Unnamed: 0,Food Code,Food Name,Description,Group,Previous,Main data references,Footnote,Sodium (mg),Potassium (mg),Calcium (mg),Magnesium (mg),Phosphorus (mg),Iron (mg),Copper (mg),Zinc (mg),Chloride (mg),Manganese (mg),Selenium (µg),Iodine (µg)
2,13-145,"Ackee, canned, drained",8 cans,DG,554.0,"MW4, 1978; and Vegetables, Herbs and Spices Su...",,240,270,35,40,47,0.7,0.27,0.6,340,N,N,Tr
3,13-146,"Agar, dried",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,110,110,760,620,50,20.6,N,14.4,N,4.30,N,N
4,13-147,"Agar, dried, soaked and drained",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,10,20,110,75,8,3.5,N,2.5,N,0.40,N,N
5,13-148,"Alfalfa sprouts, raw",Analytical and literature sources,DG,,"Vegetables, Herbs and Spices Supplement, 1991",,6,79,32,27,70,1.0,0.16,0.9,N,0.20,N,N
6,13-801,"Allspice, ground",Literature sources,H,,Marsh et al. (1977) Composition of foods: spic...,,77,1040,660,130,110,7.1,0.55,1.0,N,1.70,N,N


In [6]:
# Summarise the cleaned frame: index range, per-column non-null counts and dtypes
inorganics_fooddb_uk.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2887 entries, 2 to 2888
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Food Code             2887 non-null   object
 1   Food Name             2887 non-null   object
 2   Description           2887 non-null   object
 3   Group                 2887 non-null   object
 4   Previous              1867 non-null   object
 5   Main data references  2886 non-null   object
 6   Footnote              12 non-null     object
 7   Sodium (mg)           2887 non-null   object
 8   Potassium (mg)        2886 non-null   object
 9   Calcium (mg)          2886 non-null   object
 10  Magnesium (mg)        2886 non-null   object
 11  Phosphorus (mg)       2886 non-null   object
 12  Iron (mg)             2886 non-null   object
 13  Copper (mg)           2886 non-null   object
 14  Zinc (mg)             2886 non-null   object
 15  Chloride (mg)         2885 non-null   

In [8]:
# Keep the identifying columns (Food Code, Food Name, Group) and the mineral
# measurements; remove the descriptive / reference columns.
# drop(columns=...) raises KeyError if a listed column does not exist, so a
# misspelled name is caught here rather than silently ignored.
inorganics_fooddb_uk_select = inorganics_fooddb_uk.drop(
    columns=['Description', 'Previous', 'Main data references', 'Footnote']
)
inorganics_fooddb_uk_select.head()

Unnamed: 0,Food Code,Food Name,Group,Sodium (mg),Potassium (mg),Calcium (mg),Magnesium (mg),Phosphorus (mg),Iron (mg),Copper (mg),Zinc (mg),Chloride (mg),Manganese (mg),Selenium (µg),Iodine (µg)
2,13-145,"Ackee, canned, drained",DG,240,270,35,40,47,0.7,0.27,0.6,340,N,N,Tr
3,13-146,"Agar, dried",DG,110,110,760,620,50,20.6,N,14.4,N,4.30,N,N
4,13-147,"Agar, dried, soaked and drained",DG,10,20,110,75,8,3.5,N,2.5,N,0.40,N,N
5,13-148,"Alfalfa sprouts, raw",DG,6,79,32,27,70,1.0,0.16,0.9,N,0.20,N,N
6,13-801,"Allspice, ground",H,77,1040,660,130,110,7.1,0.55,1.0,N,1.70,N,N


In [9]:
# Summarise the reduced frame: index range, per-column non-null counts and dtypes
inorganics_fooddb_uk_select.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2887 entries, 2 to 2888
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Food Code        2887 non-null   object
 1   Food Name        2887 non-null   object
 2   Group            2887 non-null   object
 3   Sodium (mg)      2887 non-null   object
 4   Potassium (mg)   2886 non-null   object
 5   Calcium (mg)     2886 non-null   object
 6   Magnesium (mg)   2886 non-null   object
 7   Phosphorus (mg)  2886 non-null   object
 8   Iron (mg)        2886 non-null   object
 9   Copper (mg)      2886 non-null   object
 10  Zinc (mg)        2886 non-null   object
 11  Chloride (mg)    2885 non-null   object
 12  Manganese (mg)   2885 non-null   object
 13  Selenium (µg)    2884 non-null   object
 14  Iodine (µg)      2883 non-null   object
dtypes: object(15)
memory usage: 360.9+ KB


In [10]:
# Preview the first ten rows only: display.max_rows is set to None in the
# setup cell, so a bare frame here would render every row of the table.
inorganics_fooddb_uk_select.head(10)

Unnamed: 0,Food Code,Food Name,Group,Sodium (mg),Potassium (mg),Calcium (mg),Magnesium (mg),Phosphorus (mg),Iron (mg),Copper (mg),Zinc (mg),Chloride (mg),Manganese (mg),Selenium (µg),Iodine (µg)
2,13-145,"Ackee, canned, drained",DG,240,270,35,40,47,0.70,0.27,0.6,340,N,N,Tr
3,13-146,"Agar, dried",DG,110,110,760,620,50,20.60,N,14.4,N,4.30,N,N
4,13-147,"Agar, dried, soaked and drained",DG,10,20,110,75,8,3.50,N,2.5,N,0.40,N,N
5,13-148,"Alfalfa sprouts, raw",DG,6,79,32,27,70,1.00,0.16,0.9,N,0.20,N,N
6,13-801,"Allspice, ground",H,77,1040,660,130,110,7.10,0.55,1.0,N,1.70,N,N
7,14-870,"Almonds, flaked and ground",GA,14,780,240,270,550,3.00,1.00,3.2,18,1.70,2,2
8,14-897,"Almonds, toasted",GA,3,713,268,279,471,3.73,1.10,3.3,18,2.23,2,2
9,14-898,"Almonds, weighed with shells",GA,Tr,271,100,100,178,1.37,0.38,1.2,7,0.81,2,1
10,14-896,"Almonds, whole kernels",GA,1,733,269,270,481,3.71,1.03,3.1,18,2.18,4,2
11,13-150,"Amaranth leaves, boiled in unsalted water",DG,11,540,220,110,54,2.40,0.11,0.5,12,0.90,N,N


In [11]:
# Number of foods per food-group code, most frequent first.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
inorganics_fooddb_uk_select['Group'].value_counts()

DR     309
DG     229
FA     211
MR     151
JA     151
MAE    105
MAC     91
DB      66
MAG     66
MCA     64
H       61
JC      53
AS      48
GA      47
BL      46
MI      42
AM      42
AN      41
MG      40
AF      38
AP      38
BR      38
JR      35
AI      32
WC      30
AA      29
AC      29
AD      27
MAA     25
WY      23
SC      22
JK      22
PAA     22
MCO     21
SEC     18
BP      18
WCD     18
AT      18
OC      18
PAC     17
DAP     17
SEA     17
PCC     16
CA      16
DF      16
JM      16
BN      15
AO      15
WCN     14
CD      14
DAM     14
BH      13
WCG     13
WAA     12
OA      12
SN      11
QA      11
MBG     11
FC      10
DAR     10
WAC      9
PE       9
BJC      8
BAR      8
J        8
PCA      8
QE       7
BC       7
MCC      7
F        7
BV       6
AB       6
AE       6
OF       5
DI       5
WA       5
AG       5
DAE      4
QC       4
BAK      4
BAE      4
BAH      4
QI       4
MAI      4
S        4
WAE      4
BAB      3
MEC      3
A        3
BTM      3
G        3

In [12]:
# Number of distinct food-group codes (missing values are not counted)
inorganics_fooddb_uk_select['Group'].nunique()

121

In [13]:
# Readable labels for the ten most frequent food-group codes.
# Codes that are not listed here are left unchanged by replace().
group_labels = {
    "DR": "Vegetable dishes",
    "DG": "Vegetables, general",
    "FA": "Fruit, general",
    "MR": "Meat dishes",
    "JA": "White fish",
    "MAE": "Lamb",
    "MAC": "Beef",
    "DB": "Beans and lentils",
    "MAG": "Pork",
    "MCA": "Chicken",
}

# The nested dict limits the replacement to the 'Group' column only
inorganics_fooddb_uk_select_rpl = inorganics_fooddb_uk_select.replace({'Group': group_labels})
inorganics_fooddb_uk_select_rpl.head()


Unnamed: 0,Food Code,Food Name,Group,Sodium (mg),Potassium (mg),Calcium (mg),Magnesium (mg),Phosphorus (mg),Iron (mg),Copper (mg),Zinc (mg),Chloride (mg),Manganese (mg),Selenium (µg),Iodine (µg)
2,13-145,"Ackee, canned, drained","Vegetables, general",240,270,35,40,47,0.7,0.27,0.6,340,N,N,Tr
3,13-146,"Agar, dried","Vegetables, general",110,110,760,620,50,20.6,N,14.4,N,4.30,N,N
4,13-147,"Agar, dried, soaked and drained","Vegetables, general",10,20,110,75,8,3.5,N,2.5,N,0.40,N,N
5,13-148,"Alfalfa sprouts, raw","Vegetables, general",6,79,32,27,70,1.0,0.16,0.9,N,0.20,N,N
6,13-801,"Allspice, ground",H,77,1040,660,130,110,7.1,0.55,1.0,N,1.70,N,N


In [14]:
# Number of foods per food group after relabelling, most frequent first.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
inorganics_fooddb_uk_select_rpl['Group'].value_counts()

Vegetable dishes       309
Vegetables, general    229
Fruit, genera          211
Meat dishes            151
White fish             151
Lamb                   105
Beef                    91
Beans and lentils       66
Pork                    66
Chicken                 64
H                       61
JC                      53
AS                      48
GA                      47
BL                      46
MI                      42
AM                      42
AN                      41
MG                      40
AF                      38
AP                      38
BR                      38
JR                      35
AI                      32
WC                      30
AA                      29
AC                      29
AD                      27
MAA                     25
WY                      23
SC                      22
JK                      22
PAA                     22
MCO                     21
SEC                     18
BP                      18
WCD                     18
A