In [1]:
from pathlib import Path

import pandas as pd

#Silence the chained-assignment warning ("Try using .loc[row_indexer,col_indexer] = value instead");
#pandas' default for this option is 'warn'.
pd.set_option('mode.chained_assignment', None)

#Show floats with thousands separators and two decimals, because some results are too big to
#display correctly with the default settings.
pd.options.display.float_format = '{:,.2f}'.format

<b>Data Discovery - Task 2.1: Identify the primary key of each table and test them.</b>

In [2]:
#Cell for importing all FAO data

#Folder holding the CSV extracts. Every file below is read from here, so this is the only line
#to change when the notebook is run on another machine.
DATA_DIR = Path(r"C:\Users\Blagoj\Documents\Openclassrooms\Project 4")

#Columns used together as the custom primary key of the food balance extracts.
FBS_PRIMARY_KEY = ['Area Code (M49)', 'Element Code', 'Item Code (CPC)', 'Year Code']

#animal products dataframe, indexed by the custom primary key.
#verify_integrity=True makes set_index raise if the key contains duplicates, which is the test
#that the chosen columns really form a primary key.
#ap.index.value_counts() can also be used to confirm that the key holds no duplicates.
ap = pd.read_csv(DATA_DIR / "Animal products.csv").set_index(FBS_PRIMARY_KEY, verify_integrity=True)

#vegetal products dataframe, indexed and checked the same way.
vp = pd.read_csv(DATA_DIR / "Vegetal products.csv").set_index(FBS_PRIMARY_KEY, verify_integrity=True)

#Adds variable 'origin' so the source table stays identifiable after concatenation.
ap["origin"] = "animal"
vp["origin"] = "vegetal"
#combined dframe for all food products regardless whether they are animal or vegetal:
comb = pd.concat([ap, vp])

#population dataframe:
pop = pd.read_csv(DATA_DIR / "Population.csv")

#undernourished dataframe
un = pd.read_csv(DATA_DIR / "Undernourishment.csv")

#cereals dataframe
cer = pd.read_csv(DATA_DIR / "Cereals.csv")

<b>Data Discovery - Task 2.2: Create a dataframe containing information about each country's population. Calculate the total number of humans on the planet.</b>

In [3]:
#Keep only the 2019 rows flagged "X": this leaves out the lines from previous years and the
#manually entered lines, in order to avoid duplicates (China).
newpop = pop.loc[(pop['Year'] == 2019) & (pop['Flag'] == "X")]

#Summing up the whole population. The 'Value' column is selected by name rather than by position,
#so the result does not silently change if the column order of the CSV does.
#The sum is multiplied by 1000 (source values are presumably in thousands of people - TODO confirm).
#totalpop is deliberately kept as a one-element Series labelled 'Value'.
totalpop = newpop[['Value']].sum() * 1000

print(totalpop)

Value   7,652,392,110.00
dtype: float64


<b>Data Discovery - Task 2.3a: Laura: "Okay… this does not seem easy! Production, Import Quantity, Export Quantity, Feed, Losses, Domestic Supply... wait - isn't Domestic Supply the difference between imports and exports?"</b>

<b>You: "Uh... I’m not so sure..."</b>

<b>Laura: "Okay. Would you check that for me please? And what are all the available elements?"</b>

In [4]:
#How many distinct values does the 'Element' column hold?
element_count = comb['Element'].nunique()
print(f"There are {element_count} unique values. They are:")
#The distinct values themselves, in alphabetical order:
print(sorted(comb["Element"].unique()))

There are 17 unique values. They are:
['Domestic supply quantity', 'Export Quantity', 'Fat supply quantity (g/capita/day)', 'Feed', 'Food', 'Food supply (kcal/capita/day)', 'Food supply quantity (kg/capita/yr)', 'Import Quantity', 'Losses', 'Other uses (non-food)', 'Processing', 'Production', 'Protein supply quantity (g/capita/day)', 'Residuals', 'Seed', 'Stock Variation', 'Tourist consumption']


<b>Data Discovery - Task 2.3b: Among the documents on the Food Balance Sheets that you have downloaded, you will find redundant information concerning the 11 elements. Identify these redundancies and give your answer as a mathematical formula. Have a look at the Food Balance Sheets, then click on “definitions and standards”.</b>

<b>The expected formula is a simple three-term equation involving each of the 11 amounts seen above: $a_1 + a_2 + [\dots] = b_1 + [\dots] = c_1 + c_2 + [\dots]$. For this equation, give the example of wheat in France.</b>

In [5]:
#FAO definition (new methodology): production + imports - exports + changes in stocks
#(decrease or increase) = supply for domestic utilization.
#In this extract 'Stock Variation' is subtracted below; with that sign the three terms match.

#For this equation, give the example of wheat in France.
newvp = vp.loc[(vp['Area']=='France') & (vp['Item']=='Wheat and products') & (vp['Year']==2019)]

#'Value' is selected as a Series (not as a one-column frame) so that .sum() returns a plain number.
#Calling int() on a single-element Series is deprecated in recent pandas versions.

#Difference between the sum of Production & Import Quantity and the sum of
#Export Quantity and Stock Variation.
inflow = newvp.loc[newvp['Element'].isin(['Production', 'Import Quantity']), 'Value'].sum()
outflow = newvp.loc[newvp['Element'].isin(['Export Quantity', 'Stock Variation']), 'Value'].sum()
b = inflow - outflow
print(f"The sum of the main elements concerning movement is: {int(b):.0f}")

#Printing the value of the Domestic supply quantity
a = newvp.loc[newvp['Element'].isin(['Domestic supply quantity']), 'Value'].sum()
print(f"The domestic supply value is: {int(a):.0f}")

#Printing the sum of all of the remaining (utilisation) elements to confirm the expected
#three term equation a1+a2+[...]=b1+[...]=c1+c2+[...]
usage_elements = ['Feed', 'Seed', 'Losses', 'Processing', 'Other uses (non-food)', 'Residuals',
                  'Food', 'Tourist consumption']
c = newvp.loc[newvp['Element'].isin(usage_elements), 'Value'].sum()
print(f"The sum of all elements concerning the usage is: {int(c):.0f}")

#We can see that the three results are the same confirming the expected three term equation.

The sum of the main elements concerning movement is: 20338
The domestic supply value is: 20338
The sum of all elements concerning the usage is: 20338


In [6]:
#Further details on the three term equation

def element_total(frame, element):
    """Return the sum of 'Value' over the rows of `frame` whose 'Element' equals `element`.

    The column is selected as a Series, so the result is a plain number (0 when no row
    matches) that can be passed to int() without relying on the deprecated int(Series).
    """
    return frame.loc[frame['Element'] == element, 'Value'].sum()


#Supply side
a1 = element_total(newvp, 'Production')
a2 = element_total(newvp, 'Import Quantity')
a3 = element_total(newvp, 'Export Quantity')
a4 = element_total(newvp, 'Stock Variation')

#Domestic supply
b1 = element_total(newvp, 'Domestic supply quantity')

#Utilisation side
c1 = element_total(newvp, 'Feed')
c2 = element_total(newvp, 'Seed')
c3 = element_total(newvp, 'Losses')
c4 = element_total(newvp, 'Processing')
c5 = element_total(newvp, 'Other uses (non-food)')
c6 = element_total(newvp, 'Residuals')
c7 = element_total(newvp, 'Food')
c8 = element_total(newvp, 'Tourist consumption')

#The three terms are compared after truncation to whole units.
print(int(a1+a2-a3-a4)==int(b1)==int(c1+c2+c3+c4+c5+c6+c7+c8))

True


<b>Data cleaning - Task 3</b>

In [7]:
#Renaming of combined dataframe’s columns. Flagging redundant columns ("red...")
comb.columns = ["red", "red2", "country", "element","item", "year", "unit", "value", "red3", "red4", 'origin']

#Column name used for each element once the table is pivoted. Renaming by element name (instead of
#assigning a positional list) cannot mislabel a column if the set or order of elements changes.
element_columns = {
    'Domestic supply quantity': 'domestic_supply_quantity',
    'Export Quantity': 'export_quantity',
    'Fat supply quantity (g/capita/day)': 'fat_supply_quantity_gcapitaday',
    'Feed': 'feed',
    'Food': 'food',
    'Food supply (kcal/capita/day)': 'food_supply_kcalcapitaday',
    'Food supply quantity (kg/capita/yr)': 'food_supply_quantity_kgcapitayr',
    'Import Quantity': 'import_quantity',
    'Losses': 'losses',
    'Other uses (non-food)': 'other_uses',
    'Processing': 'processing',
    'Production': 'production',
    'Protein supply quantity (g/capita/day)': 'protein_supply_quantity_gcapitaday',
    'Residuals': 'residuals',
    'Seed': 'seed',
    'Stock Variation': 'stock_variation',
    'Tourist consumption': 'tourist_consumption',
}

#Transformation of ‘comb’ to a pivot table: one row per country/item/year/origin, one column per element.
#aggfunc is given as the string 'sum'; passing the builtin sum is deprecated in recent pandas.
#values is a single name (not a list) so the resulting columns are the element names themselves.
pivotcomb = comb.pivot_table(
    index=["country", "item", "year", "origin"],
    columns="element", values="value", aggfunc="sum")

#Fail loudly if the pivot does not contain exactly the expected elements.
mismatched = set(pivotcomb.columns).symmetric_difference(element_columns)
if mismatched:
    raise ValueError(f"Pivoted elements differ from the expected set: {sorted(mismatched)}")

#Index columns need to be normal columns; the leftover 'element' axis label is dropped.
pivotcomb = pivotcomb.rename(columns=element_columns).rename_axis(columns=None).reset_index()

<b>Computing new variables - Task 4: Preparations</b>

In [8]:
#Population values are added to the main dataframe to make the new variables easier to compute.

#Population lookup by area and year; 'Value' is renamed up front so the merged column is
#already called 'population'.
population_lookup = pop[['Area', 'Year', 'Value']].rename(columns={'Value': 'population'})

#cnv (Computing New Variables) = pivotcomb joined with the population table; the join keys
#coming from the population side are dropped afterwards.
cnv = (
    pivotcomb
    .merge(population_lookup, left_on=['country', 'year'], right_on=['Area', 'Year'])
    .drop(columns=['Area', 'Year'])
)

#China rows are removed as they are duplicates of China split in smaller sections.
cnv = cnv[cnv['country'] != "China"]

<b>Computing new variables - Tasks 4.1, 4.2, 4.3 and 4.4</b>

In [9]:
#Work on an independent copy so the column assignments below never target a filtered slice.
cnv = cnv.copy()

#4.1 - Calculating food_supply_kcal=food_supply_kcalcapitaday*365*population*1000
cnv['food_supply_kcal'] = cnv['food_supply_kcalcapitaday']*365*cnv['population']*1000

#4.1 - Calculating food_supply_kgprotein=protein_supply_quantity_gcapitaday*365*population*1000/1000
#(the *1000 and the /1000 cancel out)
cnv['food_supply_kgprotein'] = cnv['protein_supply_quantity_gcapitaday']*365*cnv['population']

#4.2 - Calculating food_supply_kg=food*1000*1000
cnv['food_supply_kg'] = cnv['food']*1000000

#Rows where the ratios are defined. Everywhere else (zero, negative or missing food_supply_kg)
#the ratio is set to 0, which also avoids dividing by zero.
has_food = cnv['food_supply_kg'] > 0

#4.3 - Calculating ratio_kcalkg=food_supply_kcal/food_supply_kg
cnv['ratio_kcalkg'] = (cnv['food_supply_kcal']/cnv['food_supply_kg']).where(has_food, 0)

#4.3 - Calculating protein_percentage=food_supply_kgprotein/food_supply_kg
cnv['protein_percentage'] = (cnv['food_supply_kgprotein']/cnv['food_supply_kg']).where(has_food, 0)

#4.4 - Calculating dom_sup_kcal=domestic_supply_quantity*1000*1000*ratio_kcalkg
cnv['dom_sup_kcal'] = cnv['domestic_supply_quantity']*1000000*cnv['ratio_kcalkg']

#4.4 - Calculating dom_sup_kgprot=domestic_supply_quantity*1000*1000*protein_percentage
#(the column was previously misspelled 'dom_sum_kgprot')
cnv['dom_sup_kgprot'] = cnv['domestic_supply_quantity']*1000000*cnv['protein_percentage']

<b>Computing new variables - Task 4.5.1: From the FAO Food Security Indicators, find the countries with a malnourishment
rate of more than 10%.</b>

In [10]:
#Attach each country's undernourishment rate to the main (cnv) dframe.
#The left join keeps every cnv row, including countries missing from the undernourishment table.
cnv2 = cnv.merge(un[['country', 'undernourishment']], how='left', on='country')

<b>Computing new variables - Task 4.5.2: Find the 25 most exported items (in terms of quantity) by these countries
(for any given year).</b>

In [11]:
#Top 25 exported items and their quantity.
#Value should be either 2014 or 2019 as those are the only 2 years we are observing in our main dataframe.
givenyear = 2019

#Rows of undernourished countries (rate above 10%) for the given year only.
undernourished_rows = cnv2[(cnv2['undernourishment'] > 0.1) & (cnv2['year'] == givenyear)]

#One line per item with its total exported quantity, item turned back into a normal column.
topitems = (
    undernourished_rows
    .pivot_table(index=["item"], columns=[], values=["export_quantity"], aggfunc='sum')
    .reset_index()
)
topitems.columns = ['exported_item', 'exported_quantity']

#Largest exported quantities first (descending order).
topitems = topitems.sort_values(by=['exported_quantity'], ascending=False)

#The 25 most exported items.
top25 = topitems.head(25)

<b>Computing new variables - Task 4.5.3: From the global food balance sheet data, select the 200 highest import quantities
among these 25 items (1 import quantity = quantity of a given product imported by
a given country over the selected year).</b>

In [12]:
#Select lines from main dataframe where the items match those of the top25 df

#exported_quantity is brought onto the main frame (it stays empty for items outside the top 25),
#and the frame is ordered by import quantity, largest first, for easier filtering below.
cnv3 = (
    cnv2
    .merge(top25, left_on='item', right_on='exported_item', how='left')
    .drop(columns=['exported_item'])
    .sort_values(['import_quantity'], ascending=False)
)

#Temporary dataframe: undernourished countries, selected year, items present in the top25 list.
is_undernourished = cnv3['undernourishment'] > 0.1
in_given_year = cnv3['year'] == givenyear
is_top_export = cnv3['exported_quantity'].notnull()
temp = cnv3[is_undernourished & in_given_year & is_top_export]

#The frame is already sorted, so the first 200 lines are the 200 highest import quantities.
top200 = temp.head(200)

<b>Computing new variables - Task 4.5.4: For the 200 corresponding lines in the food balance dataframe, set “True” for the
variable, and “False” for the other lines.</b>

In [13]:
#Flag the lines of cnv3 that were selected into top200.
#top200 is a slice of cnv3 and keeps cnv3's row labels, so index membership identifies exactly
#those lines. This replaces a merge on every column, which compares float columns for equality
#and can multiply rows when duplicated lines exist.
assert cnv3.index.is_unique, "cnv3 row labels must be unique to flag rows by index"
cnv4 = (
    cnv3
    .assign(great_import_from_undern_countries=cnv3.index.isin(top200.index))
    .reset_index(drop=True)
)

#Caution : In the food balance dataframe, only 200 lines should have “True” as a value for
#the variable great_import_from_undern_countries, all the others should be “False”.
#Proving that we really have only 200 rows with 'True' value
cnv4.great_import_from_undern_countries.value_counts()

False    33195
True       200
Name: great_import_from_undern_countries, dtype: int64

<b>Identify major trends - Task 5.1:
Considering only plant products, what proportion of the global domestic supply is used as:
food,
feed,
losses,
other uses?
</b>

In [14]:
#Totals needed for the requested proportions: plant products only, selected year only.
vegetal_rows = cnv4.loc[cnv4['origin'].isin(['vegetal']) & (cnv4['year']==givenyear)]

#Each column is summed as a Series so the totals are plain numbers; int() on a
#single-element Series is deprecated in recent pandas versions.
totaldomsupply = vegetal_rows['domestic_supply_quantity'].sum()
totalfeed = vegetal_rows['feed'].sum()
totalfood = vegetal_rows['food'].sum()
totallosses = vegetal_rows['losses'].sum()
totalotheruses = vegetal_rows['other_uses'].sum()

#Calculating the proportions (totals are truncated to whole units before dividing)
propfood=int(totalfood)*100/int(totaldomsupply)
print(f"The proportion of the global domestic supply (considering only plant products) used as 'food' is: {propfood:.2f}%")

propfeed=int(totalfeed)*100/int(totaldomsupply)
print(f"The proportion of the global domestic supply (considering only plant products) used as 'feed' is: {propfeed:.2f}%")

proplosses=int(totallosses)*100/int(totaldomsupply)
print(f"The proportion of the global domestic supply (considering only plant products) used as 'losses' is: {proplosses:.2f}%")

propotheruses=int(totalotheruses)*100/int(totaldomsupply)
print(f"The proportion of the global domestic supply (considering only plant products) used as 'other uses' is: {propotheruses:.2f}%")

#With processing, seed, tourist consumption and residuals our percentages add up to 100% as expected from the task 2 equation.

The proportion of the global domestic supply (considering only plant products) used as 'food' is: 43.13%
The proportion of the global domestic supply (considering only plant products) used as 'feed' is: 13.76%
The proportion of the global domestic supply (considering only plant products) used as 'losses' is: 5.83%
The proportion of the global domestic supply (considering only plant products) used as 'other uses' is: 9.02%


<b>Identify major trends - Task 5.2: How many humans on earth could be fed if all the plant-based food supply (crops), including food and feed, was used for human consumption? Give the results in terms of calories and protein. Express these two results as a percentage of the world's population.</b>

In [15]:
#world's population = totalpop
#plant-base food supply = totaldomsupply
#According to the FAO, the average minimum daily energy requirement is about 1,800 kilocalories (7,500 kJ) per person.
kcalreq=1800
#According to https://www.fao.org/3/aa040e/aa040e09.htm the protein need is about 50 grams per day.
protreq=50

#totalpop is a one-element Series; the number is extracted explicitly because int() on a
#single-element Series is deprecated in recent pandas versions.
world_population = int(totalpop.iloc[0])

#Plant products of the selected year; columns are summed as Series so the totals are plain numbers.
vegetal_rows = cnv4.loc[cnv4['origin'].isin(['vegetal']) & (cnv4['year']==givenyear)]
totalkcal = vegetal_rows['dom_sup_kcal'].sum()
#NOTE(review): calories are taken from the domestic supply (dom_sup_kcal) while protein is taken
#from the food supply (food_supply_kgprotein). Confirm whether the protein figure should also be
#based on the domestic supply.
totalprot = vegetal_rows['food_supply_kgprotein'].sum()

#kcal per year -> kcal per day -> number of people at kcalreq kcal per day
rescal=totalkcal/365/kcalreq
percentcal=int(rescal)*100/world_population
print(f"Result based on calories:  {percentcal:.2f}%")
print(f"{int(rescal):.0f} humans could have their calories needs satisfied")

#kg per year -> g per year -> g per day -> number of people at protreq g per day
resprot=totalprot*1000/365/protreq
percentprot=int(resprot)*100/world_population
print(f"Result based on proteins:  {percentprot:.2f}%")
print(f"{int(resprot):.0f} humans could have their protein needs satisfied")

Result based on calories:  284.11%
21741046912 humans could have their calories needs satisfied
Result based on proteins:  99.99%
7651823581 humans could have their protein needs satisfied


<b>Identify major trends - Task 5.3: How many humans could be fed with the global food supply? Give the results in terms of calories and protein. Express these two results as a percentage of the world's population.</b>

In [16]:
#Task 5.3 same as previous (task 5.2) but no filter on vegetal, so global values.

#totalpop is a one-element Series; the number is extracted explicitly because int() on a
#single-element Series is deprecated in recent pandas versions.
world_population = int(totalpop.iloc[0])

#All products of the selected year; columns are summed as Series so the totals are plain numbers.
year_rows = cnv4.loc[cnv4['year']==givenyear]
globalkcal = year_rows['dom_sup_kcal'].sum()
globalprot = year_rows['food_supply_kgprotein'].sum()

#kcal per year -> kcal per day -> number of people at kcalreq kcal per day
resglcal=globalkcal/365/kcalreq
percentglcal=int(resglcal)*100/world_population
print(f"Result based on calories: {percentglcal:.2f}%")
print(f"{int(resglcal):.0f} humans could have their calories needs satisfied")

#kg per year -> g per year -> g per day -> number of people at protreq g per day
resglprot=globalprot*1000/365/protreq
percentglprot=int(resglprot)*100/world_population
print(f"Result based on proteins: {percentglprot:.2f}%")
print(f"{int(resglprot):.0f} humans could have their protein needs satisfied")


Result based on calories: 324.84%
24858154599 humans could have their calories needs satisfied
Result based on proteins: 166.30%
12726066988 humans could have their protein needs satisfied


<b>Identify major trends - Task 5.4: From the collected data on undernutrition, what proportion of the world's population is considered undernourished?</b>

In [17]:
#totalpop is a one-element Series; the number is extracted explicitly because int() on a
#single-element Series is deprecated in recent pandas versions.
world_population = int(totalpop.iloc[0])

#First method: total population of the countries where the undernutrition rate is bigger than 10%.
#'population' is selected as a Series so the sum is a plain number.
unpop = un.loc[un['undernourishment'] > 0.1, 'population'].sum()
#Calculating the proportion
unprop=int(unpop)*100/world_population
print(f"{unprop:.2f}% of the world's population is considered undernourished.")
#Although I've been advised to use this method, it makes no sense to me to consider the whole population of a given country as
#undernourished just because more than 10% in said country are undernourished. So I am leaving it here just in case it's the
#preferred method for some reason, but for me personally the calculation below is way more accurate.


#Second manner of calculation which seems more accurate to me:
#Sum of all undernourished people
unsum=un.malnourished.sum()
#Calculating the proportion
propun=int(unsum)*100/world_population

print(f"{propun:.2f}% of the world's population is considered undernourished.")

36.12% of the world's population is considered undernourished.
7.59% of the world's population is considered undernourished.


<b>Identify major trends - Task 5.5.1: Considering the 25 items most exported by the countries with a high rate of undernutrition, which three of them have the greatest other_uses to domestic_supply_quantity ratio and what are they used for?</b>

In [18]:
"""Note : If later in the analysis, ratios such as other_uses:domestic_supply_quantity or
feed;(food+feed) need to be calculated, first filter the food balance data in order to keep
only these 200 lines, then group them by item (with an aggregate) in order to have a
dataframe containing 1 line for each of the 25 products. Calculate the ratios from this
dataframe."""

#imt (Identify Major Trends): keep only the flagged lines of the selected year, then aggregate
#them per item so that there is one line per product.
flagged_rows = cnv4.loc[cnv4['great_import_from_undern_countries'] & (cnv4['year']==givenyear)]
#Only the two columns needed for the ratio are summed. Summing every column would also aggregate
#text columns, the year and the per-line ratios, and its result depends on the pandas version.
imt = flagged_rows.groupby(by=['item'])[['other_uses', 'domestic_supply_quantity']].sum()
#Calculating ratio
imt['other_uses_to_domestic_supply_quantity']=imt['other_uses']/imt['domestic_supply_quantity']
#Sorting values in descending manner
imt=imt.sort_values(by=['other_uses_to_domestic_supply_quantity'], ascending=False)
#Displaying top 3 items and their ratio
top3=imt[['other_uses_to_domestic_supply_quantity']]
top3.head(3)

Unnamed: 0_level_0,other_uses_to_domestic_supply_quantity
item,Unnamed: 1_level_1
"Alcohol, Non-Food",1.0
"Oilcrops Oil, Other",0.7
Palm Oil,0.56


<b>Identify major trends - Task 5.5.2: Considering the 25 items most exported by the countries with a high rate of undernutrition, which three of them have the greatest feed to (food+feed) ratio and what are they used for?</b>

In [19]:
#imt2: keep only the flagged lines of the selected year, then aggregate them per item so that
#there is one line per product.
flagged_rows = cnv4.loc[cnv4['great_import_from_undern_countries'] & (cnv4['year']==givenyear)]
#Only the two columns needed for the ratio are summed. Summing every column would also aggregate
#text columns, the year and the per-line ratios, and its result depends on the pandas version.
imt2 = flagged_rows.groupby(by=['item'])[['feed', 'food']].sum()
#Calculating ratio
imt2['feed_to_food_plus_feed']=imt2['feed']/(imt2['food']+imt2['feed'])
#Sorting values in descending manner
imt2=imt2.sort_values(by=['feed_to_food_plus_feed'], ascending=False)
#Displaying top 3 items and their ratio
topthree=imt2[['feed_to_food_plus_feed']]
topthree.head(3)

Unnamed: 0_level_0,feed_to_food_plus_feed
item,Unnamed: 1_level_1
Maize and products,0.41
Pelagic Fish,0.24
Sesame seed,0.11


<b>Identify major trends - Task 5.6: Taking only grains (cereals) for food and feed into account, what proportion (in terms of weight) is used for feed?</b>

In [20]:
#Storing the items found in our cereals extraction (from FAO's site) as series.
cereals=pd.Series(cer.Item)
#Lines of the selected year whose item is one of the cereals.
cereal_rows = cnv4.loc[cnv4['item'].isin(cereals) & (cnv4['year']==givenyear)]
#One line per cereal. Only 'food' and 'feed' are summed; summing every column would also
#aggregate text columns and per-line ratios, and its result depends on the pandas version.
imt3 = cereal_rows.groupby(by=['item'])[['food', 'feed']].sum()
#Sum of cereals used for food
cerfood=imt3['food'].sum()
#Sum of cereals used for feed
cerfeed=imt3['feed'].sum()
#Cereals used for food and feed
certotal=cerfood+cerfeed
#Calculating the proportion of the feed by taking only food and feed values into account.
feedprop=cerfeed*100/certotal

print(f"{feedprop:.2f}% of grains are used for feed")

42.47% of grains are used for feed


<b>Identify major trends - Task 5.7: How many tons of grains (cereals) could be released if the US reduced its production of animal products by 10%? Convert this quantity to kcal, and the number of potentially fed humans.</b>

In [21]:
#Cereal lines of the selected year for the US only.
us_cereal_rows = cnv4.loc[(cnv4['item'].isin(cereals)) & (cnv4['year']==givenyear) & (cnv4['country']=='United States of America')]
#One line per cereal. Only the three columns used below are summed; summing every column would
#also aggregate text columns and per-line ratios, and its result depends on the pandas version.
imt4 = us_cereal_rows.groupby(by=['item'])[['feed', 'dom_sup_kcal', 'domestic_supply_quantity']].sum()
#The amount used for feeding in kilotons
feedsum=imt4['feed'].sum()
#The amount of total calories for the lines in the dframe
qtykcal=imt4['dom_sup_kcal'].sum()
#The total amount of cereals (regardless of their use)
qtysupply=imt4['domestic_supply_quantity'].sum()
#Result for 10% of US grains used for feeding. Converting from kilotons to tons to answer the question.
restons=feedsum/10*1000
#Calculating how much kcals are there in the restons by a rule of three. We are converting qtysupply to tons instead of kilotons
#because it needs to be in the same units as restons which is in tons due to previous calculation
reskcal=qtykcal/(qtysupply*1000/restons)
#Calculating how many humans could potentially be fed with the 10% of the supply currently used for feeding.
potfhum=reskcal/365/kcalreq
print(f"{restons:.0f} tons of grains could be released if the US spent 10% less grains on feeding.")
print(f"The same value in terms of kcal would be {reskcal:.0f} or {potfhum:.0f} potentially fed humans.")

15813600 tons of grains could be released if the US spent 10% less grains on feeding.
The same value in terms of kcal would be 44096976704151 or 67118686 potentially fed humans.


<b>Identify major trends - Task 5.8.1: In Thailand, what proportion of cassava is exported?</b>

In [22]:
#tempdf: the cassava line(s) for Thailand in the selected year, taken from the main dframe.
tempdf=cnv4.loc[(cnv4['country']=='Thailand') & (cnv4['item']=='Cassava and products') & (cnv4['year']==givenyear)]
#Cassava export ratio, added with assign so that no column is written onto a slice of cnv4.
#NOTE(review): the ratio is taken against domestic_supply_quantity; if the share of production
#that is exported is what is wanted, the denominator would be 'production' - confirm.
tempdf=tempdf.assign(cassava_export_ratio=tempdf['export_quantity']/tempdf['domestic_supply_quantity'])
#casprop (Cassava Proportion) as a percentage. .item() extracts the value and raises if the
#filter did not select exactly one line; float() on a single-element Series is deprecated.
casprop=float((tempdf['export_quantity']*100/tempdf['domestic_supply_quantity']).item())
print(f"The proportion of exported cassava in Thailand is: {casprop:.2f}%")

The proportion of exported cassava in Thailand is: 75.35%


<b>Identify major trends - Task 5.8.2: In Thailand, what is the proportion of undernutrition?</b>

In [23]:
#Undernourishment rate of Thailand as a percentage. The column is selected as a Series so the
#sum is a plain number; float() on a single-element Series is deprecated in recent pandas.
tunprop=float(un.loc[un['country']=='Thailand', 'undernourishment'].sum()*100)
print(f"The proportion of undernutrition in Thailand is: {tunprop:.2f}%")

The proportion of undernutrition in Thailand is: 7.90%
