In [None]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import matplotlib.gridspec as gs

import warnings
warnings.filterwarnings('ignore')

<p style="padding:10px;background-color:#B4F8C8;margin:0;color:#000C66;font-family:sans serif;font-size:240%;text-align:center; overflow:hidden; font-weight:500; font-style:italic"><a id='1'></a>1. Exploring the data</p>

<p style="text-align:center; "></p>

In [None]:
# Location of the per-country food-waste CSV inside the Kaggle input directory.
csv_path = '/kaggle/input/food-waste/Food Waste data and research - by country.csv'

# Read the dataset, then print its column / dtype / non-null summary.
df = pd.read_csv(csv_path)
df.info()

In [None]:
# Preview the first rows to check how the columns were parsed.
df.head()

In [None]:
# Count the missing values in each column.
df.isna().sum()

<p style="padding:10px;background-color:#B4F8C8;margin:0;color:#000C66;font-family:sans-serif;font-size:240%;text-align:center; overflow:hidden; font-weight:500; font-style:italic"><a id='2'></a>2. Which region of the world wastes the most food?</p>

<p style="text-align:center; "></p>

In [None]:
# Waste estimates (tonnes/year) that are totalled for every world region.
columns_to_sum = ('Household estimate (tonnes/year)', 'Retail estimate (tonnes/year)',
                  'Food service estimate (tonnes/year)')

# A single groupby pass gives the regional total of all three sectors at once.
region_totals = df.groupby('Region')[list(columns_to_sum)].sum()

# Per-sector one-column frames, each sorted ascending by its own total.
# Kept under the original name so any cell that reads `results` still works.
results = {column: region_totals[[column]].sort_values(column) for column in columns_to_sum}

# Order the combined table explicitly (ascending household waste) instead of
# relying on how pd.concat happens to union differently-sorted indexes.
df_results = region_totals.sort_values(columns_to_sum[0])
df_results

In [None]:
# Create the figure explicitly and pass its Axes to pandas. Without `ax=`,
# DataFrame.plot.bar() opens a figure of its own, so the requested size/dpi
# would be ignored and an extra empty figure would be displayed.
fig, ax = plt.subplots(figsize=(12, 8), dpi=200)
df_results.plot.bar(ax=ax, cmap='Paired')

ax.set_xlabel('Region')
ax.set_ylabel('Estimate (tonnes/year)')

# Slanted, right-anchored labels keep the long region names readable.
plt.setp(ax.get_xticklabels(), rotation=30, size=7, fontfamily='monospace', color='navy',
         rotation_mode='anchor', ha='right')
plt.setp(ax.get_yticklabels(), size=7, fontfamily='monospace', color='navy')

ax.legend(loc="upper left", prop={'size': 7})
ax.set_title('Estimated food waste by world region', size=12, color='navy')
plt.show()

In [None]:
def sum_by_region(df):
    """Total the household, retail and food-service estimates for each region.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Region' column and the three '... estimate (tonnes/year)'
        columns listed below.

    Returns
    -------
    pandas.DataFrame
        One row per region with 'Region' as a regular column, sorted ascending
        by household, then retail, then food-service totals.
    """
    estimate_columns = ['Household estimate (tonnes/year)', 'Retail estimate (tonnes/year)',
                        'Food service estimate (tonnes/year)']

    per_region = df.groupby('Region')
    totals = pd.DataFrame({name: per_region[name].sum() for name in estimate_columns})

    # The sort keys are the summed columns themselves, in the same order.
    return totals.sort_values(by=estimate_columns).reset_index()

# sum_by_region already returns a DataFrame, so it is used as-is.
df_region = sum_by_region(df)

<p style="padding:10px;background-color:#ECF87F;margin:0;color:#000C66;font-family:sans-serif;font-size:180%;text-align:left; overflow:hidden; font-weight:500; font-style:italic"><a id='2.1'></a>2.1 Focus on two regions of the world</p>

<p style="text-align:left; "></p>

I have chosen to focus my analysis on two regions, <b>Eastern Asia and Northern America, because the bar plot shows rather surprising results for both.</b>
- In Eastern Asia, the household and food service estimates are almost equal.
- Northern America has the highest food service estimate in the world.

In [None]:
# Countries of the Eastern Asia region, indexed by country name so that the
# names become the bar labels.
eastern_asia = df.loc[df['Region'] == 'Eastern Asia'].set_index('Country')

estimate_columns = [
    'Household estimate (tonnes/year)',
    'Retail estimate (tonnes/year)',
    'Food service estimate (tonnes/year)',
]

# One group of three bars (household / retail / food service) per country.
ax = eastern_asia[estimate_columns].plot.bar(cmap='Paired')
ax.set_xlabel('Countries in Eastern Asia')
ax.set_ylabel('Estimate (tonnes/year)')
plt.setp(ax.get_xticklabels(), rotation=30, size=9, fontfamily='monospace', color='navy',
         rotation_mode='anchor', ha='right')
plt.setp(ax.get_yticklabels(), size=7, fontfamily='monospace', color='navy')
ax.legend(loc="upper right", prop={'size': 7})
ax.set_title('Estimates in Eastern Asia', size=12, color='navy')

# Background text displayed to the right of the chart.
comment = """
"More dishes than guests, a tradition in China".

Studies dating back to 2014 show that large quantities of food are still
thrown away, especially in restaurants, where an estimated 19-30% of food
is wasted. When it comes to meals eaten at home, wastage is lower, with only
7% of food ending up in the garbage can.

But on a national scale, these losses are highly problematic: according to
a study carried out by China Agricultural University, the amount of protein
thrown away would feed the equivalent of 260 million people!

In China, it is traditional to order as many - if not more - dishes as there
are guests, in order to make a good impression.

In Chinese culture, leaving food on your plate is a sign of politeness.
It shows that the person inviting - or the one who cooked - had planned
enough food and that the guests did not lack anything. If the Chinese
authorities want to reduce food waste, it will require a change in mentality,
to make citizens accept that ordering less food won't make them "lose face".

For example, the Chinese government is now encouraging restaurants to serve
one less meal than the number of guests around a table.
"""
# x = 1.1 in axes-fraction coordinates places the text just outside the right edge.
ax.annotate(comment, xy=(1.1, 0.5), xycoords='axes fraction', fontsize=9, va='center', ha='left')
plt.show()

In [None]:
# Countries of the Northern America region, indexed by country name so that
# the names become the bar labels.
northern_america = df.loc[df['Region'] == 'Northern America'].set_index('Country')

estimate_columns = [
    'Household estimate (tonnes/year)',
    'Retail estimate (tonnes/year)',
    'Food service estimate (tonnes/year)',
]

# One group of three bars (household / retail / food service) per country.
ax = northern_america[estimate_columns].plot.bar(cmap='Paired')
ax.set_xlabel('Countries in Northern America')
ax.set_ylabel('Estimate (tonnes/year)')
plt.setp(ax.get_xticklabels(), rotation=30, size=9, fontfamily='monospace', color='navy',
         rotation_mode='anchor', ha='right')
plt.setp(ax.get_yticklabels(), size=7, fontfamily='monospace', color='navy')
ax.legend(loc="upper left", prop={'size': 7})
ax.set_title('Estimates in Northern America', size=12, color='navy')

# Background text displayed to the right of the chart.
comment = """
Americans waste nearly 150,000 tons of food a day, according to a study published Wednesday,
April 18 in the journal Plos One.

This food waste primarily concerns the healthiest products, with fruit, vegetables and dairy
products taking up a large share of the trash.

According to the study, there is only one solution to this immense food waste: raise consumer
awareness. Consumers need to be made aware of the waste that can be generated by a balanced diet,
and taught how to store purchased foodstuffs - particularly fresh produce - properly, and how to
prepare them better so that less is thrown away.
"""
# x = 1.1 in axes-fraction coordinates places the text just outside the right edge.
ax.annotate(comment, xy=(1.1, 0.5), xycoords='axes fraction', fontsize=9, va='center', ha='left')
plt.show()

### Volume of food thrown away by catering services by country 2021

This statistic presents, by country, the volume of food waste produced by food services in 2021. As of that date, China was responsible for the largest volume of food thrown away by food services, with more than 65 million tonnes of food waste per year. China was followed by the United States, with 21 million tonnes per year. The two giants are well ahead of the rest of the world: in third place we find Malaysia, whose restaurant chains throw away 3 million tonnes of food a year, a volume 22 times smaller than that of China and 7 times smaller than that of the United States. French restaurants throw away almost 2 million tonnes of food a year.

<p style="padding:10px;background-color:#B4F8C8;margin:0;color:#000C66;font-family:sans-serif;font-size:240%;text-align:center; overflow:hidden; font-weight:500; font-style:italic"><a id='3'></a>3. Presentation of the 10 most responsible countries vs. the 10 countries that waste the most food, in kg/capita/year</p>

<p style="text-align:center; "></p>

In [None]:
# Rank countries by combined per-capita waste: the 10 lowest values
# ("most responsible") and the 10 highest.
most_responsible_countries = df.nsmallest(10, 'combined figures (kg/capita/year)')
countries_waste_most_food = df.nlargest(10, 'combined figures (kg/capita/year)')

# Number of bars per panel that keep the strong accent colour.
top_n = 3

sns.set_style('white')
fig = plt.figure(figsize=(10, 8))
g = gs.GridSpec(ncols=1, nrows=2, figure=fig)
fig.suptitle("The 10 responsible countries VS the 10 countries that waste the most food ",
             fontfamily='monospace', color='navy', size=16, y=0.93)

top_10 = countries_waste_most_food
# Highest value first, so both panels read from most to least waste.
bottom_10 = most_responsible_countries.sort_values(ascending=False, by='combined figures (kg/capita/year)')

# --- Upper panel: the 10 countries with the highest per-capita waste ---
ax1 = fig.add_subplot(g[0, 0])
sns.barplot(data=top_10, x='combined figures (kg/capita/year)', y='Country',
            color='#FC2E20', alpha=0.8, ax=ax1)
for container in ax1.containers:
    ax1.bar_label(container)
ax1.xaxis.set_visible(False)

# Recolour every bar after the first `top_n`. The bars are taken from
# ax.patches rather than ax.get_children(), whose ordering is an
# implementation detail and also contains labels, spines and axes.
for bar in ax1.patches[top_n:]:
    bar.set_color('#FDB750')

ax1.set_ylabel('The least responsible countries for food waste', fontfamily='monospace',
               loc='top', color='orange', size=8)

# --- Lower panel: the 10 countries with the lowest per-capita waste ---
ax2 = fig.add_subplot(g[1, 0], sharex=ax1)
sns.barplot(data=bottom_10, x='combined figures (kg/capita/year)', y='Country',
            color='#0048A4', alpha=0.97, ax=ax2)
for container in ax2.containers:
    ax2.bar_label(container)

# Lighten all but the last `top_n` bars (the lowest values keep the dark blue).
for bar in ax2.patches[:len(ax2.patches) - top_n]:
    bar.set_color('#68A4F1')

for side in ['left', 'right', 'top', 'bottom']:
    ax1.spines[side].set_visible(False)
    ax2.spines[side].set_visible(False)

# NOTE(review): the step between 120 and 140 is 20 while every other step is
# 30 -- confirm whether evenly spaced ticks were intended.
x_ticks = (0, 30, 60, 90, 120, 140, 170, 200, 230, 260)
ax2.set_xticks(x_ticks)
ax2.set_xticklabels(x_ticks, fontfamily='monospace', color='navy', size=8)

ax2.set_xlabel('Combined figures (kg/capita/year)', fontfamily='monospace', color='black', size=10)
ax2.set_ylabel('The most responsible countries ', fontfamily='monospace', loc='top',
               color='#0048A4', size=8)

plt.show()

According to these results, Malaysia, Nigeria and Rwanda are the worst performers in terms of food waste in kg per capita per year. However, as the data are not very reliable, this ranking is probably inaccurate.

In reality:
North America is characterized by a high level of waste in kilos per capita. The United States leads the way, with no less than 415 kilos per inhabitant, for a total of 126 million tonnes of food wasted every year. Canada adds up to a total of 11.2 million tonnes each year, for an average of 303 kilos per inhabitant. Mexico, although less plagued by per capita waste, with an average of 155 kilos per inhabitant, wastes an average of 20 million tonnes per year.

Europe has a complex context, as not all countries offer up-to-date or accurate figures. Comprising many countries, it represents 88 million tonnes of food waste.

Finally, Asia is a zone characterized by considerable food waste, but low per capita volumes. China, for example, wastes 61 million tonnes every year, but at a rate of only 44 kilos per capita.

In Oceania, Australia wastes a high volume per capita (298 kilos per head), but its total is only 7.3 million tonnes because of its small population.

In [None]:
def plot_top_and_bottom(df, column, n=10, xlabel=None, ylabel=None, title=None, xtick_rotation=None):
    """Draw side-by-side bar charts of the `n` lowest and `n` highest countries.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Country' column and `column`.
    column : str
        Name of the column used for ranking and plotted as bar height.
    n : int, default 10
        Number of countries shown in each panel.
    xlabel, ylabel : str, optional
        Axis labels; fall back to 'Country' and `column` when not given.
    title : str, optional
        Figure title; falls back to 'Estimates'.
    xtick_rotation : float, optional
        Rotation applied to the country names on the x axis.

    The figure is displayed with plt.show(); nothing is returned.
    """
    lowest = df.nsmallest(n, column).set_index('Country')
    highest = df.nlargest(n, column).set_index('Country')

    fig, axes = plt.subplots(1, 2, figsize=(10, 6))

    y_text = ylabel if ylabel else column
    x_text = xlabel if xlabel else 'Country'

    # (axes, data, bar/title colour, wording used in the title, legend corner)
    panels = (
        (axes[0], lowest, 'blue', 'Lowest', "upper left"),
        (axes[1], highest, 'orange', 'Highest', "upper right"),
    )

    for ax, subset, colour, extreme, legend_corner in panels:
        subset[column].plot.bar(ax=ax, color=colour)
        ax.set_title(f'Top {n} Countries with {extreme} {column}', size=9, color=colour)
        ax.set_ylabel(y_text)
        ax.set_xlabel(x_text)
        ax.tick_params(axis='x', rotation=xtick_rotation, labelsize=9, labelcolor='navy')
        ax.tick_params(axis='y', labelsize=7, labelcolor='navy')
        # Applied after tick_params, so these text properties are set last.
        ax.set_xticklabels(subset.index, rotation=xtick_rotation, ha='right', fontsize=7, color='navy')
        ax.legend(loc=legend_corner, prop={'size': 7})

    fig.suptitle(title if title else 'Estimates', size=12, color='navy')

    plt.tight_layout()
    plt.show()

# (column to rank, y-axis label, figure title) for each waste sector.
sector_plots = (
    ('Household estimate (tonnes/year)', 'Household Estimate (tonnes/year)', 'Household Estimates'),
    ('Retail estimate (tonnes/year)', 'Retail Estimate (tonnes/year)', 'Retail Estimates'),
    ('Food service estimate (tonnes/year)', 'Food service Estimate (tonnes/year)', 'Food service Estimates'),
)

# One lowest-vs-highest figure per sector, all with the same axis styling.
for sector_column, sector_ylabel, sector_title in sector_plots:
    plot_top_and_bottom(df, sector_column, xlabel='Countries', ylabel=sector_ylabel,
                        title=sector_title, xtick_rotation=30)

#### These graphs show an intuitive pattern: the larger and more populous the country, the greater its total amount of food waste.


#### The reverse is also true: the blue bars show the countries that produce the least food waste, which are also among the smallest in the world by surface area. This is because they have smaller populations.