# Global trends in dietary components

In [None]:
'''
Group 17: Liang Tao, Ke Liu, Chenfeng Wu, Sihan Wang
Project name: Global trends in dietary components
Abstract: Analyze global food consumption trends across countries and over the years
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pyecharts
from pyecharts import Style
from pyecharts import Map, Geo
from pyecharts import Timeline
from pyecharts import Pie
from pyecharts_snapshot.main import make_a_snapshot

class Visualization:
    def __init__(self):
        # set property of graphs
        self._color = 'firebrick'
        self._transparency = 0.85
        self._fontsize = 12
        
        # load all the data for visualization
        self.macronutrient = pd.read_csv('daily-caloric-supply-derived-from-carbohydrates-protein-and-fat.csv')
        self.foodGroup = pd.read_csv('dietary-compositions-by-commodity-group.csv')
        self.fruit = pd.read_csv('vegetable-consumption-per-capita.csv')
        self.vegetable = pd.read_csv('vegetable-consumption-per-capita.csv')
        self.meat = pd.read_csv('FAOSTAT_data_3-10-2019.csv')
        self.diet = pd.read_csv('dietary-compositions-by-commodity-group.csv')
        self.food = pd.read_csv('global_food_consumption.csv')
        
    def radarChart_for_macronutrient(self):
        '''
        Compute the mean of daily macronutrient intake
        and plot a radar chart to represent the four nutrients.
        The mean of every column of self.macronutrient from the fourth one
        onward is drawn on a polar axis.
        output: save the chart as 'daily caloric supply.pdf' and show it
        '''
        df = self.macronutrient

        # labels, one per spoke
        # NOTE(review): assumes df.columns[3:] holds exactly these four series in this order - confirm against the CSV
        name = ['Animal protein', 'Plant protein', 'Fat', 'Carbohydrates']

        # divide the circle into one uniform part per label
        spokes = np.linspace(0, 2*np.pi, len(name), endpoint=False)

        # mean kilocalories of each macronutrient
        value = [df[column].mean() for column in df.columns[3:]]

        # repeat the first point at the end so the outline closes
        theta = np.concatenate((spokes, [spokes[0]]))
        value = np.concatenate((value, [value[0]]))

        # graph the plot
        ax = plt.subplot(111, projection='polar')
        ax.plot(theta, value, lw=1, alpha=0.75, color=self._color)
        ax.fill(theta, value, color=self._color, alpha=self._transparency)
        # label the distinct spokes only: the closed outline has one more
        # point than there are labels, and the counts must match
        ax.set_thetagrids(spokes*180/np.pi, name, fontsize=self._fontsize)
        ax.set_ylim(0, 2000)
        ax.set_theta_zero_location('S')
        ax.set_title('daily caloric supply', fontsize=15, pad=20.0)
        plt.savefig('daily caloric supply.pdf', bbox_inches='tight')
        plt.show()
    
    def boxPlot_for_dailyCaloricSupply(self):
        '''
        foodGroup: Sugar, Oils, fats...
        Extract the main food groups from self.foodGroup
        Plot a horizontal box plot to show daily caloric supply of different food groups
        output: save the chart as 'box2.pdf' and show it
        '''
        columns = ['Sugar (kilocalories per person per day)',
                   'Oils & Fats (kilocalories per person per day)',
                   'Meat (kilocalories per person per day)',
                   'Dairy & Eggs (kilocalories per person per day)',
                   'Fruits & Vegetables (kilocalories per person per day)',
                   'Cereals & Grains (kilocalories per person per day)']
        labels = ['Sugar',
                  'Oils & Fats',
                  'Meat',
                  'Dairy & Eggs',
                  'Fruits & Vegetables',
                  'Cereals & Grains']
        new_df = self.foodGroup.loc[:, columns]

        # the figure size must be set before the axes are created;
        # rcParams only apply to figures made afterwards
        plt.rcParams["figure.figsize"] = [12, 4.8]

        # add grid
        sns.set_style("whitegrid")

        # one box per food group, all in the shared colour
        ax = sns.boxplot(data=new_df, orient="h", width=0.8,
                         palette=[self._color]*len(columns), fliersize=1)

        # change transparency of the boxes
        # NOTE(review): the box patches are looked up in both ax.artists and
        # ax.patches because their container appears to depend on the
        # matplotlib/seaborn version - confirm for the version in use
        for patch in list(ax.artists) + list(ax.patches):
            if not hasattr(patch, 'get_facecolor'):
                continue
            r, g, b, a = patch.get_facecolor()
            patch.set_facecolor((r, g, b, self._transparency))

        # graph the plot
        ax.tick_params(axis='x', labelsize=self._fontsize)
        ax.set_yticklabels(labels=labels, fontsize=self._fontsize)
        plt.xlabel('kilocalories per person per day', fontsize=self._fontsize)
        plt.savefig('box2.pdf', bbox_inches='tight')
        plt.show()
    
    def lineChart_for_fruit_vegetable_meat_intake(self):
        '''
        Plot a line chart to compare the value of fruit/vegetable/meat intake(kg/capita/yr)
        from 1961 to 2013 of four countries (India, China, United States, Russia)
        One chart is drawn per data set (self.fruit, self.vegetable, self.meat).
        output: save each chart as 'line_<tag>.pdf' and show it
        '''
        plt.rcParams["figure.figsize"] = [8, 4.8]
        years = range(1961, 2014)

        # set xticks: one every ten years
        ticks = list(range(1961, 2014, 10))

        # country spellings used by the fruit/vegetable files ...
        entity_names = {'India': 'India',
                        'China': 'China',
                        'United States': 'United States',
                        'Russia': 'Russia',
                        'USSR': 'USSR'}
        # ... and by the meat file
        faostat_names = {'India': 'India',
                         'China': 'China',
                         'United States': 'United States of America',
                         'Russia': 'Russian Federation',
                         'USSR': 'USSR'}

        # (tag for the output file name, data, country column, value column, spellings)
        # The tags are the source file names listed in the '## files we need'
        # note; this method used to read them from module-level names
        # (fruit, vegetable, meat) that are never defined in this file.
        datasets = [
            ('fruit-consumption-per-capita.csv', self.fruit, 'Entity',
             ' (kilograms per person)', entity_names),
            ('vegetable-consumption-per-capita.csv', self.vegetable, 'Entity',
             'Food Balance Sheets: Vegetables - Food supply quantity (kg/capita/yr) (FAO (2017)) (kg)',
             entity_names),
            ('FAOSTAT_data_3-10-2019.csv', self.meat, 'Country',
             'Value', faostat_names),
        ]

        def values_of(df, country_col, country, value_col):
            '''Return the value column of one country as a list, in file order.'''
            return list(df.loc[df[country_col] == country, value_col])

        for tag, df, country_col, value_col, names in datasets:
            # The Russia line is the USSR rows followed by the Russia rows.
            # NOTE(review): presumably the USSR rows cover the years before
            # Russia appears in the data, giving 53 points in total - confirm
            russia = (values_of(df, country_col, names['USSR'], value_col)
                      + values_of(df, country_col, names['Russia'], value_col))

            # plot the graph
            plt.clf()
            for label in ('India', 'China', 'United States'):
                plt.plot(years, values_of(df, country_col, names[label], value_col),
                         marker='.', label=label)
            plt.plot(years, russia, marker='.', label='Russia')
            plt.legend(loc='best')
            plt.xticks(ticks)
            plt.xlim(1961, 2013)
            plt.grid()
            plt.savefig('line_{}.pdf'.format(tag), bbox_inches='tight')
            plt.show()
    
    def barPlot_dietary_component_for_four_countries(self):
        '''
        Compare the main food group intake of four countries
        Compute the mean intake for 6 food groups over all rows of each country
        output: save the chart as 'kilocalories per person per day2.pdf' and show it
        '''
        countries = ['India', 'Russia', 'China', 'United States']
        columns = ['Sugar (kilocalories per person per day)',
                   'Oils & Fats (kilocalories per person per day)',
                   'Meat (kilocalories per person per day)',
                   'Dairy & Eggs (kilocalories per person per day)',
                   'Fruits & Vegetables (kilocalories per person per day)',
                   'Cereals & Grains (kilocalories per person per day)']

        # Average only the six plotted columns. Averaging the whole frame
        # fails under pandas >= 2.0 if the file has any text column besides
        # 'Entity', because non-numeric columns are no longer dropped silently.
        means = self.diet.groupby('Entity')[columns].mean()
        new_df = means.loc[countries, :]

        ax = new_df.plot.bar(stacked=False, alpha=0.9)
        ax.set(ylim=[0, 1500])
        # the tick labels already come from the frame's index (the country
        # names); only their rotation has to be set
        plt.xticks(rotation=0)
        ax.legend(['Sugar',
                   'Oils& Fats',
                   'Meat',
                   'Dairy& Eggs',
                   'Fruits & Vegetables',
                   'Cereals & Grains'])
        plt.ylabel('kilocalories per person per day')
        plt.savefig('kilocalories per person per day2.pdf', bbox_inches='tight')
        plt.show()
    
    def lineChart_food_consumption(self, start_zero=False):
        '''
        Graph the global food consumption trend in 5 major categories:
        Carbs, Vegetable, Fruit, Meat, Seafood
        Carbs is the sum of 'Cereals - Excluding Beer' and 'Starchy Roots'.
        param: start_zero --> whether to shift every line so that it starts at zero in 1961;
                              meat, seafood and carbs are then drawn faded
        type: boolean
        output: save the chart as 'Global Food Consumption-<condition>.pdf' and show it
        '''
        # select useful data
        df = self.food[['Item', 'Year', 'Value']]
        years = list(range(1961, 2014))

        def sum_over_year(items):
            '''
            Sum all values of the given items for every year.
            param: items --> list of 'Item' names to add together
            return: list of yearly totals from 1961 to 2013, divided by 1000
                    (shifted to start at zero when start_zero is set)
            '''
            rows = df[df['Item'].isin(items)]
            # years without any row count as 0
            totals = rows.groupby('Year')['Value'].sum().reindex(years, fill_value=0)
            value = (totals / 10**3).tolist()
            if start_zero:
                first_value = value[0]
                value = [item - first_value for item in value]
            return value

        # compute the data, sum all value every year
        veg_value = sum_over_year(['Vegetables'])
        meat_value = sum_over_year(['Meat'])
        seafood_value = sum_over_year(['Fish, Seafood'])
        fruit_value = sum_over_year(['Fruits - Excluding Wine'])
        # Cereal and starchy-root rows are summed per year. They used to be
        # added row by row by position, which is only right when both
        # subsets have the same length and the same row order.
        carb_value = sum_over_year(['Cereals - Excluding Beer', 'Starchy Roots'])

        # set opacity of the faded lines and the tag used in the file name
        if start_zero:
            opacity = 0.07
            condition = 'start_at_zero'
        else:
            opacity = 1
            condition = 'normal'

        # graph line chart
        plt.plot(years, veg_value, color='g', label='Vegetable')
        plt.plot(years, meat_value, color='r', alpha=opacity, label='Meat')
        plt.plot(years, seafood_value, color='blue', alpha=opacity, label='Seafood')
        plt.plot(years, fruit_value, color='orange', label='Fruit')
        plt.plot(years, carb_value, color='purple', alpha=opacity, label='Carbohydrate')

        plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left", frameon=False)
        plt.xlabel('Year')
        plt.ylabel('Consumption (millon tons)')
        plt.grid(alpha=0.5)
        plt.savefig('Global Food Consumption-%s.pdf' % (condition), bbox_inches='tight')
        plt.show()
    
    def initial_style_pyecharts(self):
        '''
        Build the pyecharts Style shared by the charts
        title_color: colour of the chart title ('#fff')
        width: the width of the output image (1400)
        height: the height of the output image (800)
        background_color: background of the chart ('#bbb')
        return: the Style object
        '''
        options = {
            'title_color': "#fff",
            'width': 1400,
            'height': 800,
            'background_color': '#bbb',
        }
        return Style(**options)
    
    def dataloader(self, path1, path2):
        '''
        load the data from the csv files
        inputs:
        path1: path for crops 
        path2: path for stockfish
        '''
        self.df=pd.read_csv('FoodSupply_Crops_E_All_Data.csv',encoding = "ISO-8859-1")
        self.df=np.matrix(self.df.values)
        self.df2=pd.read_csv('FoodSupply_LivestockFish_E_All_Data.csv',encoding = "ISO-8859-1")
        self.df2=np.matrix(self.df2.values)
        
    def generate_map(self,style):
        '''
        Generate the corresponding world map according to the data we have
        One map is built per year (53 years starting at 1961) from the
        'Vegetables' rows of self.df and put on a timeline.
        param: style --> object whose init_style holds the keyword arguments for each Map
        output: directly save the charts as 'distribution.html' in this folder
        '''
        n_years = 53
        # Column layout used below: 1 = country, 3 = item, 4 = element code,
        # 7 + 2*j = value of year j.
        # NOTE(review): presumably the year columns alternate with flag
        # columns and 645 is the element code of the supply quantity - confirm
        first_value_col = 7

        # builtin str replaces np.str, an alias that NumPy 1.24 removed
        names = np.ndarray.flatten(np.array(self.df[:,1]).astype(str)).tolist()
        countries = sorted(set(names))

        # look each country's row up once instead of once per year
        veg_rows = (np.where(self.df[:,3]=='Vegetables'))[0]
        country_rows = []
        for name in countries:
            ind1 = np.intersect1d((np.where(self.df[:,1]==name))[0], veg_rows)
            temp = self.df[ind1]
            ind2 = (np.where(temp[:,4]==645))[0]
            country_rows.append(np.ndarray.flatten(temp[ind2]).tolist()[0])

        # one list of values per year, in the order of countries;
        # Denmark's value is repeated at the end for the Greenland entry added below
        denmark = countries.index('Denmark')
        veg_con = []
        for j in range(n_years):
            one_year_con = [row[first_value_col + j*2] for row in country_rows]
            one_year_con.append(one_year_con[denmark])
            veg_con.append(one_year_con)

        # switch to the names the world map is given
        countries[countries.index('United States of America')]='United States'
        countries[countries.index('Russian Federation')]='Russia'
        countries.append('Greenland')

        # for the first 31 years (1961-1991) Russia shows the USSR value
        russia = countries.index('Russia')
        ussr = countries.index('USSR')
        for i in range(31):
            veg_con[i][russia] = veg_con[i][ussr]

        years = [1961+i for i in range(n_years)]
        timeline = Timeline(is_auto_play=False, timeline_bottom=0, timeline_play_interval=50,width=1100)
        for i in range(n_years):
            world = Map("world map", **style.init_style)
            world.add("world map", countries, veg_con[i], maptype="world",  is_visualmap=True, visual_range=[0,200],visual_text_color='#fff', visual_top=50, is_map_symbol_show=False)
            timeline.add(world,str(years[i]))
        timeline.render(path='distribution.html')
        
    def generate_pie(self,country):
        '''
        Generate the corresponding pie chart of one country's diet
        Vegetables, fruits and cereals are read from self.df,
        meat and milk from self.df2.
        param: country --> the country we want to use, spelled as in the dataset
        type: str
        output: save the pie chart as '<country>.html' in this folder
        '''
        # position of the plotted value inside a row: 7 + 2*j is year 1961 + j
        # in generate_map, so this is the 52nd year (2012)
        value_col = 7+51*2

        def supply_value(table, item):
            '''Return the value of `item` for `country` from the row with element code 645.'''
            ind1 = np.intersect1d((np.where(table[:,1]==country))[0],(np.where(table[:,3]==item))[0])
            temp = table[ind1]
            ind2 = (np.where(temp[:,4]==645))[0]
            return np.ndarray.flatten(temp[ind2]).tolist()[0][value_col]

        crop_items = ['Vegetables', 'Fruits - Excluding Wine', 'Cereals - Excluding Beer']
        livestock_items = ['Meat', 'Milk, Whole']

        # labels and values in the same order: crops first, then livestock.
        # The unused item-name sets were dropped; they relied on np.str,
        # which NumPy 1.24 removed.
        attr = crop_items + livestock_items
        v1 = [supply_value(self.df, item) for item in crop_items]
        v1 += [supply_value(self.df2, item) for item in livestock_items]

        pie = Pie("Diet")
        pie.add("", attr, v1, is_label_show=True, center=[50,50], rosetype = 'radius')
        pie.render(path=country+'.html')
    
## files we need
# macronutrient='daily-caloric-supply-derived-from-carbohydrates-protein-and-fat.csv'
# foodGroup='dietary-compositions-by-commodity-group.csv'
# fruit='fruit-consumption-per-capita.csv'
# vegetable='vegetable-consumption-per-capita.csv'
# meat='FAOSTAT_data_3-10-2019.csv'
# diet='dietary-compositions-by-commodity-group.csv'


In [None]:
# input files for the pyecharts map and pie charts
path1 = 'FoodSupply_Crops_E_All_Data.csv'
path2 = 'FoodSupply_LivestockFish_E_All_Data.csv'
# countries that each get a pie chart, as matched against the loaded tables
countries = ['China', 'United States of America', 'India', 'Russian Federation']

# creating the instance loads the csv files for the matplotlib/seaborn charts
vis = Visualization()

# static charts
vis.radarChart_for_macronutrient()
vis.boxPlot_for_dailyCaloricSupply()
vis.lineChart_for_fruit_vegetable_meat_intake()
vis.barPlot_dietary_component_for_four_countries()

# global consumption trend: first as is, then with every line starting at zero
vis.lineChart_food_consumption()
vis.lineChart_food_consumption(start_zero=True)

# interactive charts: world map timeline, then one pie chart per country
style = vis.initial_style_pyecharts()
vis.dataloader(path1, path2)
vis.generate_map(style)
for country in countries:
    vis.generate_pie(country)