### Import all the necessary libraries 

In [30]:
from pathlib import Path

import numpy as np # array computation library
import pandas as pd # data manipulation library
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly.subplots import make_subplots

### Data preparation
Load the data from the CSV file.

In [4]:
# Prefer the copy of the CSV that sits next to the notebook so the analysis is not tied to one machine;
# fall back to the original absolute path when that relative file is not present.
# NOTE(review): assumes the notebook is run from the folder that contains `data/` - confirm.
LOCAL_CSV = Path("data") / "meat_consumption_worldwide.csv"
AUTHOR_CSV = Path(r"D:/suhas_github/Artificial-Intelligence/Exploratory Data Analysis/Worldwide meat consumprion/data/meat_consumption_worldwide.csv")
DATA_PATH = LOCAL_CSV if LOCAL_CSV.exists() else AUTHOR_CSV
meat_data = pd.read_csv(DATA_PATH)

In [5]:
# Display the loaded frame (the notebook renders the first and last rows) to eyeball the columns.
meat_data

Unnamed: 0,LOCATION,SUBJECT,MEASURE,TIME,Value
0,AUS,BEEF,KG_CAP,1991,27.721815
1,AUS,BEEF,KG_CAP,1992,26.199591
2,AUS,BEEF,KG_CAP,1993,26.169094
3,AUS,BEEF,KG_CAP,1994,25.456134
4,AUS,BEEF,KG_CAP,1995,25.340226
...,...,...,...,...,...
13755,MYS,SHEEP,THND_TONNE,2022,44.154151
13756,MYS,SHEEP,THND_TONNE,2023,44.897588
13757,MYS,SHEEP,THND_TONNE,2024,45.612107
13758,MYS,SHEEP,THND_TONNE,2025,46.325312


Before we proceed let us check the dataset for missing values and get more information on the dataset

In [6]:
meat_data.shape # (number of rows, number of columns) of the dataset

(13760, 5)

In [8]:
# Per-column flag: True if that column holds at least one missing value.
meat_data.isna().any()

LOCATION    False
SUBJECT     False
MEASURE     False
TIME        False
Value       False
dtype: bool

We can observe that the dataset consists of **13760** rows and 5 columns. The dataset does not have any missing values. Further, we can get a summary of the dataset using the `.info()` method and basic statistics using the `.describe()` method.

In [9]:
# Print the column names, non-null counts, dtypes and memory usage of the frame.
meat_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13760 entries, 0 to 13759
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   LOCATION  13760 non-null  object 
 1   SUBJECT   13760 non-null  object 
 2   MEASURE   13760 non-null  object 
 3   TIME      13760 non-null  int64  
 4   Value     13760 non-null  float64
dtypes: float64(1), int64(1), object(3)
memory usage: 537.6+ KB


Observations:
 - none of the columns contain null values
 - the `TIME` and `Value` columns are `int64` and `float64` respectively, while the rest of the columns are of `object` dtype

In [11]:
# Explore further: count the distinct values in each column.
meat_data.nunique()

LOCATION       48
SUBJECT         4
MEASURE         2
TIME           37
Value       11383
dtype: int64

In [12]:
# List every distinct code in the LOCATION column (countries as well as organisations/regions).
meat_data['LOCATION'].unique()

array(['AUS', 'CAN', 'JPN', 'KOR', 'MEX', 'NZL', 'TUR', 'USA', 'DZA',
       'ARG', 'BGD', 'BRA', 'CHL', 'CHN', 'COL', 'EGY', 'ETH', 'GHA',
       'HTI', 'IND', 'IDN', 'IRN', 'ISR', 'KAZ', 'MYS', 'MOZ', 'NGA',
       'PAK', 'PRY', 'PER', 'PHL', 'RUS', 'SAU', 'ZAF', 'SDN', 'TZA',
       'THA', 'UKR', 'URY', 'VNM', 'ZMB', 'WLD', 'SSA', 'OECD', 'BRICS',
       'EU28', 'NOR', 'CHE'], dtype=object)

In [26]:
# Count how many rows are reported in thousand tonnes versus kg per capita.
meat_data['MEASURE'].value_counts()

THND_TONNE    7042
KG_CAP        6718
Name: MEASURE, dtype: int64

In [27]:
# List the distinct meat types recorded in the SUBJECT column.
meat_data['SUBJECT'].unique()

array(['BEEF', 'PIG', 'POULTRY', 'SHEEP'], dtype=object)

We can see which locations are available and how the rows are divided between kg per capita (`KG_CAP`) and thousand tonnes (`THND_TONNE`) in the `MEASURE` column.

In [43]:
# Distribution plot of the Value column; log1p is applied before plotting.
hist_data = meat_data['Value']
fig = ff.create_distplot(
    hist_data=[np.log1p(hist_data)],
    group_labels=['Value of meat'],
    bin_size=[1],
)
fig.update_layout(height=500, width=500)
fig.show()

The above graph shows the distribution of the log-transformed (`log1p`) meat consumption values.

In [44]:
# One-hot encode MEASURE into MEASURE_KG_CAP / MEASURE_THND_TONNE indicator columns.
# dtype=int keeps the indicators as 0/1 integers; pandas >= 2.0 would otherwise return booleans.
df = pd.get_dummies(meat_data, columns=["MEASURE"], dtype=int)
df

Unnamed: 0,LOCATION,SUBJECT,TIME,Value,MEASURE_KG_CAP,MEASURE_THND_TONNE
0,AUS,BEEF,1991,27.721815,1,0
1,AUS,BEEF,1992,26.199591,1,0
2,AUS,BEEF,1993,26.169094,1,0
3,AUS,BEEF,1994,25.456134,1,0
4,AUS,BEEF,1995,25.340226,1,0
...,...,...,...,...,...,...
13755,MYS,SHEEP,2022,44.154151,0,1
13756,MYS,SHEEP,2023,44.897588,0,1
13757,MYS,SHEEP,2024,45.612107,0,1
13758,MYS,SHEEP,2025,46.325312,0,1


In [47]:
# Visualize the correlations between the numeric columns of the dataset.
# LOCATION and SUBJECT are strings; DataFrame.corr() raises on them under pandas >= 2.0,
# so only the numeric/boolean columns are passed to it.
numeric_cols = df.select_dtypes(include=['number', 'bool'])
fig = px.imshow(numeric_cols.corr())
fig.update_layout(height=500, width=700)
fig.show()

In [66]:
# Total Value per location, largest first, drawn as one bar per location.
# as_index=False keeps LOCATION as a regular column, so no reset_index is needed.
# NOTE(review): the sum adds KG_CAP and THND_TONNE rows together, and LOCATION also holds
# aggregates such as WLD/OECD/BRICS/EU28 - confirm that this is the intended comparison.
val_count = (
    df.groupby('LOCATION', as_index=False)[['Value']]
    .sum()
    .sort_values('Value', ascending=False)
)
px.bar(
    val_count,
    x='LOCATION',
    y='Value',
    color='LOCATION',
    title='Location-wise total meat consumption',
)

From the above figure, we can observe that **"WLD, BRICS, OECD, CHN, EU28" and "USA"** have very high meat consumption. We can limit the graph to the top 15 locations as follows.

In [68]:
# Keep the 15 largest totals. A single slice feeds the frame, both axes and the colour,
# so they can no longer disagree (the frame used to be cut to 10 rows while the axes used 15).
top_locations = val_count.head(15)
px.bar(top_locations, x='LOCATION', y='Value', color='LOCATION', title='Location-wise total meat consumption')

Now that we know the amount of meat consumed by different locations, let us explore the consumption of meat over time.

In [74]:
# Total Value per year, sorted from the largest yearly total to the smallest, as a bar chart.
val_time = (
    df.groupby('TIME', as_index=False)[['Value']]
    .sum()
    .sort_values('Value', ascending=False)
)
px.bar(val_time, x='TIME', y='Value')

In [79]:
# Total Value per (year, meat type), drawn as one line per meat type.
df_time = df.groupby(['TIME', 'SUBJECT'], as_index=False)[['Value']].sum()
fig = px.line(
    df_time,
    x='TIME',
    y='Value',
    color='SUBJECT',
    title='meat-wise growth over years',
)
# update_layout returns the figure, so leaving it as the last expression renders the chart.
fig.update_layout(height=500, width=700)

From the above figure we can observe that, over time, the consumption of meat has increased from around 150K in 1990 to around 900K in 2020, and is projected to reach 950K in 2026.

We can see that pig and poultry have been popular since the 1990s. Beef saw a spike at the end of the 1990s and in the early 2000s.

Let's move on to understand which type of meat is most preferred.

In [65]:
# Total Value per meat type, largest first, shown as a bar chart and then as a pie chart.
val_meat = (
    df.groupby('SUBJECT', as_index=False)[['Value']]
    .sum()
    .sort_values('Value', ascending=False)
)
# update_layout returns the figure itself, so the sizing can be chained onto the constructor.
fig = px.bar(val_meat, x='SUBJECT', y='Value').update_layout(height=500, width=500)
fig1 = px.pie(
    val_meat,
    values='Value',
    names='SUBJECT',
    title='Meat distribution',
).update_layout(height=500, width=500)
fig.show()
fig1.show()

### Observations:
  - Clearly, pig is the most consumed meat, followed by poultry and beef.
  - The meat that is least consumed is sheep

#### Let us now observe the change in meat eating habit over years

In [97]:
# Split the rows by unit of measure using the one-hot MEASURE_* indicator columns.
ton_meh = df.loc[df['MEASURE_THND_TONNE'].eq(1)]
kg_meh = df.loc[df['MEASURE_KG_CAP'].eq(1)]

# Inclusive (first_year, last_year) windows compared by `process`.
_PERIODS = ((1991, 1995), (1996, 2005), (2006, 2010), (2011, 2020), (2021, 2025))


def process(df, periods=_PERIODS, title="Change of meat consumption over 35 years"):
    """Show one pie chart per period with each meat type's share of the summed Value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns 'TIME', 'SUBJECT' and 'Value'.
    periods : sequence of (int, int), optional
        Inclusive year windows, one pie per window. Defaults to the five windows
        1991-1995, 1996-2005, 2006-2010, 2011-2020 and 2021-2025.
    title : str, optional
        Title of the whole figure.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Pie traces need 'domain' type subplot cells, one per period.
    fig = make_subplots(
        rows=1,
        cols=len(periods),
        specs=[[{'type': 'domain'} for _ in periods]],
    )
    for position, (first_year, last_year) in enumerate(periods, start=1):
        in_window = df.loc[df['TIME'].between(first_year, last_year)]
        totals = in_window.groupby(by=['SUBJECT'])[['Value']].sum().reset_index()
        # The pie label is derived from the window bounds so it cannot disagree with the filter
        # (the fourth pie used to be labelled '2011-2021' while covering 2011-2020).
        fig.add_trace(
            go.Pie(
                values=totals['Value'],
                labels=totals['SUBJECT'],
                title=f'{first_year}-{last_year}',
            ),
            row=1,
            col=position,
        )
    fig.update_layout(height=300, width=800, title_text=title)
    fig.show()

# Period-by-period pies for the rows measured in thousand tonnes ...
process(ton_meh)
# ... and the same breakdown for the rows measured in kg per capita.
process(kg_meh)
  

### Meat consumption in India
Let us now look at the consumption of meat in India.

In [101]:
# Keep only the rows whose LOCATION code is 'IND' and display them.
ind = df.loc[df['LOCATION'].eq('IND')]
ind

Unnamed: 0,LOCATION,SUBJECT,TIME,Value,MEASURE_KG_CAP,MEASURE_THND_TONNE
2794,IND,BEEF,1990,1.649089,1,0
2795,IND,BEEF,1991,1.622147,1,0
2796,IND,BEEF,1992,1.615513,1,0
2797,IND,BEEF,1993,1.575704,1,0
2798,IND,BEEF,1994,1.541686,1,0
...,...,...,...,...,...,...
13461,IND,PIG,2022,411.192516,0,1
13462,IND,PIG,2023,420.963345,0,1
13463,IND,PIG,2024,431.118299,0,1
13464,IND,PIG,2025,440.510991,0,1


In [103]:
# Repeat the worldwide charts for the India subset.
# NOTE(review): this rebinds ton_meh, kg_meh, val_time, df_time and val_meat, which held the
# worldwide results until now - re-running earlier cells afterwards would see the India data.

# India rows split by unit of measure.
ton_meh = ind.loc[ind['MEASURE_THND_TONNE'].eq(1)]
kg_meh = ind.loc[ind['MEASURE_KG_CAP'].eq(1)]

# Aggregations: per year, per (year, meat type) and per meat type.
val_time = (
    ind.groupby('TIME', as_index=False)[['Value']]
    .sum()
    .sort_values('Value', ascending=False)
)
df_time = ind.groupby(['TIME', 'SUBJECT'], as_index=False)[['Value']].sum()
val_meat = (
    ind.groupby('SUBJECT', as_index=False)[['Value']]
    .sum()
    .sort_values('Value', ascending=False)
)

# Build each figure at 500x500; update_layout returns the figure, so it can be chained.
meat_fig = px.bar(val_meat, x='SUBJECT', y='Value').update_layout(height=500, width=500)
pie_fig = px.pie(
    val_meat,
    values='Value',
    names='SUBJECT',
    title='Meat distribution',
).update_layout(height=500, width=500)
time_fig = px.line(
    df_time,
    x='TIME',
    y='Value',
    color='SUBJECT',
    title='meat-wise growth over years',
).update_layout(height=500, width=500)
bar_fig = px.bar(val_time, x='TIME', y='Value').update_layout(height=500, width=500)

# Render in the same order as before: yearly totals, per-meat trend, pie, per-meat bars.
bar_fig.show()
time_fig.show()
pie_fig.show()
meat_fig.show()

# Period-by-period pies for each unit of measure.
process(ton_meh)
process(kg_meh)

### Observations

From all the graphs we can conclude on the following for Indian meat consumption:
 - Meat consumption has been gradually increasing over time
 - Until 2007 beef was the meat of choice in India, after which poultry shot up. Beef is still the second most popular meat.
 - In the change of meat consumption we can clearly see the change from beef to poultry over the years.

***