### ANALYSIS OF GLOBAL FOOD PRICES 
*  The dataset used is the [Global Food Prices Dataset](https://www.kaggle.com/datasets/lasaljaywardena/global-food-prices-dataset), uploaded to Kaggle by Lasal Jayawardena.

In [None]:
# Import Libraries
import os
import pandas as pd
import numpy as np

from pylab import rcParams
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

In [4]:
# Apply seaborn's "whitegrid" theme to every plot drawn after this cell.
sns.set_theme(style="whitegrid")

In [5]:
# LOAD DATA
# The path is relative to the notebook's working directory (../input/<dataset>/ layout);
# adjust it if the CSV lives elsewhere.
df = pd.read_csv("../input/global-food-prices-dataset/global_food_prices.csv")

In [6]:
# Preview the first rows of the raw dataset.
df.head()

In [7]:
# EDA
# Column names, dtypes, and non-null counts.
df.info()

In [8]:
# Summary statistics of the dataset's columns.
df.describe()

In [9]:
# Number of missing values in each column.
df.isnull().sum()

In [15]:
# Top 20 most reported commodities

# Count the non-null adm0_id entries per commodity and keep the 20 largest counts.
top_20_df = (
    df.groupby("cm_name")["adm0_id"]
    .count()
    .sort_values(ascending=False)
    .head(20)
)

fig, ax = plt.subplots(figsize=(20, 14))
# seaborn draws onto `ax`; `fig` stays the Figure instead of being rebound to an Axes.
sns.barplot(x=top_20_df.index, y=top_20_df.values, ax=ax)
# Rotate the category labels so long commodity names do not overlap.
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylabel("Number of Commodities Reported")
ax.set_xlabel("Commodity Name")
ax.set_title("Top 20 Commodities Reported", fontsize=16);

In [16]:
# DATA VISUALIZATION
# Top 20 countries with the most records available

# Count the non-null adm0_id entries per country and keep the 20 largest counts.
top_20_df = (
    df.groupby("adm0_name")["adm0_id"]
    .count()
    .sort_values(ascending=False)
    .head(20)
)

fig, ax = plt.subplots(figsize=(20, 14))
# seaborn draws onto `ax`; `fig` stays the Figure instead of being rebound to an Axes.
sns.barplot(x=top_20_df.index, y=top_20_df.values, ax=ax)
# Rotate the category labels so long country names do not overlap.
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylabel("Number of Records in the Dataset")
ax.set_xlabel("Country Name")
ax.set_title("Top 20 Countries on Basis of Most Data Records", fontsize=16);

In [17]:
# Data Visualization of Pakistan
# First list every country name present in the dataset, to confirm the exact
# spelling used for the country filter in the next cell.
df["adm0_name"].unique()

In [18]:
# Keep only the rows reported for Pakistan.
# .copy() makes the subset an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning.
# NOTE: this overwrites `df`; re-run the load cell to get the full dataset back.
df = df[df["adm0_name"] == "Pakistan"].copy()

In [19]:
# Create a Date column (first day of each reported month).
# The timestamp is assembled from year/month/day parts rather than parsed from
# "YYYY M" strings, so no date-format guessing is involved.
# Assumes mp_year / mp_month hold integer-like values — TODO confirm.
date_parts = pd.DataFrame({"year": df["mp_year"], "month": df["mp_month"], "day": 1})
# assign() returns a new frame instead of writing into a filtered slice.
df = df.assign(Date=pd.to_datetime(date_parts))

In [20]:
# Preview the Pakistan subset, which now includes the Date column.
df.head()

In [21]:
# Index the frame by Date so later cells can resample and slice by date.
# NOTE(review): not idempotent — re-running this cell raises KeyError because
# "Date" is no longer a column once it has become the index.
df = df.set_index("Date")

In [22]:
# Plot every plottable column against the Date index, one subplot per column.
# DataFrame.plot creates its own figure, so no separate plt.figure() call is
# needed (it would only leave an empty extra figure in the output).
axes = df.plot(subplots=True, figsize=(20, 20))
# Label the x axis on the bottom subplot.
axes[-1].set_xlabel("Date");

In [23]:
# Analyze the monthly change in mp_price
# Average numeric columns only: taking the mean of a frame that still holds
# text columns raises on recent pandas versions.
# NOTE(review): the name `usd_month` is kept for compatibility; the values are
# whatever currency mp_price is reported in, presumably PKR here — confirm.
usd_month = df.select_dtypes(include="number").resample("M").mean()

fig, ax = plt.subplots(figsize=(12, 8))
# Plot the monthly averages computed above (one bar per month), not the raw rows.
ax.bar(usd_month.index, usd_month["mp_price"], width=25, align="center")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
# The frame was filtered to Pakistan earlier in the notebook, so label it as PKR.
ax.set_ylabel("PKR (Pakistani Rupees)")
ax.set_xlabel("Date")
ax.set_title("Monthly Changes in Food Price", fontsize=16);

In [24]:
# Analyze the seasonal change of food prices over the last two years

# Sort by date before slicing: label-based date-range slicing is only well
# defined on a monotonic DatetimeIndex, and the rows are not guaranteed to be
# in date order.
df_last_two = df.sort_index().loc['2019-01-01':'2021-01-01']

fig, ax = plt.subplots(figsize=(10, 16))
# One box per calendar month, pooling the same month across the selected years.
sns.boxplot(data=df_last_two, x=df_last_two.index.month, y='mp_price', ax=ax)
ax.set_ylabel("PKR (Pakistani Rupees)")
ax.set_title('Food Prices Seasonal Changes', fontsize=16)
ax.set_xlabel('Month');

In [25]:
# Monthly and weekly sampling for the last two years
# Sort by date so the date-range slice is well defined, and keep numeric columns
# only so that .mean() does not fail on text columns.
df_recent_numeric = (
    df.sort_index()
    .loc['2019-01-01':'2021-01-01']
    .select_dtypes(include="number")
)
df_month = df_recent_numeric.resample("M").mean()
# NOTE(review): Date is built from year and month only, so every timestamp falls
# on the first of a month; most weekly bins are therefore empty (NaN).
df_week = df_recent_numeric.resample("W").mean()

In [26]:
# Compare monthly and weekly average food prices in Pakistan, 2019 to 2021
# (the frame was filtered to Pakistan earlier, so prices are labelled PKR, not LKR).

fig, ax = plt.subplots(figsize=(40, 14))
ax.plot(df_month["mp_price"], marker='.', linestyle='-', linewidth=0.5, label='Monthly', color='black')
ax.plot(df_week["mp_price"], marker='o', markersize=8, linestyle='-', label='Weekly', color='coral')
ax.set_ylabel("Price in PKR (Pakistani Rupees)")
ax.set_xlabel("Date")
ax.set_title("Monthly vs Weekly Average Food Prices", fontsize=16)
# Enlarge tick labels to stay readable on this very wide figure.
ax.tick_params(labelsize=20)
ax.legend(prop={"size": 22});