<a href="https://colab.research.google.com/github/TatKhachatryan/Time-Series-Analysis-Project/blob/main/ts_analysis_food_prices_arm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
# Optional Colab environment setup, intentionally left disabled: the lines
# below install condacolab and kaleido. Uncomment and run them once only if
# those packages are missing from the runtime.
# !pip install -q condacolab
# !pip install -U kaleido
# import condacolab
# condacolab.install()
# !conda install -c plotly python-kaleido

In [46]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [47]:
# data source: https://data.humdata.org/dataset/wfp-food-prices-for-armenia

# keep the file location in one named place so it is easy to spot and change
csv_path = '/content/drive/MyDrive/Food_Prices_Armenia/wfp_food_prices_arm.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,date,admin1,admin2,market,latitude,longitude,category,commodity,unit,priceflag,pricetype,currency,price,usdprice
0,#date,#adm1+name,#adm2+name,#loc+market+name,#geo+lat,#geo+lon,#item+type,#item+name,#item+unit,#item+price+flag,#item+price+type,#currency,#value,#value+usd
1,1996-01-15,,,National Average,,,non-food,Fuel (diesel),L,actual,Retail,AMD,100.0,0.2487
2,1996-01-15,,,National Average,,,non-food,Fuel (petrol-gasoline),L,actual,Retail,AMD,130.0,0.3233
3,1996-02-15,,,National Average,,,non-food,Fuel (diesel),L,actual,Retail,AMD,100.0,0.2484
4,1996-02-15,,,National Average,,,non-food,Fuel (petrol-gasoline),L,actual,Retail,AMD,120.0,0.2981
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35392,2023-08-15,Yerevan,Yerevan,Yerevan,40.181111,44.513611,vegetables and fruits,Cabbage,KG,forecast,Retail,AMD,0.0,0.0
35393,2023-08-15,Yerevan,Yerevan,Yerevan,40.181111,44.513611,vegetables and fruits,Carrots,KG,forecast,Retail,AMD,0.0,0.0
35394,2023-08-15,Yerevan,Yerevan,Yerevan,40.181111,44.513611,vegetables and fruits,Cucumbers (greenhouse),KG,forecast,Retail,AMD,0.0,0.0
35395,2023-08-15,Yerevan,Yerevan,Yerevan,40.181111,44.513611,vegetables and fruits,Onions,KG,forecast,Retail,AMD,0.0,0.0


So the columns' descriptions are:

*   **date** - date of the price observation
*   **admin1** - province (region)
*   **admin2** - city/village/region names
*   **market** - local market name
*   **latitude** - geographic measure
*   **longitude** - geographic measure
*   **category** - item type
*   **commodity** - item name
*   **unit** - item unit
*   **priceflag** - item price flag
*   **pricetype** - item price type
*   **currency** - item currency
*   **price** - item price value
*   **usdprice** - item price value in USD




In [48]:
# Drop the tag row that sits directly under the CSV header (its cells look
# like "#date", "#adm1+name", ...): it is metadata, not a price observation.
# The row is selected by its "#" prefix in the first column instead of by
# position, so re-running this cell cannot silently discard a real data row
# (a plain `data.iloc[1:]` removes one more row on every execution).
is_tag_row = data.iloc[:, 0].astype(str).str.startswith('#')
data = data.loc[~is_tag_row].reset_index(drop=True)

# changing the column names to more friendly names
# (assignment is positional, so the order must match the CSV column order)
data.columns = ['Date', 'Province', 'Area', 'Market', 'Latitude', 'Longitude',
       'Item Category', 'Item Name', 'Unit', 'Priceflag', 'Pricetype', 'Currency',
       'Price', 'USD_price']

# Data Processing
## Missing Values

In [49]:
# per-column NaN tally; display only the columns that actually contain gaps
missing_values = data.isnull().sum()
missing_values.loc[missing_values.gt(0)]

Province     324
Area         324
Latitude     324
Longitude    324
dtype: int64

In [50]:
# Select the rows that have no Province once, then report which markets and
# items they belong to and preview the first few of them.
rows_without_province = data.loc[data['Province'].isna()]
print("Market Names:", rows_without_province['Market'].unique())
print("Item Names:", rows_without_province['Item Name'].unique())
print()

rows_without_province.head()

Market Names: ['National Average']
Item Names: ['Fuel (diesel)' 'Fuel (petrol-gasoline)' 'Exchange rate']



Unnamed: 0,Date,Province,Area,Market,Latitude,Longitude,Item Category,Item Name,Unit,Priceflag,Pricetype,Currency,Price,USD_price
0,1996-01-15,,,National Average,,,non-food,Fuel (diesel),L,actual,Retail,AMD,100.0,0.2487
1,1996-01-15,,,National Average,,,non-food,Fuel (petrol-gasoline),L,actual,Retail,AMD,130.0,0.3233
2,1996-02-15,,,National Average,,,non-food,Fuel (diesel),L,actual,Retail,AMD,100.0,0.2484
3,1996-02-15,,,National Average,,,non-food,Fuel (petrol-gasoline),L,actual,Retail,AMD,120.0,0.2981
4,1996-03-15,,,National Average,,,non-food,Fuel (diesel),L,actual,Retail,AMD,110.0,0.2726


So, the 324 missing values in each of these columns all come from rows whose Market is **"National Average"**, which is logical,
as a "National Average" cannot have a geographic latitude/longitude or a Province and Area.

In [51]:
# data types: list every column with its non-null count and dtype
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35396 entries, 0 to 35395
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Date           35396 non-null  object
 1   Province       35072 non-null  object
 2   Area           35072 non-null  object
 3   Market         35396 non-null  object
 4   Latitude       35072 non-null  object
 5   Longitude      35072 non-null  object
 6   Item Category  35396 non-null  object
 7   Item Name      35396 non-null  object
 8   Unit           35396 non-null  object
 9   Priceflag      35396 non-null  object
 10  Pricetype      35396 non-null  object
 11  Currency       35396 non-null  object
 12  Price          35396 non-null  object
 13  USD_price      35396 non-null  object
dtypes: object(14)
memory usage: 3.8+ MB


In [53]:
# Every column was read as `object`; parse the date column into datetimes
# and cast the two price columns to floats.
data['Date'] = pd.to_datetime(data['Date'])
for price_col in ('Price', 'USD_price'):
    data[price_col] = data[price_col].astype('float64')