In [15]:
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [16]:
# Read the raw WFP food-price export. The first record after the header holds
# hashtag labels ('#date', '#value', ...) rather than data, so every column is
# loaded with object dtype and is cleaned up in later cells.
dataset = pd.read_csv(
    filepath_or_buffer='../../data/raw/wfp_food_prices_moz.csv',
)

In [17]:
# Preview the first rows of the raw frame; row 0 carries HXL-style hashtag
# labels (e.g. '#date', '#adm1+name') instead of an observation.
dataset.head()

Unnamed: 0,date,admin1,admin2,market,latitude,longitude,category,commodity,unit,priceflag,pricetype,currency,price,usdprice
0,#date,#adm1+name,#adm2+name,#loc+market+name,#geo+lat,#geo+lon,#item+type,#item+name,#item+unit,#item+price+flag,#item+price+type,#currency,#value,#value+usd
1,1992-11-15,Maputo City,Cidade_De_Maputo,Maputo,-25.965278,32.589167,cereals and tubers,Maize (white),KG,actual,Retail,MZN,1.34,0.4641
2,1992-12-15,Gaza,Chokwe,Chokwe,-24.533333,32.983333,cereals and tubers,Maize (white),KG,actual,Retail,MZN,1.53,0.5193
3,1992-12-15,Inhambane,Maxixe,Maxixe,-23.859722,35.347222,cereals and tubers,Maize (white),KG,actual,Retail,MZN,1.69,0.5736
4,1992-12-15,Maputo City,Cidade_De_Maputo,Maputo,-25.965278,32.589167,cereals and tubers,Maize (white),KG,actual,Retail,MZN,1.55,0.5261


In [18]:
# Preview the last rows of the raw frame; in the recorded output these are
# 'forecast' entries with a price of 0.0.
dataset.tail()

Unnamed: 0,date,admin1,admin2,market,latitude,longitude,category,commodity,unit,priceflag,pricetype,currency,price,usdprice
69095,2023-04-15,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Beans (butter),KG,forecast,Retail,MZN,0.0,0.0
69096,2023-04-15,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Beans (catarino),KG,forecast,Retail,MZN,0.0,0.0
69097,2023-04-15,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Beans (magnum),KG,forecast,Retail,MZN,0.0,0.0
69098,2023-04-15,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Cowpeas,KG,forecast,Retail,MZN,0.0,0.0
69099,2023-04-15,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,"Groundnuts (small, shelled)",KG,forecast,Retail,MZN,0.0,0.0


In [19]:
# Summarise every column. The recorded output reports count/unique/top/freq
# for all of them (including price and coordinates), i.e. nothing is treated
# as numeric yet.
dataset.describe()

Unnamed: 0,date,admin1,admin2,market,latitude,longitude,category,commodity,unit,priceflag,pricetype,currency,price,usdprice
count,69100,69098,69098,69100,69098.0,69098.0,69100,69100,69100,69100,69100,69100,69100.0,69100.0
unique,362,12,82,100,158.0,158.0,8,58,8,3,3,2,5943.0,16325.0
top,2022-06-15,Inhambane,Cidade_De_Maputo,Maputo,-25.965278,32.589167,cereals and tubers,Maize (white),KG,actual,Retail,MZN,0.0,0.0
freq,1356,10457,4390,3053,2976.0,2976.0,27452,6176,58450,60120,68267,69099,5415.0,5415.0


In [20]:
# Report the frame's dimensions.
print(f"rows: {len(dataset)}")
print(f"columns: {len(dataset.columns)}")
print()

# Count the missing values in each column.
print('null values:')
print(dataset.isna().sum())

rows: 69100
columns: 14

null values:
date         0
admin1       2
admin2       2
market       0
latitude     2
longitude    2
category     0
commodity    0
unit         0
priceflag    0
pricetype    0
currency     0
price        0
usdprice     0
dtype: int64


In [21]:
# Remove the hashtag-label row (index label 0) and every row that has a
# missing value.
# The frame is rebound rather than mutated in place, and errors='ignore' is
# passed, so re-running this cell after label 0 is already gone does not raise
# a KeyError.
dataset = dataset.drop(index=0, errors='ignore').dropna()

In [22]:
# Check data types: in the recorded output every column is still `object`,
# so the numeric and date columns need an explicit conversion.
dataset.dtypes

date         object
admin1       object
admin2       object
market       object
latitude     object
longitude    object
category     object
commodity    object
unit         object
priceflag    object
pricetype    object
currency     object
price        object
usdprice     object
dtype: object

In [23]:
# Cast the price and coordinate columns from object dtype to float, parse the
# date column, derive `year` and `month` from it, then discard `date` itself.
dataset = (
    dataset
    .astype({'price': float, 'latitude': float, 'longitude': float})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .assign(
        year=lambda frame: frame['date'].dt.year,
        month=lambda frame: frame['date'].dt.month,
    )
    .drop(columns='date')
)

In [24]:
# Discard rows flagged as 'forecast' so that only rows with a current price
# remain.
dataset = dataset.loc[dataset['priceflag'].ne('forecast')]

In [25]:
# Confirm which price flags remain after the forecast rows were removed.
dataset['priceflag'].unique()

array(['actual'], dtype=object)

In [26]:
# List the distinct commodity names present in the filtered data.
dataset['commodity'].unique()

array(['Maize (white)', 'Cassava (dry)', 'Rice', 'Rice (imported)',
       'Sugar (brown, local)', 'Oil (vegetable, local)', 'Beans (dry)',
       'Groundnuts (Mix)', 'Maize meal (white, first grade)',
       'Maize meal (white, with bran)', 'Oil (vegetable, imported)',
       'Cowpeas', 'Groundnuts (large, shelled)',
       'Groundnuts (small, shelled)', 'Wheat flour (local)',
       'Cassava flour', 'Cassava (fresh)',
       'Maize meal (white, without bran)', 'Potatoes (Irish, imported)',
       'Potatoes (Irish, local)', 'Rice (local)', 'Sweet potatoes',
       'Cabbage', 'Carrots', 'Garlic (large)', 'Garlic (small)',
       'Onions (local)', 'Peppers (green)', 'Tomatoes', 'Onions',
       'Beans (fresh)', 'Potatoes (unica)', 'Maize (imported)',
       'Beans (red)', 'Beans (butter)', 'Beans (catarino)',
       'Beans (magnum)', 'Sugar (brown, imported)', 'Onions (imported)',
       'Maize meal', 'Potatoes', 'Eggs', 'Fish', 'Salt (iodised)',
       'Sugar', 'Firewood', 'Handwash so

In [27]:
# Drop the USD price, currency and price-flag columns; the remaining `price`
# column is the one kept for the processed dataset.
dataset = dataset.drop(columns=['usdprice', 'currency', 'priceflag'])

In [28]:
# Give the administrative-level columns descriptive names and record the
# currency in the price column's name.
dataset = dataset.rename(
    columns={'admin1': 'province', 'admin2': 'district', 'price': 'price(MZN)'}
)

In [29]:
# Report the frame's (rows, columns) after the cleaning steps above.
print('dataset new shape: ', dataset.shape)

dataset new shape:  (60118, 12)


In [30]:
# Preview the first rows of the cleaned frame.
dataset.head()

Unnamed: 0,province,district,market,latitude,longitude,category,commodity,unit,pricetype,price(MZN),year,month
1,Maputo City,Cidade_De_Maputo,Maputo,-25.965278,32.589167,cereals and tubers,Maize (white),KG,Retail,1.34,1992,11
2,Gaza,Chokwe,Chokwe,-24.533333,32.983333,cereals and tubers,Maize (white),KG,Retail,1.53,1992,12
3,Inhambane,Maxixe,Maxixe,-23.859722,35.347222,cereals and tubers,Maize (white),KG,Retail,1.69,1992,12
4,Maputo City,Cidade_De_Maputo,Maputo,-25.965278,32.589167,cereals and tubers,Maize (white),KG,Retail,1.55,1992,12
5,Gaza,Chokwe,Chokwe,-24.533333,32.983333,cereals and tubers,Maize (white),KG,Retail,1.67,1993,1


In [31]:
# Preview the last rows of the cleaned frame; the index labels are the
# original ones because the index is never reset after rows are removed.
dataset.tail()

Unnamed: 0,province,district,market,latitude,longitude,category,commodity,unit,pricetype,price(MZN),year,month
60116,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Beans (catarino),KG,Retail,80.0,2023,2
60117,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Beans (magnum),KG,Retail,100.0,2023,2
60118,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Cowpeas,KG,Retail,37.5,2023,2
60119,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,"Groundnuts (small, shelled)",KG,Retail,60.0,2023,2
60120,Zambezia,Mocuba,Mocuba,-16.8375,36.985556,pulses and nuts,Sesame,KG,Retail,100.0,2023,2


In [32]:
# Write the cleaned frame to the processed-data folder; the row index is
# left out of the file.
dataset.to_csv(
    path_or_buf='../../data/processed/wfp_food_prices_moz.csv',
    index=False,
)