
# EDA — WFP Food Prices Haiti (2005–2025)

Objectif : réaliser une **analyse exploratoire des données**




## 1. Importations et configuration

In [36]:
import os
import json
import math
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")

## 2. Chargement des données et inspection initiale

In [37]:
df = pd.read_csv('wfp_food_prices_hti_2025.csv')

In [38]:
df.shape
display(df.head(5))

print("\nColumn:", list(df.columns))

df.shape[0]

Unnamed: 0,date,admin1,admin2,market,market_id,latitude,longitude,category,commodity,commodity_id,unit,priceflag,pricetype,currency,price,usdprice
0,#date,#adm1+name,#adm2+name,#loc+market+name,#loc+market+code,#geo+lat,#geo+lon,#item+type,#item+name,#item+code,#item+unit,#item+price+flag,#item+price+type,#currency+code,#value,#value+usd
1,2005-01-15,Artibonite,Gonaives,Gonaives,103,19.45,-72.68,cereals and tubers,Maize meal (local),471,Marmite,actual,Retail,HTG,60,1.41
2,2005-01-15,Artibonite,Gonaives,Gonaives,103,19.45,-72.68,cereals and tubers,Rice (tchako),57,Marmite,actual,Retail,HTG,94,2.21
3,2005-01-15,Artibonite,Gonaives,Gonaives,103,19.45,-72.68,cereals and tubers,Wheat flour (imported),339,Marmite,actual,Retail,HTG,60,1.41
4,2005-01-15,Centre,Hinche,Hinche,104,19.15,-72.02,cereals and tubers,Maize meal (imported),574,Pound,actual,Retail,HTG,13.16,0.31



Column: ['date', 'admin1', 'admin2', 'market', 'market_id', 'latitude', 'longitude', 'category', 'commodity', 'commodity_id', 'unit', 'priceflag', 'pricetype', 'currency', 'price', 'usdprice']


15413

In [39]:
for col in ['price', 'usdprice', 'latitude', 'longitude']:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

df['date'] = pd.to_datetime(df['date'], errors='coerce')
for col in ['market_id', 'commodity_id']:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce', downcast='integer')

# Strip text 
text_cols = ['admin1','admin2','market','category','commodity','unit','priceflag','pricetype','currency']
for c in text_cols:
    if c in df.columns:
        df[c] = df[c].astype(str).str.strip()


df = df.drop_duplicates()



## 4. Qualité de la donnée

In [40]:
na_rate = df.isna().mean().sort_values(ascending=False).rename('na_rate')
display((na_rate*100).round(2).to_frame())


df.dtypes

Unnamed: 0,na_rate
date,0.01
market_id,0.01
latitude,0.01
longitude,0.01
commodity_id,0.01
price,0.01
usdprice,0.01
admin1,0.0
admin2,0.0
market,0.0


date            datetime64[ns]
admin1                  object
admin2                  object
market                  object
market_id              float64
latitude               float64
longitude              float64
category                object
commodity               object
commodity_id           float64
unit                    object
priceflag               object
pricetype               object
currency                object
price                  float64
usdprice               float64
dtype: object

In [41]:
# Stats
display(df[['price','usdprice','latitude','longitude']].describe(percentiles=[.01,.05,.25,.5,.75,.95,.99]))

# Analyse 
for c in ['category','commodity','unit','currency','pricetype','priceflag','admin1','market']:
    if c in df.columns:
        vc = df[c].value_counts(dropna=False).head(10)
        print(f"\nTop modalités pour {c}:")
        display(vc)

Unnamed: 0,price,usdprice,latitude,longitude
count,15412.0,15412.0,15412.0,15412.0
mean,264.160604,3.115601,18.976405,-72.691466
std,342.870603,2.814973,0.615003,0.753501
min,6.75,0.16,18.2,-74.16
1%,12.0,0.28,18.2,-74.08
5%,16.8,0.37,18.2,-74.08
25%,43.6975,0.88,18.54,-72.83
50%,120.0,2.23,19.15,-72.53
75%,350.0,4.58,19.55,-72.2
95%,1000.0,8.3645,19.94,-71.73



Top modalités pour category:


category
cereals and tubers       10476
pulses and nuts           3033
oil and fats              1082
miscellaneous food         779
vegetables and fruits       42
#item+type                   1
Name: count, dtype: int64


Top modalités pour commodity:


commodity
Wheat flour (imported)       1920
Maize meal (local)           1897
Beans (black)                1577
Sorghum                      1559
Rice (tchako)                1525
Beans (red)                  1453
Rice (local)                 1326
Maize meal (imported)        1133
Oil (vegetable, imported)    1082
Sugar (white)                 779
Name: count, dtype: int64


Top modalités pour unit:


unit
Marmite    9585
Pound      4137
Gallon     1082
350 G       563
4 pcs        14
3 pcs        11
Dozen         5
Unit          4
Packet        4
10 pcs        4
Name: count, dtype: int64


Top modalités pour currency:


currency
HTG               15412
#currency+code        1
Name: count, dtype: int64


Top modalités pour pricetype:


pricetype
Retail              15412
#item+price+type        1
Name: count, dtype: int64


Top modalités pour priceflag:


priceflag
actual              15412
#item+price+flag        1
Name: count, dtype: int64


Top modalités pour admin1:


admin1
West           2106
Centre         1932
Grande'Anse    1920
North          1918
South-East     1860
South          1792
North-East     1464
Artibonite     1276
North-West     1144
#adm1+name        1
Name: count, dtype: int64


Top modalités pour market:


market
Port-au-Prince       2106
Hinche               1932
Cap-Haitien          1918
Jeremie              1869
Jacmel               1860
Cayes                1792
Ouanaminthe          1464
Gonaives             1276
Port-de-Paix         1144
Marche de Jeremie      20
Name: count, dtype: int64