In [1]:
conda install -c conda-forge xgboost

Jupyter detected...
Channels:
 - conda-forge
 - defaults
Platform: win-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: C:\Users\Asus\anaconda3

  added / updated specs:
    - xgboost


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _py-xgboost-mutex-2.0      |            cpu_0          11 KB  conda-forge
    ca-certificates-2025.6.15  |       h4c7d964_0         148 KB  conda-forge
    certifi-2025.6.15          |     pyhd8ed1ab_0         152 KB  conda-forge
    libxgboost-2.1.1           |       h585ebfc_0         2.7 MB
    openssl-3.1.0              |       hcfcfb64_3         7.1 MB  conda-forge
    py-xgboost-2.1.1           | cpu_pyhb442362_0         131 KB  conda-forge
    ucrt-10.0.22621.0          |       h57928b3_1         547 KB  conda-forge
    xgboost-2.1.1              | cpu_pyhb8f9a19_0          14 

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

In [3]:
# Load the pre-processed price table (path is relative to the notebook's folder).
dados = pd.read_csv("../Dados/wfp_food_prices_moz.csv")
# Build a first-of-month timestamp from the numeric year/month components.
# Assembling from components avoids concatenating non-zero-padded strings and
# relying on date-format inference to parse them.
dados['date'] = pd.to_datetime(dados[['year', 'month']].assign(day=1))

In [4]:
# Pre-processing
from sklearn.preprocessing import LabelEncoder

# Categorical descriptors that are integer-encoded below.
label_cols = ['province', 'district', 'market', 'category', 'commodity', 'unit', 'pricetype']
# Columns kept for modelling: the categoricals plus the price target and the date.
colunas_para_manter = label_cols + ['price(MZN)', 'date']

# Restrict the frame to the modelling columns and discard rows with no price.
dados = dados.loc[:, colunas_para_manter].dropna(subset=['price(MZN)'])

# Fit one encoder per categorical column on its string representation; the fitted
# encoders are kept so the integer codes can be mapped back to labels later.
label_encoders = {col: LabelEncoder().fit(dados[col].astype(str)) for col in label_cols}
for col, le in label_encoders.items():
    dados[col] = le.transform(dados[col].astype(str))

In [7]:
# Derive calendar features from the observation date.
componentes_data = dados['date'].dt
dados['month'] = componentes_data.month
dados['year'] = componentes_data.year
# Running month counter: months elapsed since January of the earliest year in the
# data, so the first year's months map to 1..12, the next year's to 13..24, etc.
primeiro_ano = dados['year'].min()
dados['month_id'] = 12 * (dados['year'] - primeiro_ano) + dados['month']

In [8]:
# Target is the price column; predictors are every remaining column except the
# raw date (its information is carried by month / year / month_id instead).
y = dados['price(MZN)']
X = dados.drop(['price(MZN)', 'date'], axis='columns')

In [9]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles by default, so test rows can come from
# months earlier than some training rows — confirm that a random (rather than
# chronological) split is what this evaluation intends.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [10]:
# === Model 1: Random Forest ===
# 100 trees with a fixed seed; fit() returns the estimator itself, so the
# construction and training are chained into one expression.
rf_model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)
# Predictions on the held-out rows, used by the evaluation step.
y_pred_rf = rf_model.predict(X_test)

In [11]:
# === Model 2: XGBoost ===
# Gradient-boosted trees: 100 boosting rounds, learning rate 0.1, fixed seed.
xgb_model = XGBRegressor(
    random_state=42,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_train, y_train)
# Predictions on the same held-out rows as the Random Forest, for a like-for-like comparison.
y_pred_xgb = xgb_model.predict(X_test)

In [12]:
# === Evaluation ===
def avaliar_modelo(y_true, y_pred, nome):
    """Print MAE, RMSE and R² for one model's predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned with ``y_true``.
        nome: Model name shown in the report header.

    Returns:
        None. The report is written to standard output.
    """
    print(f"\nModelo: {nome}")

    erro_absoluto_medio = mean_absolute_error(y_true, y_pred)
    print("MAE:", erro_absoluto_medio)

    # RMSE is the square root of the mean squared error.
    erro_quadratico_medio = mean_squared_error(y_true, y_pred)
    print("RMSE:", np.sqrt(erro_quadratico_medio))

    coef_determinacao = r2_score(y_true, y_pred)
    print("R²:", coef_determinacao)

# Report both models against the same held-out targets, in a fixed order.
for nome_modelo, previsoes in (("Random Forest", y_pred_rf), ("XGBoost", y_pred_xgb)):
    avaliar_modelo(y_test, previsoes, nome_modelo)


Modelo: Random Forest
MAE: 9.889589187966523
RMSE: 32.31281689799432
R²: 0.9752641863935545

Modelo: XGBoost
MAE: 13.514872710042829
RMSE: 34.285171285685095
R²: 0.9721523079733382
