# Data Tutorial

This notebook demonstrates how to use the `src/meli_ads/data` module to load and transform the dataset.

In [None]:
import sys
import os
from pathlib import Path

# Make the `src` package importable by adding the parent of the current
# working directory to sys.path.
# NOTE(review): this assumes the notebook is launched from a folder directly
# below the project root (e.g. notebooks/) — confirm against the repo layout.
# Path.cwd().parent is the idiomatic pathlib spelling and, unlike
# Path(os.getcwd()).parents[0], does not raise IndexError when the working
# directory is the filesystem root.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    # Guarded append keeps re-running this cell from adding duplicate entries.
    sys.path.append(str(project_root))

from src.meli_ads.data import MeliChallengeDataset
from src.meli_ads.data.transforms import HistoryFeatureExtractor

## 1. Initialize Dataset

We initialize the dataset by pointing it at the raw data directory and attaching a `HistoryFeatureExtractor` as its transform.

In [None]:
# Create the transform first, then pass it to the dataset together with the
# (relative) path of the raw data directory.
history_transform = HistoryFeatureExtractor()
dataset = MeliChallengeDataset(data_dir='../data/raw', transform=history_transform)

## 2. Load Data

Load the training data (first 1000 rows for speed).

In [None]:
# Cap the number of training rows requested so the tutorial stays fast.
train_row_limit = 1000
df_train = dataset.load_train(nrows=train_row_limit)

# Last expression of the cell: preview the first rows of the loaded frame.
df_train.head()

## 3. Apply Transforms

`load_train` returns the raw pandas DataFrame.
To get a **transformed** example (with features extracted), use `get_example(index)` instead.

In [None]:
# Fetch example 0 from the 'train' data; per the tutorial text above,
# get_example returns it with the transform applied.
example = dataset.get_example(0, dataset='train')

# Show which fields the example exposes, then two of them.
print("Transformed Keys:", example.keys())
for label, field in (("Num Events:", 'num_events'), ("Item Bought:", 'item_bought')):
    print(label, example[field])