# RAWG Data Engineering Project - Final Orchestrator
This notebook runs the full ETL pipeline from data ingestion (Bronze) to transformation (Silver).

In [1]:
import logging
from datetime import datetime, timedelta
from src.ingestor import GameDataIngestor
from src.transformer import GameTransformer
from deltalake import DeltaTable

# Setup Logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- CONFIGURATION ---
# Calculate last 30 days window
end_date = datetime.now()
start_date = end_date - timedelta(days=30)
str_start = start_date.strftime('%Y-%m-%d')
str_end = end_date.strftime('%Y-%m-%d')

print(f"Pipeline Window: {str_start} to {str_end}")

Pipeline Window: 2025-11-08 to 2025-12-08


In [2]:
# --- PART 1: INGESTION (BRONZE LAYER) ---
ingestor = GameDataIngestor()

print("1. Starting Full Load for Genres...")
try:
    ingestor.get_genres_full()
except Exception as e:
    print(f"Genre load failed: {e}")

print("\n2. Starting Incremental Load for Games...")
try:
    ingestor.get_games_incremental(start_date=str_start, end_date=str_end)
except Exception as e:
    print(f"Game load failed: {e}")

2025-12-08 22:27:44,211 - INFO - Iniciando Full Load para genres...


1. Starting Full Load for Genres...


2025-12-08 22:27:45,995 - INFO - Guardados 19 géneros en c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\bronze\genres (Mode: overwrite).
2025-12-08 22:27:45,995 - INFO - Iniciando Carga Incremental para juegos (2025-11-08 a 2025-12-08)...
2025-12-08 22:27:45,996 - INFO - Cargando página 1/5...



2. Starting Incremental Load for Games...


2025-12-08 22:27:47,243 - INFO - Cargando página 2/5...
2025-12-08 22:27:48,468 - INFO - Cargando página 3/5...
2025-12-08 22:27:49,756 - INFO - Cargando página 4/5...
2025-12-08 22:27:51,051 - INFO - Cargando página 5/5...
2025-12-08 22:27:52,225 - INFO - Guardados 100 juegos exitosamente (Mode: Idempotent Append).


In [3]:
# --- PART 2: TRANSFORMATION (SILVER LAYER) ---
print("\n3. Starting Transformation Process...")
transformer = GameTransformer()
transformer.process()

2025-12-08 22:27:52,231 - INFO - Starting Silver Layer transformation...
2025-12-08 22:27:52,270 - INFO - Loaded 100 records from Bronze Games.
2025-12-08 22:27:52,288 - INFO - Saved 100 records to c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\silver\games_refined
2025-12-08 22:27:52,305 - INFO - Saved 14 analytics records to c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\silver\games_analytics



3. Starting Transformation Process...


In [4]:
# --- VERIFICATION ---
from src.config import Config
import os
import pandas as pd

silver_path = Config.SILVER_PATH
games_refined_path = os.path.join(silver_path, "games_refined")

print(f"\nChecking Silver Table at: {games_refined_path}")
if os.path.exists(games_refined_path):
    try:
        dt = DeltaTable(games_refined_path)
        df_silver = dt.to_pandas()
        print("Silver Dataframe Head:")
        display(df_silver.head())
        print(f"Total Records: {len(df_silver)}")
    except Exception as e:
        print(f"Could not read Silver table: {e}")
else:
    print("Silver table not found. Pipeline might have failed or no data was fetched.")


Checking Silver Table at: c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\silver\games_refined
Silver Dataframe Head:


Unnamed: 0,id,slug,name,released,released_year,tba,background_image,rating,rating_top,metacritic,is_top_rated,primary_genre,extraction_date
0,455518,skate-story,Skate Story,2025-12-08,2025,False,https://media.rawg.io/media/games/53e/53e81bf3...,0.0,0,,False,Action,2025-12-08
1,1015351,temple-of-the-green-moon-prologue,Temple of the Green Moon: Prologue,2025-12-05,2025,False,https://media.rawg.io/media/screenshots/5d7/5d...,0.0,0,,False,Adventure,2025-12-08
2,1015352,hot-asses,HOT ASSES,2025-12-05,2025,False,https://media.rawg.io/media/screenshots/39a/39...,0.0,0,,False,Adventure,2025-12-08
3,41626,routine,Routine,2025-12-04,2025,False,https://media.rawg.io/media/games/c2c/c2c572ae...,0.0,0,,False,Adventure,2025-12-08
4,1015350,fight-for-america,Fight For America,2025-12-04,2025,False,https://media.rawg.io/media/screenshots/c79/c7...,0.0,0,,False,Strategy,2025-12-08


Total Records: 100


In [5]:
df_debug_ingestor

NameError: name 'df_debug_ingestor' is not defined