# RAWG Data Engineering Project - Final Orchestrator
This notebook runs the full ETL pipeline from data ingestion (Bronze) to transformation (Silver).

In [None]:
import logging
import os
from datetime import datetime, timedelta
from src.ingestor import GameDataIngestor
from src.transformer import GameTransformer
from deltalake import DeltaTable

# --- LOGGING SETUP ---
# Log records go both to logs/pipeline.log (appended across runs) and to the
# notebook output via a stream handler.
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "pipeline.log")

# Detach AND close handlers left over from a previous run of this cell.
# Merely clearing the handler list would prevent duplicate log lines but
# leave the old FileHandler's file descriptor open (on Windows this keeps
# pipeline.log locked until the kernel is restarted).
logger = logging.getLogger()
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# With no handlers left on the root logger, basicConfig applies this
# configuration instead of silently doing nothing.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file, mode='a', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logging.info(f"Logging iniciado. Archivo: {log_file}")

# --- PIPELINE CONFIGURATION ---
# Size of the extraction window in days. Change this single value to run a
# shorter incremental load or a longer backfill.
PIPELINE_WINDOW_DAYS = 30

# The window ends "now" (local clock) and reaches back PIPELINE_WINDOW_DAYS.
end_date = datetime.now()
start_date = end_date - timedelta(days=PIPELINE_WINDOW_DAYS)

# ISO-style YYYY-MM-DD strings; these are what the ingestion cell passes on.
str_start = start_date.strftime('%Y-%m-%d')
str_end = end_date.strftime('%Y-%m-%d')

print(f"Pipeline Window: {str_start} to {str_end}")

2025-12-08 23:53:46,989 - INFO - Logging iniciado. Archivo: logs\pipeline.log


Pipeline Window: 2025-12-01 to 2025-12-08


In [2]:
# --- PART 1: INGESTION (BRONZE LAYER) ---
# The two loads are attempted independently: a failure in the genre load does
# not prevent the games load from running (best-effort, as before).
ingestor = GameDataIngestor()

print("1. Starting Full Load for Genres...")
try:
    ingestor.get_genres_full()
except Exception as e:
    # logging.exception records the message plus the full traceback through
    # the root logger, so the failure is kept in the pipeline log instead of
    # only flashing by on stdout.
    logging.exception("Genre load failed: %s", e)

print("\n2. Starting Incremental Load for Games...")
try:
    # str_start / str_end are the YYYY-MM-DD window bounds computed in the
    # configuration cell.
    ingestor.get_games_incremental(start_date=str_start, end_date=str_end)
except Exception as e:
    logging.exception("Game load failed: %s", e)

2025-12-08 23:53:46,995 - INFO - Iniciando Full Load para genres...


1. Starting Full Load for Genres...


2025-12-08 23:53:49,145 - INFO - Guardados 19 géneros en c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\bronze\genres (Mode: overwrite).
2025-12-08 23:53:49,146 - INFO - Iniciando Carga Incremental para juegos (2025-12-01 a 2025-12-08)...
2025-12-08 23:53:49,146 - INFO - Cargando página 1...



2. Starting Incremental Load for Games...


2025-12-08 23:53:50,693 - INFO - No hay más páginas disponibles.
2025-12-08 23:53:50,717 - INFO - Guardados 17 juegos exitosamente (Mode: Idempotent Append).


In [3]:
# --- PART 2: TRANSFORMATION (SILVER LAYER) ---
# Announce the stage, then build a GameTransformer and run its process() step.
# There is no try/except here: unlike the ingestion cell, any exception raised
# by the constructor or by process() propagates and stops this cell.
print("\n3. Starting Transformation Process...")
transformer = GameTransformer()
transformer.process()

2025-12-08 23:53:50,723 - INFO - Starting Silver Layer transformation...
2025-12-08 23:53:50,762 - INFO - Loaded 17 records from Bronze Games.
2025-12-08 23:53:50,779 - INFO - Saved 17 records to c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\silver\games_refined
2025-12-08 23:53:50,802 - INFO - Saved 12 analytics records to c:\Users\cyber\OneDrive\Desktop\Curso UTN\TP-games\data\silver\games_analytics



3. Starting Transformation Process...


In [4]:
# --- VERIFICATION (ALL TABLES) ---
from src.config import Config
import os
from deltalake import DeltaTable
import pandas as pd

def inspect_table(layer, table_name, partition_col=None):
    """Print a short summary of a Delta table and display its first rows.

    Args:
        layer: 'bronze' or 'silver' (case-insensitive). Selects
            Config.BRONZE_PATH or Config.SILVER_PATH as the base directory.
        table_name: Name of the table directory under the layer's base path.
        partition_col: Optional column name. When given and present in the
            table, its distinct values are printed.

    Raises:
        ValueError: If ``layer`` is neither 'bronze' nor 'silver'. Previously
            any other value silently fell back to the Silver path, so a typo
            inspected the wrong table under a misleading heading.

    A missing table or a read error is reported with ``print`` rather than
    raised, so one bad table does not abort the remaining inspections.
    Returns None.
    """
    layer_key = str(layer).lower()
    if layer_key == 'bronze':
        base_path = Config.BRONZE_PATH
    elif layer_key == 'silver':
        base_path = Config.SILVER_PATH
    else:
        raise ValueError(
            f"Unknown layer {layer!r}: expected 'bronze' or 'silver'"
        )
    path = os.path.join(base_path, table_name)

    print(f"\n>>> Inspecting {layer_key.upper()}: {table_name}")

    # Guard clause: nothing to read if the table directory does not exist.
    if not os.path.exists(path):
        print(f"Table not found at {path}")
        return

    try:
        # Loads the whole table into pandas to count rows and preview it.
        df = DeltaTable(path).to_pandas()
        print(f"Total Records: {len(df)}")
        if partition_col and partition_col in df.columns:
            print(f"Partitions ({partition_col}): {df[partition_col].unique()}")
        # `display` is the notebook's rich-output helper (IPython builtin).
        display(df.head(3))
    except Exception as e:
        print(f"Error reading table: {e}")

# Walk through every table the pipeline produces, layer by layer. Each
# description is printed just before the matching inspect_table() call.
# The banner strings use the intended characters (🟤, ⚪, ñ) rather than
# their mis-decoded UTF-8-as-cp1252 byte sequences.

# 1. Bronze Tables
print("\n--- 🟤 BRONZE LAYER (Raw Data) ---")
print("1. genres: Catalogo completo de generos (Datos estaticos/referencia).")
inspect_table('bronze', 'genres')

print("2. games: Juegos crudos obtenidos incrementalmente (Datos temporales, ultimos 30 dias).")
inspect_table('bronze', 'games', partition_col='extraction_date')

# 2. Silver Tables
print("\n--- ⚪ SILVER LAYER (Refined Data) ---")
print("3. games_refined: Tabla maestra limpia, desduplicada y con tipos corregidos.")
inspect_table('silver', 'games_refined', partition_col='extraction_date')

print("4. games_analytics: Agregacion de metricas (Avg Rating, Conteo) por Año y Genero.")
print("(Nota: 'Avg Rating' puede ser 0 para juegos muy recientes sin reviews).")
inspect_table('silver', 'games_analytics')


--- 🟤 BRONZE LAYER (Raw Data) ---
1. genres: Catalogo completo de generos (Datos estaticos/referencia).

>>> Inspecting BRONZE: genres
Total Records: 19


Unnamed: 0,id,name,slug,games_count,image_background,games
0,4,Action,action,191319,https://media.rawg.io/media/games/4be/4be6a6ad...,"[{""id"": 3498, ""slug"": ""grand-theft-auto-v"", ""n..."
1,51,Indie,indie,86121,https://media.rawg.io/media/games/9fa/9fa63622...,"[{""id"": 1030, ""slug"": ""limbo"", ""name"": ""Limbo""..."
2,3,Adventure,adventure,151514,https://media.rawg.io/media/games/2ad/2ad87a4a...,"[{""id"": 3439, ""slug"": ""life-is-strange-episode..."


2. games: Juegos crudos obtenidos incrementalmente (Datos temporales, ultimos 30 dias).

>>> Inspecting BRONZE: games
Total Records: 17
Partitions (extraction_date): ['2025-12-08']


Unnamed: 0,slug,name,playtime,platforms,stores,released,tba,background_image,rating,rating_top,...,user_game,reviews_count,community_rating,saturated_color,dominant_color,short_screenshots,parent_platforms,genres,extraction_ts,extraction_date
0,skate-story,Skate Story,0,"[{""platform"": {""id"": 4, ""name"": ""PC"", ""slug"": ...","[{""store"": {""id"": 5, ""name"": ""GOG"", ""slug"": ""g...",2025-12-08,False,https://media.rawg.io/media/games/53e/53e81bf3...,0.0,0,...,,1,0,0f0f0f,0f0f0f,"[{""id"": -1, ""image"": ""https://media.rawg.io/me...","[{""platform"": {""id"": 1, ""name"": ""PC"", ""slug"": ...","[{""id"": 4, ""name"": ""Action"", ""slug"": ""action""}]",2025-12-08 23:53:50.694855,2025-12-08
1,temple-of-the-green-moon-prologue,Temple of the Green Moon: Prologue,0,"[{""platform"": {""id"": 4, ""name"": ""PC"", ""slug"": ...","[{""store"": {""id"": 1, ""name"": ""Steam"", ""slug"": ...",2025-12-05,False,https://media.rawg.io/media/screenshots/5d7/5d...,0.0,0,...,,0,0,0f0f0f,0f0f0f,"[{""id"": -1, ""image"": ""https://media.rawg.io/me...","[{""platform"": {""id"": 1, ""name"": ""PC"", ""slug"": ...","[{""id"": 3, ""name"": ""Adventure"", ""slug"": ""adven...",2025-12-08 23:53:50.694855,2025-12-08
2,hot-asses,HOT ASSES,0,"[{""platform"": {""id"": 4, ""name"": ""PC"", ""slug"": ...","[{""store"": {""id"": 1, ""name"": ""Steam"", ""slug"": ...",2025-12-05,False,https://media.rawg.io/media/screenshots/39a/39...,0.0,0,...,,1,0,0f0f0f,0f0f0f,"[{""id"": -1, ""image"": ""https://media.rawg.io/me...","[{""platform"": {""id"": 1, ""name"": ""PC"", ""slug"": ...","[{""id"": 3, ""name"": ""Adventure"", ""slug"": ""adven...",2025-12-08 23:53:50.694855,2025-12-08



--- ⚪ SILVER LAYER (Refined Data) ---
3. games_refined: Tabla maestra limpia, desduplicada y con tipos corregidos.

>>> Inspecting SILVER: games_refined
Total Records: 17
Partitions (extraction_date): ['2025-12-08']


Unnamed: 0,id,slug,name,released,released_year,tba,background_image,rating,rating_top,metacritic,is_top_rated,primary_genre,extraction_date,__index_level_0__
0,455518,skate-story,Skate Story,2025-12-08,2025,False,https://media.rawg.io/media/games/53e/53e81bf3...,0.0,0,,False,Action,2025-12-08,0
1,1011996,shes-leaving,She's Leaving,2025-12-01,2025,False,https://media.rawg.io/media/screenshots/d12/d1...,0.0,0,,False,Unknown,2025-12-08,14
2,1015335,marvel-cosmic-invasion,Marvel Cosmic Invasion,2025-12-01,2025,False,https://media.rawg.io/media/games/d38/d383be9a...,0.0,0,,False,Action,2025-12-08,13


4. games_analytics: Agregacion de metricas (Avg Rating, Conteo) por Año y Genero.
(Nota: 'Avg Rating' puede ser 0 para juegos muy recientes sin reviews).

>>> Inspecting SILVER: games_analytics
Total Records: 12


Unnamed: 0,released_year,genre,avg_rating,game_count
0,2025,Action,0.0,7
1,2025,Adventure,0.0,4
2,2025,RPG,0.0,4
