# 01 — Business Understanding (Kedro + CRISP-DM)

In [1]:
# --- Inicializar Kedro sin magics ---
from pathlib import Path
from kedro.framework.session import KedroSession
from kedro.framework.startup import bootstrap_project

# localizar la raíz del proyecto subiendo hasta encontrar pyproject.toml
project_path = Path.cwd()
while project_path != project_path.parent and not (project_path / "pyproject.toml").exists():
    project_path = project_path.parent
assert (project_path / "pyproject.toml").exists(), f"No encuentro pyproject.toml desde {Path.cwd()}"

bootstrap_project(str(project_path))
session = KedroSession.create(project_path=str(project_path))
ctx = session.load_context()
catalog = ctx.catalog

print("Proyecto:", project_path)

Proyecto: c:\Users\lttlk\Documents\Nueo\machinegame


In [2]:
# Diagnóstico rápido
import sys
try:
    import kedro
    print("Python:", sys.executable)
    print("Kedro:", kedro.__version__)
    print("Datasets (primeros 15):", list(catalog.list())[:15])
except Exception as e:
    print("No se pudo inspeccionar kedro:", e)

Python: c:\Users\lttlk\Documents\Nueo\.venv\Scripts\python.exe
Kedro: 1.0.0
No se pudo inspeccionar kedro: 'DataCatalogWithCatalogCommandsMixin' object has no attribute 'list'


In [3]:
# Helpers: cargar o reconstruir y reparación de encoding
from pathlib import Path
import pandas as pd

def load_or_build(name: str, pipe: str):
    """Intenta catalog.load(name); si falla, ejecuta el pipeline y vuelve a cargar."""
    try:
        return catalog.load(name)
    except Exception as e:
        print(f"[load_or_build] {name} no disponible o ilegible ({e}). Corriendo pipeline: {pipe} …")
        session.run(pipeline_name=pipe)
        return catalog.load(name)

def fix_reporting_csv_encoding(report_dir: Path | str = None, encoding_from: str = "latin1", encoding_to: str = "utf-8"):
    """Repara CSVs en 08_reporting que estén en ANSI/latin-1: reescribe en UTF-8.
    Si ya son UTF-8 válidos, los deja intactos.
    """
    if report_dir is None:
        report_dir = project_path / "data" / "08_reporting"
    report_dir = Path(report_dir)
    if not report_dir.exists():
        print("[fix_reporting_csv_encoding] No existe la carpeta:", report_dir)
        return

    fixed, skipped = [], []
    for p in report_dir.glob("*.csv"):
        try:
            _ = pd.read_csv(p, encoding=encoding_to, nrows=5)
            skipped.append(p.name)
        except Exception:
            try:
                df = pd.read_csv(p, encoding=encoding_from)
                df.to_csv(p, index=False, encoding=encoding_to)
                fixed.append(p.name)
            except Exception as e:
                print(f"[fix_reporting_csv_encoding] No se pudo reparar {p.name}: {e}")
    print("[fix_reporting_csv_encoding] Reparados (latin-1 -> utf-8):", fixed)
    print("[fix_reporting_csv_encoding] Ya estaban en utf-8:", skipped)

### Ejecutar pipeline de Business Understanding

In [4]:
session.run(pipeline_name="business_understanding")


[1m{[0m
    [32m'eda_summary'[0m: [1;35mkedro_datasets.pandas.csv_dataset.CSVDataset[0m[1m([0m[33mfilepath[0m=[1;35mPurePosixPath[0m[1m([0m[32m'C:/Users/lttlk/Documents/Nueo/machinegame/data/08_reporting/eda_summary.csv'[0m[1m)[0m, [33mprotocol[0m=[32m'file'[0m, [33mload_args[0m=[1m{[0m[32m'encoding'[0m: [32m'utf-8'[0m[1m}[0m, [33msave_args[0m=[1m{[0m[32m'index'[0m: [3;91mFalse[0m, [32m'encoding'[0m: [32m'utf-8'[0m[1m}[0m[1m)[0m,
    [32m'vg_sales_describe'[0m: [1;35mkedro_datasets.pandas.csv_dataset.CSVDataset[0m[1m([0m[33mfilepath[0m=[1;35mPurePosixPath[0m[1m([0m[32m'C:/Users/lttlk/Documents/Nueo/machinegame/data/08_reporting/vg_sales_describe.csv'[0m[1m)[0m, [33mprotocol[0m=[32m'file'[0m, [33mload_args[0m=[1m{[0m[32m'encoding'[0m: [32m'utf-8'[0m[1m}[0m, [33msave_args[0m=[1m{[0m[32m'index'[0m: [3;91mFalse[0m, [32m'encoding'[0m: [32m'utf-8'[0m[1m}[0m[1m)[0m,
    [32m'steam_describe'[0m: [

### Reparar encoding de reporting (si fuese necesario)

In [5]:
fix_reporting_csv_encoding()

[fix_reporting_csv_encoding] Reparados (latin-1 -> utf-8): ['business_understanding.csv']
[fix_reporting_csv_encoding] Ya estaban en utf-8: ['dataset_inventory.csv', 'eda_summary.csv', 'games_describe.csv', 'steam_describe.csv', 'vg_sales_describe.csv']


### Cargar artefactos: `business_report` y `dataset_inventory`

In [6]:
business_report = load_or_build("business_report", "business_understanding")
dataset_inventory = load_or_build("dataset_inventory", "business_understanding")
display(business_report.head(30))
display(dataset_inventory)
print("business_report shape:", business_report.shape)

Unnamed: 0,seccion,item
0,Objetivos,Construir un dataset limpio y documentado a pa...
1,Objetivos,Identificar variables potencialmente útiles pa...
2,Preguntas de Negocio,¿Qué géneros y plataformas muestran mayor trac...
3,Preguntas de Negocio,¿Cómo se distribuyen las horas jugadas/accione...
4,Preguntas de Negocio,¿Qué variables podrían explicar las ventas por...
5,Métricas de Éxito,Datasets limpios en data/03_primary/
6,Métricas de Éxito,Reportes EDA en data/08_reporting/
7,Métricas de Éxito,Notebook por fase (1 a 3)


Unnamed: 0,dataset,filepath,exists,size_bytes,sample_columns
0,games_raw,data\01_raw\Games.csv,True,616319,"Console,GameName,Review,Score"
1,steam_raw,data\01_raw\steam-200k.csv,True,8958107,"151603712,The Elder Scrolls V Skyrim,purchase,..."
2,vg_sales_raw,data\01_raw\Video_Games_Sales_as_at_22_Dec_201...,True,1610764,"Name,Platform,Year_of_Release,Genre,Publisher,..."


business_report shape: (8, 2)


#### Conclusiones/Decisiones de negocio (rellena aquí)