# 🧪 EDA + Migración CSV → Firestore

Notebook listo para **Google Colab**. Sube `movies.csv` y tu **Service Account JSON** (guárdalo con el nombre `service_account.json`, o ajusta la ruta en la celda de configuración).
Este flujo cubre:
1) EDA rápido
2) Migración de CSV a Firestore (colección `movies`)
3) Lectura desde Firestore a `DataFrame`


In [None]:

# If you are running on Colab, uncomment the next line to install the dependencies
# !pip -q install google-cloud-firestore pandas


In [None]:

import pandas as pd
import json
from google.cloud import firestore
from google.oauth2 import service_account

# === CONFIGURATION ===
CSV_PATH = "movies.csv"  # Change this if the CSV was uploaded under another name
SERVICE_ACCOUNT_JSON = "service_account.json"  # Upload the key file under this name
PROJECT_ID = None  # Set this when the JSON key does not carry a project_id

# Parse the service-account key once: it feeds both the credentials object
# and the fallback project id below.
with open(SERVICE_ACCOUNT_JSON, "r") as key_file:
    creds_dict = json.load(key_file)

credentials = service_account.Credentials.from_service_account_info(creds_dict)

# An explicit PROJECT_ID wins; the key file is only consulted when it is unset.
if PROJECT_ID:
    project_id = PROJECT_ID
else:
    project_id = creds_dict["project_id"]

# Firestore client bound to the resolved project and credentials
db = firestore.Client(project=project_id, credentials=credentials)

# Quick EDA: first rows, shape and column names of the CSV
df = pd.read_csv(CSV_PATH)
display(df.head())
print("Filas, Columnas:", df.shape)
print("Columnas:", [*df.columns])


In [None]:

# CSV -> Firestore migration (collection "movies").
# Every DataFrame row becomes one document with an auto-generated ID.
# Expected columns: name, director, genre, company. If your CSV uses other
# column names, adjust FIELDS below.

import math  # not used by this cell; kept in case other cells rely on it

# Columns copied from each CSV row into the Firestore document.
FIELDS = ("name", "director", "genre", "company")

collection = db.collection("movies")
# Number of writes grouped into one commit.
# NOTE(review): 500 matches Firestore's historical per-batch cap; confirm
# against the current quotas before raising it.
batch_size = 500
batch = db.batch()
count = 0

for _, row in df.iterrows():
    doc = {}
    for field in FIELDS:
        value = row.get(field)  # None when the column is absent from the CSV
        # read_csv represents empty cells as NaN; store them as null (None)
        # instead of writing a NaN double into Firestore.
        doc[field] = None if pd.isna(value) else value
    ref = collection.document()  # auto-generated document ID
    batch.set(ref, doc)
    count += 1
    if count % batch_size == 0:
        # Flush the full batch and start a fresh one.
        batch.commit()
        batch = db.batch()

# Final commit only when writes are still pending; this avoids sending an
# empty commit when the row count is an exact multiple of batch_size (or zero).
if count % batch_size:
    batch.commit()
print(f"Migrados {count} documentos a la colección 'movies'.")


In [None]:

# Firestore -> DataFrame: stream every document of "movies" and keep each
# document's ID next to its fields in a "doc_id" column.
docs = db.collection("movies").stream()
rows = [
    # A snapshot without data yields None from to_dict(); fall back to {}.
    {**(snapshot.to_dict() or {}), "doc_id": snapshot.id}
    for snapshot in docs
]
movies_df = pd.DataFrame(rows)
display(movies_df.head(20))
print("Total documentos:", len(movies_df))
