Champion bucket to Bronze

In [2]:
from google.cloud import storage, bigquery
import pandas as pd
import json
import os
import uuid

# Pipeline configuration: source bucket/prefix in Cloud Storage and the
# destination dataset/table in BigQuery.
# NOTE(review): the notebook title and the recorded output refer to champion
# files, while `prefix` and `table_id` below refer to items -- confirm that
# these values are the intended ones before re-running.
project_id = "big-data-lol"
bucket_name = "batch-images"
prefix = 'item/14.12/'
dataset_id = 'Bronze_LOL_Dataset'
table_id = 'bronze_batch_img_item'

# Both clients are bound to the same project.
storage_client = storage.Client(project=project_id)
bigquery_client = bigquery.Client(project=project_id)

# Download every object under `prefix` into /tmp, keeping only the file name.
bucket = storage_client.bucket(bucket_name)
blobs = bucket.list_blobs(prefix=prefix)

local_files = []

for blob in blobs:
    # An object whose name ends in '/' (e.g. a zero-byte "folder" placeholder)
    # has an empty basename, so the download target would be the /tmp
    # directory itself and the download would fail. Skip those entries.
    if blob.name.endswith('/'):
        continue

    local_path = '/tmp/' + os.path.basename(blob.name)
    blob.download_to_filename(local_path)
    local_files.append(local_path)

print(f"Downloaded files: {local_files}")

# Flatten each downloaded JSON document into one record per top-level key.
rows = []

for file_path in local_files:
    source_name = os.path.basename(file_path)

    with open(file_path, 'r', encoding='utf-8') as handle:
        document = json.load(handle)

    rows.extend(
        {
            'UID': str(uuid.uuid4()),  # fresh random identifier per record
            'Key': entry_key,
            'Value': json.dumps(entry_value),  # value serialized as a JSON string
            'FileName': source_name,
        }
        for entry_key, entry_value in document.items()
    )

# Build a pandas DataFrame with one row per flattened record.
df = pd.DataFrame(rows)

# With no rows the DataFrame has no columns and cannot satisfy the REQUIRED
# schema below; fail early with a clear message instead of inside the load job.
if df.empty:
    raise ValueError(
        "No rows were produced from the downloaded files; "
        "refusing to start the BigQuery load job."
    )

# Declare the schema explicitly: every column is a required string.
schema = [
    bigquery.SchemaField('UID', 'STRING', mode='REQUIRED'),
    bigquery.SchemaField('Key', 'STRING', mode='REQUIRED'),
    bigquery.SchemaField('Value', 'STRING', mode='REQUIRED'),
    bigquery.SchemaField('FileName', 'STRING', mode='REQUIRED'),
]

# Load job configuration: WRITE_TRUNCATE replaces the table contents.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    schema=schema,
)

# Client.dataset() is deprecated; build the table reference from a
# DatasetReference in the client's own project instead.
table_ref = bigquery.DatasetReference(
    bigquery_client.project, dataset_id
).table(table_id)
job = bigquery_client.load_table_from_dataframe(df, table_ref, job_config=job_config)

# Block until the load job finishes (raises if the job failed).
job.result()

print(f"DataFrame cargado en BigQuery: {dataset_id}.{table_id}")


Downloaded files: ['/tmp/Aatrox.json', '/tmp/Ahri.json', '/tmp/Akali.json', '/tmp/Akshan.json', '/tmp/Alistar.json', '/tmp/Amumu.json', '/tmp/Anivia.json', '/tmp/Annie.json', '/tmp/Aphelios.json', '/tmp/Ashe.json', '/tmp/AurelionSol.json', '/tmp/Azir.json', '/tmp/Bard.json', '/tmp/Belveth.json', '/tmp/Blitzcrank.json', '/tmp/Brand.json', '/tmp/Braum.json', '/tmp/Briar.json', '/tmp/Caitlyn.json', '/tmp/Camille.json', '/tmp/Cassiopeia.json', '/tmp/Chogath.json', '/tmp/Corki.json', '/tmp/Darius.json', '/tmp/Diana.json', '/tmp/DrMundo.json', '/tmp/Draven.json', '/tmp/Ekko.json', '/tmp/Elise.json', '/tmp/Evelynn.json', '/tmp/Ezreal.json', '/tmp/Fiddlesticks.json', '/tmp/Fiora.json', '/tmp/Fizz.json', '/tmp/Galio.json', '/tmp/Gangplank.json', '/tmp/Garen.json', '/tmp/Gnar.json', '/tmp/Gragas.json', '/tmp/Graves.json', '/tmp/Gwen.json', '/tmp/Hecarim.json', '/tmp/Heimerdinger.json', '/tmp/Hwei.json', '/tmp/Illaoi.json', '/tmp/Irelia.json', '/tmp/Ivern.json', '/tmp/Janna.json', '/tmp/JarvanIV.