# Create table of Pokemon Cards

In [None]:
from datasets import load_dataset
# Hugging Face Hub repository id of the Pokemon Cards dataset.
DATASET_ID = "TheFusion21/PokemonCards"

# Fetch the dataset; later cells read its 'train' split.
dataset = load_dataset(DATASET_ID)

## Create a 1K sample of the Pokemon Cards dataset

In [None]:
import numpy as np

# Seed for NumPy's global RNG; also embedded in the uploaded table's name.
SEED = 1

# Fix the global RNG state so the same rows are drawn on every run.
np.random.seed(SEED)

print(f"Dataset shape: {dataset['train'].shape}")

# Draw 1000 distinct row indices from the train split and keep only those rows.
train_split = dataset['train']
num_rows = train_split.shape[0]
indices_1k = np.random.choice(num_rows, size=1000, replace=False)
dataset_1k = train_split.select(indices_1k)

In [None]:
# Sanity check: the sample was drawn with size=1000, so this should show (1000,).
indices_1k.shape

## Start up Weights and Biases

In [None]:
import wandb

# Open a W&B run in the 'pokemon-cards' project. entity=None leaves the
# choice of account/team to wandb's own default.
run = wandb.init(
    project='pokemon-cards',
    entity=None,
    job_type="upload",
)

# Artifact that will hold the sampled cards table.
raw_data_artifact = wandb.Artifact(name='pokemon_cards', type="raw_data")

In [None]:
from PIL import Image
import requests
from pathlib import Path

# Build a W&B table with one row per sampled card: every dataset column,
# followed by the downloaded card image and a placeholder "split" column.
labels = dataset_1k.column_names
table = wandb.Table(labels + ["image"] + ['split'])

# Seconds to wait on the image server. requests applies no timeout by
# default, so without this a single stalled connection hangs the whole loop.
REQUEST_TIMEOUT_S = 30

for row in dataset_1k:
    image_url = row['image_url']
    response = requests.get(
        image_url,
        allow_redirects=True,
        timeout=REQUEST_TIMEOUT_S,
    )
    # Fail loudly on a 4xx/5xx response instead of saving an error page as
    # an image file and crashing later inside PIL.
    response.raise_for_status()

    # Keep a local copy in the working directory, named after the last path
    # component of the URL.
    image_path = Path(image_url).name
    with open(image_path, 'wb') as f:
        f.write(response.content)

    image = Image.open(image_path)
    # Image.open is lazy. Read the pixels now so the image no longer depends
    # on the file: URL basenames such as "10_hires.png" may not be unique
    # across card sets, and a later card could overwrite this file.
    image.load()

    table.add_data(
        # Pull the values in the same order as the table's columns so the two
        # cannot drift apart if the dataset schema changes.
        *(row[column] for column in labels),
        wandb.Image(image, caption=row['caption']),
        "None",  # we don't have a dataset split yet
    )

In [None]:
# Store the table in the artifact under a name that records the sampling seed.
table_name = f"pokemon_table_1k_seed_{SEED}"
raw_data_artifact.add(table, table_name)

In [None]:
# Log the artifact (with the table added to it) to the run.
run.log_artifact(artifact_or_path=raw_data_artifact)

# Mark the run as finished.
run.finish()