In [13]:
import os
import numpy as np

# 1️⃣ Set dataset paths
POKEMON_DATASET_PATH = '/kaggle/input/pokemon-generation-one-22k/PokemonData/'

# 2️⃣ Optional sanity check
if not os.path.exists(POKEMON_DATASET_PATH):
    raise FileNotFoundError(f"{POKEMON_DATASET_PATH} not found!")
print("First Pokémon folders:", os.listdir(POKEMON_DATASET_PATH)[:5])

First Pokémon folders: ['Golbat', 'Beedrill', 'Caterpie', 'Clefable', 'Raichu']


In [None]:
 # Configuration variables (IMG_SIZE, batch size, etc.)



IMG_SIZE = (80, 80)          # Resize images
CHANNELS = 3                  # RGB images
BATCH_SIZE = 32
EPOCHS = 15
VALIDATION_SPLIT = 0.2

DATASET_PATH = '../input/pokemon-generation-one-22k/PokemonData/' 


In [15]:
# data_preprocessing.py
# Handles loading, preprocessing, augmentation
import os
import numpy as np
import cv2 as cv
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Configuration constants (moved from config.py)
IMG_SIZE = (128, 128)
CHANNELS = 3
VALIDATION_SPLIT = 0.2

import pandas as pd

def load_dataset(dataset_path):
    """
    Loads images and labels from dataset folder.
    Assumes dataset structure: dataset_path/Pokemon_Name/*.jpg
    """
    images, labels = [], []
    types_list = []  # For multi-label type prediction
    stats_list = []  # For regression (HP, Attack, etc.)

    # Correct path to metadata CSV inside Kaggle dataset
    metadata_path = os.path.join(os.path.dirname(dataset_path), 'pokemon_metadata.csv')
    metadata = pd.read_csv(metadata_path)

    for idx, row in metadata.iterrows():
        img_path = os.path.join(dataset_path, row['Image'])
        img = cv.imread(img_path)
        if img is None:
            continue
        img = cv.resize(img, IMG_SIZE)
        images.append(img)
        labels.append(row['Name'])
        types_list.append([row['Type1'], row.get('Type2', '')])  # some Pokémon have 2 types
        stats_list.append([row['HP'], row['Attack'], row['Defense'], row['Sp. Atk'], row['Sp. Def'], row['Speed']])

    images = np.array(images, dtype='float32') / 255.0
    labels = np.array(labels)
    stats_list = np.array(stats_list, dtype='float32')

    # Encode Pokémon names
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)
    labels_encoded = to_categorical(labels_encoded)

    return images, labels_encoded, types_list, stats_list, label_encoder

def create_datagen(x_train, y_train):
    datagen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        validation_split=VALIDATION_SPLIT
    )
    train_gen = datagen.flow(x_train, y_train, batch_size=32, subset='training')
    val_gen = datagen.flow(x_train, y_train, batch_size=32, subset='validation')
    return train_gen, val_gen


In [17]:
  # Defines the multi-output CNN model# model_architecture.py
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def build_multi_task_cnn(img_size, channels, num_pokemon, num_types, num_stats):
    """
    Builds a CNN with multi-task outputs:
      - Classification (Pokémon name)
      - Type prediction (multi-label)
      - Stats regression
    """
    inp = Input(shape=(img_size[0], img_size[1], channels))

    x = Conv2D(32, (3,3), activation='relu', padding='same')(inp)
    x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2,2))(x)
    x = Conv2D(128, (3,3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2,2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Pokémon classification
    class_output = Dense(num_pokemon, activation='softmax', name='class_output')(x)

    # Type prediction (multi-label)
    type_output = Dense(num_types, activation='sigmoid', name='type_output')(x)

    # Stats regression
    stat_output = Dense(num_stats, activation='linear', name='stat_output')(x)

    model = Model(inputs=inp, outputs=[class_output, type_output, stat_output])
    return model


In [None]:
# utils.py
# Helper functions for visualization, splitting, etc.
import matplotlib.pyplot as plt
import numpy as np

def plot_image(img, title=''):
    plt.imshow(img)
    plt.title(title)
    plt.axis('off')
    plt.show()

def display_predictions(img, pred_class, pred_type, pred_stats, label_encoder):
    print("Predicted Pokémon:", label_encoder.inverse_transform([pred_class])[0])
    print("Predicted Type(s):", pred_type)
    print("Predicted Stats:", pred_stats)
    plot_image(img)


In [25]:
# -------------------------------
# Pokémon Multi-Task Training (No CSV)
# -------------------------------

import os
import numpy as np
import cv2 as cv
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# -------------------------------
# Config
# -------------------------------
IMG_SIZE = (128, 128)
CHANNELS = 3
BATCH_SIZE = 32
EPOCHS = 10
VALIDATION_SPLIT = 0.2

DATASET_PATH = '/kaggle/input/pokemon-generation-one-22k/PokemonData'

!ls /kaggle/input/pokemon-generation-one-22k/images/


# -------------------------------
# Data Loading
# -------------------------------
def load_dataset(dataset_path, img_size=(128,128), channels=3):
    """
    Loads images directly from folders.
    Assumes dataset structure: dataset_path/Pokemon_Name/*.jpg
    """
    images = []
    labels = []

    for pokemon_name in os.listdir(dataset_path):
        pokemon_folder = os.path.join(dataset_path, pokemon_name)
        if not os.path.isdir(pokemon_folder):
            continue
        for img_file in os.listdir(pokemon_folder):
            img_path = os.path.join(pokemon_folder, img_file)
            img = cv.imread(img_path)
            if img is None:
                continue
            img = cv.resize(img, img_size)
            images.append(img)
            labels.append(pokemon_name)

    images = np.array(images, dtype='float32') / 255.0
    labels = np.array(labels)

    # Encode Pokémon names
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)
    labels_encoded = to_categorical(labels_encoded)

    return images, labels_encoded, label_encoder

# Load dataset
images, labels_encoded, label_encoder = load_dataset(DATASET_PATH, img_size=IMG_SIZE, channels=CHANNELS)
num_pokemon = labels_encoded.shape[1]

# For multi-task placeholders (if you want to add them later)
num_types = 18
num_stats = 6
types_placeholder = np.random.rand(len(images), num_types)
stats_placeholder = np.random.rand(len(images), num_stats)

# -------------------------------
# Model Architecture
# -------------------------------
def build_multi_task_cnn(img_size, channels, num_pokemon, num_types, num_stats):
    inp = Input(shape=(img_size[0], img_size[1], channels))
    x = Conv2D(32, (3,3), activation='relu', padding='same')(inp)
    x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2,2))(x)
    x = Conv2D(128, (3,3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2,2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    class_output = Dense(num_pokemon, activation='softmax', name='class_output')(x)
    type_output = Dense(num_types, activation='sigmoid', name='type_output')(x)
    stat_output = Dense(num_stats, activation='linear', name='stat_output')(x)

    model = Model(inputs=inp, outputs=[class_output, type_output, stat_output])
    return model

# -------------------------------
# Build and Compile Model
# -------------------------------
model = build_multi_task_cnn(IMG_SIZE, CHANNELS, num_pokemon, num_types, num_stats)
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss={
        'class_output': 'categorical_crossentropy',
        'type_output': 'binary_crossentropy',
        'stat_output': 'mse'
    },
    metrics={'class_output': 'accuracy'}
)

# -------------------------------
# Train Model
# -------------------------------
model.fit(
    images,
    {
        'class_output': labels_encoded,
        'type_output': types_placeholder,
        'stat_output': stats_placeholder
    },
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=VALIDATION_SPLIT
)

# -------------------------------
# Save Model
# -------------------------------
model.save('pokemon_multi_task_model.h5')

print("Training complete! Model saved as pokemon_multi_task_model.h5")


ls: cannot access '/kaggle/input/pokemon-generation-one-22k/images/': No such file or directory


I0000 00:00:1760314626.192861      37 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1760314626.193542      37 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Epoch 1/10


I0000 00:00:1760314639.296389     179 service.cc:148] XLA service 0x3a666b30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1760314639.297377     179 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1760314639.297408     179 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1760314639.750956     179 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  4/503[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23s[0m 47ms/step - class_output_accuracy: 0.0000e+00 - class_output_loss: 6.9595 - loss: 34.4744 - stat_output_loss: 26.3568 - type_output_loss: 1.1581

I0000 00:00:1760314645.960873     179 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 67ms/step - class_output_accuracy: 0.0089 - class_output_loss: 5.0214 - loss: 7.0459 - stat_output_loss: 1.3054 - type_output_loss: 0.7191 - val_class_output_accuracy: 0.0000e+00 - val_class_output_loss: 7.8845 - val_loss: 8.6674 - val_stat_output_loss: 0.0906 - val_type_output_loss: 0.6936
Epoch 2/10
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 49ms/step - class_output_accuracy: 0.0381 - class_output_loss: 4.5706 - loss: 5.3979 - stat_output_loss: 0.1310 - type_output_loss: 0.6963 - val_class_output_accuracy: 0.0000e+00 - val_class_output_loss: 14.0827 - val_loss: 14.8722 - val_stat_output_loss: 0.1024 - val_type_output_loss: 0.6940
Epoch 3/10
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 49ms/step - class_output_accuracy: 0.1585 - class_output_loss: 3.5459 - loss: 4.3964 - stat_output_loss: 0.1538 - type_output_loss: 0.6967 - val_class_output_accuracy: 0.0000e+00 - val_class_

In [None]:
# -------------------------------
# Pokémon Prediction Demo
# -------------------------------

import os
import cv2 as cv
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# -------------------------------
# Settings
# -------------------------------
DATASET_PATH = '/kaggle/input/pokemon-generation-one-22k/PokemonData/'
MODEL_PATH = '/kaggle/working/pokemon_multi_task_model.h5'
IMG_SIZE = (128, 128)  # Use same as training
CHANNELS = 3

# -------------------------------
# Load model
# -------------------------------
model = load_model(MODEL_PATH, compile=False)
print("Model loaded.")

# -------------------------------
# Build label encoder from folder names
# -------------------------------
pokemon_names = sorted([d for d in os.listdir(DATASET_PATH) if os.path.isdir(os.path.join(DATASET_PATH, d))])
label_encoder = LabelEncoder()
label_encoder.fit(pokemon_names)
print("Label encoder ready with classes:", pokemon_names[:10], "...")

# -------------------------------
# Utility function to plot image
# -------------------------------
def plot_prediction(img_path, pred_class_idx):
    img = cv.imread(img_path)
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.title("Predicted: " + label_encoder.inverse_transform([pred_class_idx])[0])
    plt.show()

# -------------------------------
# Predict function
# -------------------------------
def predict_image(img_path):
    img = cv.imread(img_path)
    if img is None:
        print("Image not found:", img_path)
        return
    img_resized = cv.resize(img, IMG_SIZE)
    img_input = np.expand_dims(img_resized, axis=0) / 255.0
    pred_class, pred_type, pred_stats = model.predict(img_input)
    pred_class_idx = np.argmax(pred_class[0])
    plot_prediction(img_path, pred_class_idx)
    return pred_class_idx, pred_type[0], pred_stats[0]

# -------------------------------
# Test multiple images
# -------------------------------
test_images = [
    '/kaggle/input/pokemon-generation-one-22k/PokemonData/Abra/000-063Abra_RB.png',
    '/kaggle/input/pokemon-generation-one-22k/PokemonData/Abra/004-063Abra_OS_anime_2.png',
    '/kaggle/input/pokemon-generation-one-22k/PokemonData/Alakazam/000-065Alakazam_RB.png'
]

for img_path in test_images:
    predict_image(img_path)


In [None]:
jupyter nbconvert --to script /kaggle/working/pokemon_project.ipynb
