In [5]:
import zipfile
data_path = 'data'


path = Path(data_path)

if not path.exists():
    path.mkdir(parents=True, exist_ok=True)



with zipfile.ZipFile('train.zip', 'r') as zip_ref:
    zip_ref.extractall(data_path)

In [10]:
from torch.utils.data import Dataset
from PIL import Image
import os

class TestImagesDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.files = [f for f in os.listdir(root_dir)
                      if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_name = self.files[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, img_name

test_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

test_dataset = TestImagesDataset("data/test", transform=test_transforms)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [1]:
import os
from pathlib import Path
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

data_path = 'data'
train_dir = os.path.join(data_path, "train")
test_dir = os.path.join(data_path, "test")

class_names = ['cat','elephant','butterfly','sheep','spider',
               'horse','dog','cow','chicken','squirrel']
num_classes = len(class_names)

image_size = (224, 224)
batch_size = 64
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_test_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_size = int(0.2 * len(train_dataset))
train_size = len(train_dataset) - val_size
train_subset, val_subset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

def mixup_data(x, y, alpha=1.0):
    if alpha <= 0:
        return x, y, 1
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

def cutmix_data(x, y, alpha=1.0):
    """Возвращает смешанные входы и цели (CutMix)."""
    if alpha <= 0:
        return x, y, 1
    lam = np.random.beta(alpha, alpha)
    batch_size, _, h, w = x.size()
    index = torch.randperm(batch_size).to(x.device)
    cut_w = int(w * np.sqrt(1 - lam))
    cut_h = int(h * np.sqrt(1 - lam))
    cx = np.random.randint(w)
    cy = np.random.randint(h)
    x1 = np.clip(cx - cut_w // 2, 0, w)
    x2 = np.clip(cx + cut_w // 2, 0, w)
    y1 = np.clip(cy - cut_h // 2, 0, h)
    y2 = np.clip(cy + cut_h // 2, 0, h)
    x[:, :, y1:y2, x1:x2] = x[index, :, y1:y2, x1:x2]
    lam = 1 - ((x2 - x1) * (y2 - y1) / (w * h))
    y_a, y_b = y, y[index]
    return x, y_a, y_b, lam

class CustomCNN(nn.Module):
    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(CustomCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(dropout_rate),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(dropout_rate),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(dropout_rate)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * (image_size[0] // 8) * (image_size[1] // 8), 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        return self.classifier(self.features(x))

def build_resnet(num_classes=10, fine_tune_at=-50, dropout_rate=0.5):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
    for param in model.parameters():
        param.requires_grad = False
    if fine_tune_at is not None:
        for param in list(model.parameters())[fine_tune_at:]:
            param.requires_grad = True
    in_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(in_features, 512),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
        nn.Linear(512, num_classes)
    )
    return model

def train_one_epoch(model, loader, criterion, optimizer, use_mixup=True, use_cutmix=False):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for imgs, labels in tqdm(loader, desc="Train", leave=False):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()

        # Применяем MixUp или CutMix
        if use_mixup and random.random() < 0.5:
            imgs, y_a, y_b, lam = mixup_data(imgs, labels)
            outputs = model(imgs)
            loss = mixup_criterion(criterion, outputs, y_a, y_b, lam)
        elif use_cutmix and random.random() < 0.5:
            imgs, y_a, y_b, lam = cutmix_data(imgs, labels)
            outputs = model(imgs)
            loss = mixup_criterion(criterion, outputs, y_a, y_b, lam)
        else:
            outputs = model(imgs)
            loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        running_loss += loss.item() * imgs.size(0)
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum().item()
        total += labels.size(0)
    return running_loss / total, correct / total

def validate(model, loader, criterion):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * imgs.size(0)
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum().item()
            total += labels.size(0)
    return running_loss / total, correct / total


use_resnet = True    
optimizer_name = "adam"  

model = build_resnet(num_classes, fine_tune_at=-50) if use_resnet else CustomCNN(num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

if optimizer_name == "adam":
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
elif optimizer_name == "rmsprop":
    optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
else:
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, momentum=0.9)

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)
early_stop_patience = 3
best_val_acc = 0.0
no_improve_epochs = 0

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, use_mixup=True, use_cutmix=True)
    val_loss, val_acc = validate(model, val_loader, criterion)
    scheduler.step(val_loss)

    print(f"Train: loss={train_loss:.4f}, acc={train_acc:.4f}")
    print(f"Val:   loss={val_loss:.4f}, acc={val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pt")
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1
        if no_improve_epochs >= early_stop_patience:
            print("Early stopping!")
            break


Epoch 1/5


                                                                                

Train: loss=1.1320, acc=0.6695
Val:   loss=0.6632, acc=0.9624

Epoch 2/5


                                                                                

Train: loss=0.9771, acc=0.7412
Val:   loss=0.6498, acc=0.9667

Epoch 3/5


                                                                                

Train: loss=0.9871, acc=0.6965
Val:   loss=0.6207, acc=0.9733

Epoch 4/5


                                                                                

Train: loss=0.9469, acc=0.7044
Val:   loss=0.5872, acc=0.9768

Epoch 5/5


                                                                                

Train: loss=0.9769, acc=0.7005
Val:   loss=0.6003, acc=0.9740


In [2]:
torch.save(model.state_dict(), "best_model.pt")

In [12]:
import os
from PIL import Image
import torch
from torchvision import transforms
from tqdm import tqdm

ret = {'butterfly': 2, 
       'cat': 0, 
       'chicken': 8, 
       'cow': 7, 
       'dog': 6,
       'elephant': 1,
       'horse': 5,
       'sheep': 3,
       'spider': 4,
       'squirrel': 9}

class_names = list(ret.keys())

model.eval()
ids = []
labels =  []
with torch.no_grad():
    for images, filenames in test_loader:
        images = images.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)
        for fname, pred in zip(filenames, preds.cpu().numpy()):
            ids.append(fname)
            labels.append(ret[class_names[pred]])

In [13]:
import pandas as pd
submission = pd.DataFrame({
    'id': ids,
    'label': labels
})

In [14]:
submission.to_csv('submission.csv', index=False)

In [6]:
print(train_dataset.class_to_idx)
print(class_names)

{'cat': 0, 'elephant': 1, 'butterfly': 2, 'sheep': 3, 'spider': 4, 'horse': 5, 'dog': 6, 'cow': 7, 'chicken': 8, 'squirrel': 9}
['cat', 'elephant', 'butterfly', 'sheep', 'spider', 'horse', 'dog', 'cow', 'chicken', 'squirrel']


In [44]:
class_names = list(train_dataset.class_to_idx.keys())

In [8]:
print(train_dataset.class_to_idx)
#print(test_dataset.class_to_idx)

{'butterfly': 0, 'cat': 1, 'chicken': 2, 'cow': 3, 'dog': 4, 'elephant': 5, 'horse': 6, 'sheep': 7, 'spider': 8, 'squirrel': 9}


In [9]:
test_dataset = datasets.ImageFolder(test_dir, transform=val_test_transforms)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
test_dataset.class_to_idx

{'.ipynb_checkpoints': 0}

In [14]:
test_dataset

Dataset ImageFolder
    Number of datapoints: 1
    Root location: data/test
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [29]:
# Требуется:
# pip install -U tensorflow keras-tuner

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Rescaling, Input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_preprocess
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import keras_tuner as kt
from pathlib import Path
import numpy as np
import os

# ----------------------------
# Параметры общего пайплайна
# ----------------------------
data_path = 'data'  # ваша папка с train/val/test
train_dir = os.path.join(data_path, "train")
test_dir = os.path.join(data_path, "test")  # убедитесь, что есть тестовый набор
class_names = ['cat','elephant','butterfly','sheep','spider','horse','dog','cow','chicken','squirrel']
AUTOTUNE = tf.data.AUTOTUNE

# Для ResNet — стандартный размер 224x224. Для кастомной сети можно уменьшать.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 64

# ----------------------------
# Создаём датасеты (train/val/test)
# ----------------------------
# Если у вас нет отдельного test — можно разделить train на train+val как раньше.
train_dataset = image_dataset_from_directory(
    train_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
    subset="training",
    class_names=class_names,
    seed=42
)

val_dataset = image_dataset_from_directory(
    train_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
    subset="validation",
    class_names=class_names,
    seed=42
)

# Опционально: если есть тестовая папка
if os.path.exists(test_dir):
    test_dataset = image_dataset_from_directory(
        test_dir,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=False,
        class_names=class_names
    )
else:
    test_dataset = None

# Кеш/префетч
train_dataset = train_dataset.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_dataset = val_dataset.cache().prefetch(buffer_size=AUTOTUNE)
if test_dataset is not None:
    test_dataset = test_dataset.cache().prefetch(buffer_size=AUTOTUNE)

# ----------------------------
# Data Augmentation слой (общий)
# ----------------------------
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.08),
    layers.RandomZoom(0.08),
    layers.RandomTranslation(0.05, 0.05),
    layers.RandomContrast(0.08),
], name="data_augmentation")

# ----------------------------
# Вариант A: Transfer learning с ResNet50V2
#   - сначала тренируем только головой (feature extractor frozen)
#   - затем "размораживаем" последние N слоёв и дообучаем с малым lr
# ----------------------------
def build_resnet_model(num_dense_units=512, dropout_rate=0.5, fine_tune_at=None):
    # Вход
    inputs = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    x = data_augmentation(inputs)
    # ResNet ожидает предобработку
    x = layers.Lambda(resnet_preprocess)(x)
    # Базовая модель
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_tensor=x)
    base_model.trainable = False  # сначала заморожена

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_dense_units, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    # можно добавить ещё один плотный слой по желанию
    outputs = Dense(len(class_names), activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # Компиляция
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Если указали fine_tune_at (номер слоя), размораживаем tail
    if fine_tune_at is not None:
        base_model.trainable = True
        # замораживаем все до fine_tune_at
        for layer in base_model.layers[:fine_tune_at]:
            layer.trainable = False
        # уменьшаем lr для тонкой настройки
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

    return model, base_model

# Коллбэки
checkpoint_path = "best_resnet_model.h5"
callbacks = [
    EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=1e-7),
    ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, save_weights_only=False)
]

# Тренировка: сначала "head" несколько эпох, затем тонкая настройка
resnet_model, base = build_resnet_model(num_dense_units=512, dropout_rate=0.5, fine_tune_at=None)
initial_epochs = 5
fine_tune_epochs = 5  # можно также 5, чтобы обе стадии были короткими

history_head = resnet_model.fit(
    train_dataset,
    epochs=initial_epochs,
    validation_data=val_dataset,
    callbacks=callbacks,
    verbose=1
)

# Тонкая настройка (fine-tuning)
fine_tune_at = -30  # можно варьировать: -50, -30, None
if isinstance(fine_tune_at, int) and fine_tune_at < 0:
    ft_at = len(base.layers) + fine_tune_at
else:
    ft_at = fine_tune_at

resnet_model, base = build_resnet_model(num_dense_units=512, dropout_rate=0.5, fine_tune_at=ft_at)
if os.path.exists(checkpoint_path):
    try:
        resnet_model.load_weights(checkpoint_path)
    except Exception:
        pass

history_finetune = resnet_model.fit(
    train_dataset,
    epochs=fine_tune_epochs,
    validation_data=val_dataset,
    callbacks=callbacks,
    verbose=1
)

if test_dataset is not None:
    test_loss, test_acc = resnet_model.evaluate(test_dataset)
    print(f"ResNet test accuracy: {test_acc:.4f}")


def build_custom_cnn(hp):
    inputs = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    x = data_augmentation(inputs)
    x = layers.Rescaling(1./255)(x)
    
    num_blocks = hp.Int("num_blocks", 1, 4, default=2)           
    filters0 = hp.Int("filters0", 32, 128, step=32, default=32)
    convs_per_block = hp.Int("convs_per_block", 1, 3, default=2) 
    kernel_size = hp.Choice("kernel_size", [3,5,7], default=3)  
    dropout_rate = hp.Float("dropout_rate", 0.2, 0.6, step=0.1, default=0.4)
    use_extra_dense = hp.Boolean("use_extra_dense", default=True)
    dense_units = hp.Int("dense_units", 128, 1024, step=128, default=512)
    num_dense_layers = hp.Int("num_dense_layers", 1, 2, default=1)

    x_filters = filters0
    for b in range(num_blocks):
        for c in range(convs_per_block):
            x = Conv2D(x_filters, (kernel_size, kernel_size), padding='same', activation='relu')(x)
        x = MaxPooling2D((2,2))(x)
        x = Dropout(dropout_rate)(x)
        x_filters *= 2

    x = Flatten()(x)
    for d in range(num_dense_layers):
        x = Dense(dense_units, activation='relu')(x)
        x = Dropout(dropout_rate)(x)
    outputs = Dense(len(class_names), activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    opt_choice = hp.Choice("optimizer", ["adam", "rmsprop", "sgd"], default="adam")
    if opt_choice == "adam":
        optimizer = tf.keras.optimizers.Adam()
    elif opt_choice == "rmsprop":
        optimizer = tf.keras.optimizers.RMSprop()
    else:
        optimizer = tf.keras.optimizers.SGD(momentum=0.9)

    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Keras Tuner setup
tuner_dir = "kt_tuner"
tuner = kt.RandomSearch(
    build_custom_cnn,
    objective="val_accuracy",
    max_trials=20,       # увеличьте при наличии вычислительных ресурсов
    executions_per_trial=1,
    directory=tuner_dir,
    project_name="cnn_search"
)

# Вывод краткой сводки перед запуском
print("Начинаем поиск архитектур кастомной сети (Keras Tuner). Это может занять долгое время.")

# Run search (можно контролировать epochs и batch_size)
tuner.search(
    train_dataset,
    epochs=15,
    validation_data=val_dataset,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)]
)

# Лучшая модель
best_model = tuner.get_best_models(num_models=1)[0]
# Оценка на тесте
if test_dataset is not None:
    loss, acc = best_model.evaluate(test_dataset)
    print(f"Best custom CNN test accuracy: {acc:.4f}")

# ----------------------------
# Полезные замечания по поиску гиперпараметров и предотвращению переобучения
# ----------------------------
# 1) Тщательно следите за графиками train/val loss и accuracy — если train >> val (val хуже), значит переобучение.
# 2) Увеличивайте Data Augmentation и/или собирайте больше данных при выраженном переобучении.
# 3) Используйте EarlyStopping и ReduceLROnPlateau — в коде включено.
# 4) Попробуйте разные значения fine_tune_at (размораживать побольше/меньше слоёв ResNet).
# 5) Экспериментируйте с image size: 224 или 299 дают больше информации, но медленнее.
# 6) Попробуйте балансировать классы (class_weight) если у вас несбалансированные классы.
# 7) Попробуйте MixUp / CutMix augmentation для лучшей обобщающей способности.
# 8) Используйте test_dataset только для финальной оценки. Для подбора гиперпараметров используйте val_dataset.

# Пример генерации class_weight при дисбалансе:
# from sklearn.utils import class_weight
# y = []  # собрать метки из train_dataset
# for batch_x, batch_y in train_dataset.unbatch().batch(10000): y = np.concatenate([y, batch_y.numpy()])
# class_weights = class_weight.compute_class_weight("balanced", classes=np.unique(y), y=y)
# class_weights_dict = dict(enumerate(class_weights))

# Затем передавайте class_weight=class_weights_dict в model.fit(...)



2025-10-18 08:59:08.165585: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-18 08:59:08.192927: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-18 08:59:08.757328: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Found 21949 files belonging to 10 classes.
Using 17560 files for training.


W0000 00:00:1760777949.595580  302420 gpu_device.cc:2431] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
W0000 00:00:1760777949.605847  302420 gpu_device.cc:2431] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
I0000 00:00:1760777949.619854  302420 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 27191 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 5090, pci bus id: 0000:01:00.0, compute capability: 12.0
2025-10-18 08:59:09.830705: W tensorflow/compiler/mlir/tools/kernel_gen/tf_gpu_runtime_wrappers.cc:40] 'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_INVALID_PTX'

2025-10-18 08:59:09.830720: W tensorflow/compiler/mlir/tools/kernel_gen/tf_gpu_runtime_wrappers.cc:40] 'cuModuleGetFunctio

InternalError: {{function_node __wrapped__Equal_device_/job:localhost/replica:0/task:0/device:GPU:0}} 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, 0, reinterpret_cast<CUstream>(stream), params, nullptr)' failed with 'CUDA_ERROR_INVALID_HANDLE' [Op:Equal] name: 

In [30]:
import tensorflow as tf
print(tf.__version__)
print(tf.sysconfig.get_build_info()["cuda_version"])
print(tf.sysconfig.get_build_info()["cudnn_version"])

2.20.0
12.5.1
9
