In [1]:
import os
import random

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sn
import tensorflow as tf
from sklearn.model_selection import train_test_split
# from sklearn.metrics import confusion_matrix

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout

import wandb
from wandb.keras import WandbMetricsLogger

import albumentations as A

2023-08-18 12:41:58.115169: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-18 12:41:58.116832: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-18 12:41:58.169442: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-18 12:41:58.170120: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Hyperparameters for the run, kept in one cell so they are easy to tune.
lr = 1e-2         # presumably the optimizer learning rate -- confirm against the training cell
n_epochs = 10     # presumably the number of training epochs -- confirm against the training cell
batch_size = 128  # samples per batch for the train/validation datasets built below

In [3]:
def set_seed(seed_value):
    """Seed the random number sources used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator with the same value, then exports
    ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` into the process
    environment.

    Args:
        seed_value: Seed passed unchanged to every generator; its string
            form is stored in ``PYTHONHASHSEED``.
    """
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed_value)

    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here presumably only affects child processes -- confirm.
    os.environ.update(
        {
            "PYTHONHASHSEED": str(seed_value),
            "TF_DETERMINISTIC_OPS": "true",
        }
    )


set_seed(42)

In [4]:
PATH_IMGS = "/kaggle/input/cifar10/cifar10/train/"
# Upper bound on the number of images taken from each class folder.
MAX_IMAGES_PER_CLASS = 500

ids = []  # full path of every selected image file
labels = []  # integer class index (from the folder name) aligned with `ids`
label2index = {
    'airplane': 0,
    'horse': 1,
    'truck': 2,
    'automobile': 3,
    'ship': 4,
    'dog': 5,
    'bird': 6,
    'frog': 7,
    'cat': 8,
    'deer': 9,
}

# os.listdir returns entries in arbitrary order. Both listings are sorted so
# that the per-class slice selects the same files, in the same order, on
# every run and on every machine.
for folder in sorted(os.listdir(PATH_IMGS)):
    folder_path = os.path.join(PATH_IMGS, folder)
    if not os.path.isdir(folder_path):
        # Stray files sitting next to the class folders are not classes.
        continue

    # A KeyError here means the directory holds an unexpected class folder.
    class_index = label2index[folder]
    for image_name in sorted(os.listdir(folder_path))[:MAX_IMAGES_PER_CLASS]:
        ids.append(os.path.join(folder_path, image_name))
        labels.append(class_index)

# Reverse mapping: class index -> class name.
index2label = {index: name for name, index in label2index.items()}

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/cifar10/cifar10/train/'

In [None]:
# Hold out 20% of the samples for validation. random_state is pinned so that
# re-running this cell on its own reproduces the same split instead of
# depending on how far the global NumPy RNG has advanced.
ids_train, ids_valid, y_train, y_valid = train_test_split(
    ids,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
class Dataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads images from disk one batch at a time.

    Indexing returns a tuple ``(X, y)``: ``X`` stacks the batch's images
    (read with OpenCV, converted BGR -> RGB, optionally transformed, then
    divided by 255) and ``y`` holds the matching one-hot encoded labels.

    Args:
        ids: Sequence of image file paths.
        y: Sequence of integer class indices aligned with ``ids``.
        shuffle: If true, the sample order is shuffled at construction and
            again on every ``on_epoch_end`` call.
        batch_size: Maximum number of samples per batch; the last batch may
            be smaller when ``len(ids)`` is not a multiple of it.
        transforms: Optional callable invoked as ``transforms(image=img)``
            whose result is indexed with ``"image"``.
        n_classes: Width of the one-hot label vectors.

    Raises:
        ValueError: If ``ids`` and ``y`` differ in length or ``batch_size``
            is smaller than 1.
    """

    def __init__(self, ids, y, shuffle=True, batch_size=256, transforms=None, n_classes=10):
        # Harmless on base classes without their own __init__; newer Keras
        # base classes are expected to want this call (TODO confirm).
        super().__init__()

        if len(ids) != len(y):
            raise ValueError(
                f"ids and y must have the same length, got {len(ids)} and {len(y)}"
            )
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.ids = ids
        self.y = y
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.ids))
        self.batch_size = batch_size
        self.transforms = transforms
        self.n_classes = n_classes

        if shuffle:
            self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: flooring would silently drop the trailing samples
        # (e.g. part of the validation set) from every epoch.
        return (len(self.ids) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(images, one_hot_labels)``.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
            FileNotFoundError: If an image cannot be read by OpenCV.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range for {len(self)} batches")

        start = idx * self.batch_size
        indexes = self.indexes[start : start + self.batch_size]

        batch_X = []
        # Iterate over the actual slice, not range(batch_size), so the last
        # (possibly shorter) batch is handled correctly.
        for i in indexes:
            path = self.ids[i]
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError(f"Could not read image: {path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            if self.transforms:
                img = self.transforms(image=img)["image"]

            batch_X.append(img)

        batch_y = tf.keras.utils.to_categorical(
            np.asarray([self.y[i] for i in indexes]), self.n_classes
        )

        # float32 halves the memory of the default float64 result.
        return np.asarray(batch_X, dtype=np.float32) / 255.0, batch_y

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [None]:
# Training pipeline: flip (p=0.5), rotate with limit=90 (always applied),
# blur (p=0.25), then resize to 224x224.
_train_steps = [
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=90, p=1.0),
    A.Blur(p=0.25),
    A.Resize(224, 224, p=1.0),
]
train_transforms = A.Compose(_train_steps)

# Validation pipeline: resize to 224x224 only.
_valid_steps = [A.Resize(224, 224, p=1.0)]
valid_transforms = A.Compose(_valid_steps)

In [None]:
# Unshuffled, one-image-per-batch dataset with the training transforms,
# used by the next cells to inspect a sample. The name `train_dataset`
# is rebound further down to the dataset with the real batch size.
train_dataset = Dataset(
    ids_train,
    y_train,
    shuffle=False,
    batch_size=1,
    transforms=train_transforms,
)

In [None]:
# Fetch the first batch and display the shapes of its image and label arrays.
X, y = train_dataset[0]
(X.shape, y.shape)

In [None]:
# Show the first image of the first batch. Indexing the dataset runs the
# transforms again, so this is a fresh pass through the pipeline.
fig, ax = plt.subplots(figsize=(3, 3))
ax.imshow(train_dataset[0][0][0])

In [None]:
# Training data: shuffled, with the training transforms.
train_dataset = Dataset(
    ids_train,
    y_train,
    shuffle=True,
    batch_size=batch_size,
    transforms=train_transforms,
)

# Validation data: fixed order, with the validation transforms.
valid_dataset = Dataset(
    ids_valid,
    y_valid,
    shuffle=False,
    batch_size=batch_size,
    transforms=valid_transforms,
)