In [1]:
import tensorflow as tf
import cv2
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Layer
from tensorflow.keras import Model
import os
from PIL import Image
import random

In [2]:
dataset_path = r'C:\Users\Siddhartha Devan V\Downloads\datasets\mnist_images\mnist_img_with_bb'
bb_path = r'C:\Users\Siddhartha Devan V\Downloads\datasets\mnist_images\mnist_img_enlarged_bb.csv'
labels = os.listdir(dataset_path)

In [3]:
annotations = pd.read_csv(bb_path)
annotations.head(10)

Unnamed: 0.1,Unnamed: 0,name,target_label,height,width,x_min,x_max,y_min,y_max
0,0,5_0.png,5,28,28,18,46,18,46
1,1,0_1.png,0,28,28,37,65,37,65
2,2,4_2.png,4,28,28,15,43,15,43
3,3,1_3.png,1,28,28,38,66,38,66
4,4,9_4.png,9,28,28,42,70,42,70
5,5,2_5.png,2,28,28,35,63,35,63
6,6,1_6.png,1,28,28,12,40,12,40
7,7,3_7.png,3,28,28,37,65,37,65
8,8,1_8.png,1,28,28,40,68,40,68
9,9,4_9.png,4,28,28,22,50,22,50


In [13]:
image_paths = []
bb = []
classes = []

for img_name,img_label in zip(annotations['name'], annotations['target_label']):
    img_path = os.path.join(dataset_path, str(img_label), img_name)
    image_paths.append(img_path)
    bb.append(annotations[annotations['name'] == img_name][['x_min', 'x_max', 'y_min', 'y_max']].values)
    classes.append(img_label)

print(image_paths[:5])
print(bb[:5])
print(classes[:5])

print(len(image_paths), len(bb), len(classes))

['C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\5\\5_0.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\0\\0_1.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\4\\4_2.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\1\\1_3.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\9\\9_4.png']
[array([[18, 46, 18, 46]], dtype=int64), array([[37, 65, 37, 65]], dtype=int64), array([[15, 43, 15, 43]], dtype=int64), array([[38, 66, 38, 66]], dtype=int64), array([[42, 70, 42, 70]], dtype=int64)]
[5, 0, 4, 1, 9]
70000 70000 70000


In [18]:
from sklearn.model_selection import train_test_split

image_paths_train, image_paths_test, classes_train, classes_test, bboxes_train, bboxes_test = train_test_split(
    image_paths, classes, bb, test_size=0.2, random_state=42, stratify=classes
)

In [None]:
print(len(image_paths_test), len(image_paths_train))
print(len(classes_test), len(classes_train))
print(len(bboxes_test), len(bboxes_train))

14000 56000
14000 56000
14000 56000


In [None]:
for i in range(10):
    print(classes_train.count(i), classes_test.count(i))

5522 1381
6302 1575
5592 1398
5713 1428
5459 1365
5050 1263
5501 1375
5834 1459
5460 1365
5567 1391


In [23]:
cv2.imread(image_paths[1]).shape

(70, 70, 3)

In [30]:
print(image_paths_train[:5])

['C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\0\\0_24367.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\7\\7_33964.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\0\\0_58668.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\9\\9_63075.png', 'C:\\Users\\Siddhartha Devan V\\Downloads\\datasets\\mnist_images\\mnist_img_with_bb\\1\\1_49210.png']


In [36]:
def mapper_func(image_path, label, bbox):
    
    img = tf.io.read_file(image_path)
    img = tf.image.decode_image(img, channels=3)  
    img_scaled = img/255
    
    return img_scaled, {'y_label': label, 'bbox': bbox}

batch_size = 64

train_data_gen = tf.data.Dataset.from_tensor_slices((image_paths_train, classes_train, bboxes_train))
train_data_gen = train_data_gen.map(mapper_func, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_data_gen.shuffle(buffer_size=1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)

test_data_gen = tf.data.Dataset.from_tensor_slices((image_paths_test, classes_test, bboxes_test))
test_data_gen = test_data_gen.map(mapper_func, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_data_gen.shuffle(buffer_size=1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [37]:
print(train_dataset)

<PrefetchDataset element_spec=(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), {'y_label': TensorSpec(shape=(None,), dtype=tf.int32, name=None), 'bbox': TensorSpec(shape=(None, 1, 4), dtype=tf.int64, name=None)})>


In [45]:
for i in train_dataset.take(1):
    print(i[0].shape, i[1].keys())

(64, 70, 70, 3) dict_keys(['y_label', 'bbox'])
