In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import cv2 as cv
import os
import pickle
from datetime import datetime
import sys

In [2]:
with open(r'/kaggle/input/coco-2017-dataset/coco2017/annotations/instances_train2017.json','r') as file:
    datajson=json.load(file)

In [3]:
with open(r'/kaggle/input/personpickle/Persons40000.pkl','rb') as fi:
    pickledata=pickle.load(fi)

In [4]:
class Preprocess():
    def __init__(self,shape=[640,640],finalshape=[320,320]):
        super(Preprocess,self).__init__()
        self.shape=shape
        self.finalshape=finalshape
        
    def process(self,indexes):
        images=[]
        labels=[]
        for index in indexes:
            image=cv.imread(fr"/kaggle/input/coco-2017-dataset/coco2017/train2017/{datajson['images'][int(index[0])]['file_name']}")
            image=cv.cvtColor(image,cv.COLOR_BGR2GRAY)
            imageshape=image.shape
            xmin,ymin,W,H=datajson['annotations'][int(index[1])]['bbox']
            temp0=int(self.shape[0]-imageshape[0])
            temp1=int(self.shape[1]-imageshape[1])
            xmin=int((temp1/2)+xmin)
            ymin=int((temp0/2)+ymin)
            xmax=int(W+xmin)
            ymax=int(H+ymin)
            image=cv.copyMakeBorder(image,temp0//2,temp0//2,temp1//2,temp1//2,borderType=cv.BORDER_CONSTANT,value=250)
            image=cv.resize(image,tuple(self.finalshape),interpolation=cv.INTER_LINEAR)
            images.append(image/255)
            labels.append([xmin/2,ymin/2,xmax/2,ymax/2])
        images=tf.convert_to_tensor(tf.cast(images,tf.float32))
        labels=tf.convert_to_tensor(labels)
        return images[...,tf.newaxis],labels

In [5]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', input_shape=(320, 320, 1)),
    tf.keras.layers.LayerNormalization(epsilon=1e-6),
    tf.keras.layers.ReLU(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(16, (3, 3), padding='same'),
    tf.keras.layers.LayerNormalization(epsilon=1e-6),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dropout(0.20), 

    tf.keras.layers.Conv2D(16, (3, 3), padding='same'),
    tf.keras.layers.LayerNormalization(epsilon=1e-6),
    tf.keras.layers.ReLU(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(12, (3, 3), padding='same'),
    tf.keras.layers.LayerNormalization(epsilon=1e-6),
    tf.keras.layers.ReLU(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(8, (3, 3), padding='same'),
    tf.keras.layers.LayerNormalization(epsilon=1e-6),
    tf.keras.layers.ReLU(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.20),  

    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.20),  

    tf.keras.layers.Dense(4)
])

model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
handle=Preprocess()
images,labels=handle.process(pickledata)
Dataset=tf.data.Dataset.from_tensor_slices((images,labels))
Dataset=Dataset.batch(16).prefetch(tf.data.AUTOTUNE).cache()
images,labels=0,0

In [7]:
model.fit(Dataset,epochs=15)

Epoch 1/15


I0000 00:00:1727197606.505711     103 service.cc:145] XLA service 0x7873cc004d10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1727197606.506593     103 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m  5/528[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m18s[0m 35ms/step - accuracy: 0.5804 - loss: 32100.8184 

I0000 00:00:1727197616.183800     103 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 48ms/step - accuracy: 0.5511 - loss: 8388.2725
Epoch 2/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.5556 - loss: 4765.5796
Epoch 3/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.5733 - loss: 4565.3682
Epoch 4/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.5976 - loss: 4415.2334
Epoch 5/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.6088 - loss: 4449.6240
Epoch 6/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.6136 - loss: 4382.9590
Epoch 7/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.6174 - loss: 4271.6846
Epoch 8/15
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.6190 - loss: 4272.5337
Epoch 9/15


<keras.src.callbacks.history.History at 0x7874ee10d360>

In [10]:
model.save(r'/kaggle/working/PersonsCOCO.keras')
model.save(r'/kaggle/working/PersonsCOCO.h5')
# model.save(r'/kaggle/working/PersonsCOCO')
# model.save_weights(r'/kaggle/working/PersonsCOCOweigts.h5')

In [None]:
handle=Preprocess()
image,annotation=handle(pickledata[3:4])
model(image)