In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.io as scio
import scipy.spatial
from PIL import Image
from scipy import ndimage
from scipy import stats

In [None]:
# Remember the current working directory; the dataset path is built relative to it.
cwd = os.getcwd()

In [None]:
# Root folder of the dataset part processed by this notebook, resolved against `cwd`.
path_to_data = os.path.join(cwd, r"ShanghaiTech/part_A") # replace with the path where you put your data

In [None]:
# Show the resolved dataset path as the cell output so it can be checked by eye.
path_to_data

### Creating the required folders in the folder containing the dataset

In [None]:
# Create the output folders for the full-size density maps and for the cropped
# images / density maps of the training and test sets.
# `exist_ok=True` keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError as soon as the folders exist from a previous run.
for subdir in (
    r"train_data/GT_density_map",
    r"train_data/images_cropped",
    r"train_data/GT_density_map_cropped",
    r"test_data/GT_density_map",
    r"test_data/images_cropped",
    r"test_data/GT_density_map_cropped",
):
    os.makedirs(os.path.join(path_to_data, subdir), exist_ok=True)

In [None]:
# Create the validation folder and its two sub-folders (cropped images and
# cropped density maps).
# `exist_ok=True` keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError as soon as the folders exist from a previous run.
for subdir in (
    r"validation_data",
    r"validation_data/images_cropped",
    r"validation_data/GT_density_map_cropped",
):
    os.makedirs(os.path.join(path_to_data, subdir), exist_ok=True)

### Defining the function that will allow us to generate the ground-truth density maps

In [None]:
def gaussian_filter_density(gt):
    """Turn a binary point-annotation map into a density map.

    Every non-zero pixel of ``gt`` is replaced by a 2-D Gaussian and all the
    Gaussians are summed. The sigma of each Gaussian is 0.3 times the mean
    distance from the point to its nearest neighbours (three of them, or
    fewer when the map holds fewer than four points). When the map holds a
    single point, sigma is a quarter of the mean of the map's dimensions.

    Parameters
    ----------
    gt : numpy.ndarray
        2-D array whose non-zero entries mark the annotated points.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with the same shape as ``gt``; all zeros when
        ``gt`` has no non-zero entry. The filter uses ``mode='constant'``,
        so the part of a Gaussian that falls outside the map is dropped.
    """
    density = np.zeros(gt.shape, dtype=np.float32)
    rows, cols = np.nonzero(gt)
    gt_count = len(rows)
    if gt_count == 0:
        return density

    # Points are stored as (x, y) = (column, row).
    pts = np.column_stack((cols, rows))

    if gt_count > 1:
        tree = scipy.spatial.KDTree(pts.copy(), leafsize=2048)
        # Never ask for more neighbours than there are points: KDTree.query
        # pads the missing ones with an infinite distance, which would make
        # sigma infinite for maps holding only 2 or 3 points.
        n_neighbours = min(4, gt_count)
        distances, _ = tree.query(pts, k=n_neighbours, eps=10.)
        # Column 0 is the query point itself (distance 0); average the rest.
        sigmas = distances[:, 1:].sum(axis=1) / (n_neighbours - 1) * 0.3
    else:
        # Single point: no neighbour to measure, fall back to the map size.
        sigmas = [np.average(np.array(gt.shape)) / 2. / 2.]

    for (x, y), sigma in zip(pts, sigmas):
        pt2d = np.zeros(gt.shape, dtype=np.float32)
        pt2d[y, x] = 1.
        density += scipy.ndimage.gaussian_filter(pt2d, sigma, mode='constant')
    return density

### Creating the ground-truth density maps for both the training set and the test set

In [None]:
# Build and store one ground-truth density map per training image.
# The number of images is taken from the content of the images folder, and
# files are expected to be named IMG_1.jpg ... IMG_<nb_train>.jpg.
nb_train = len(os.listdir(os.path.join(path_to_data,r"train_data/images")))
for num in range(1,nb_train+1):
    image = plt.imread(os.path.join(path_to_data,r"train_data/images/IMG_%s.jpg"%str(num)))
    annotation = scio.loadmat(os.path.join(path_to_data,r"train_data/ground-truth/GT_IMG_%s.mat"%str(num)))
    height, width = image.shape[0], image.shape[1]
    # Binary map with a 1 at every annotated position.
    point_map = np.zeros((height, width))
    head_points = annotation["image_info"][0][0][0][0][0]
    for point in head_points:
        # Each annotation is (x, y); the map is indexed [row, column].
        row, col = int(point[1]), int(point[0])
        # Ignore annotations that lie past the bottom/right image border.
        if row < height and col < width:
            point_map[row, col] = 1
    density = gaussian_filter_density(point_map)
    np.save(os.path.join(path_to_data,r"train_data/GT_density_map/GT_density_IMG_%s"%str(num)), density)

In [None]:
# Build and store one ground-truth density map per test image.
# The number of images is taken from the content of the images folder, and
# files are expected to be named IMG_1.jpg ... IMG_<nb_test>.jpg.
nb_test = len(os.listdir(os.path.join(path_to_data,r"test_data/images")))
for num in range(1,nb_test+1):
    image = plt.imread(os.path.join(path_to_data,r"test_data/images/IMG_%s.jpg"%str(num)))
    annotation = scio.loadmat(os.path.join(path_to_data,r"test_data/ground-truth/GT_IMG_%s.mat"%str(num)))
    height, width = image.shape[0], image.shape[1]
    # Binary map with a 1 at every annotated position.
    point_map = np.zeros((height, width))
    head_points = annotation["image_info"][0][0][0][0][0]
    for point in head_points:
        # Each annotation is (x, y); the map is indexed [row, column].
        row, col = int(point[1]), int(point[0])
        # Ignore annotations that lie past the bottom/right image border.
        if row < height and col < width:
            point_map[row, col] = 1
    density = gaussian_filter_density(point_map)
    np.save(os.path.join(path_to_data,r"test_data/GT_density_map/GT_density_IMG_%s"%str(num)), density)

### Cropping 9 images of size 224x224 from each image (for the training and the test sets)

In [None]:
# Crop the training set. Change paths accordingly.
# For every image at least 224x224 in size, nine 224x224 patches are cut from
# the image and from its density map (4 corners + 5 random positions). Each
# patch is written twice: as is (suffix _0) and mirrored left-right (suffix _1).
images_path = os.path.join(path_to_data, r"train_data/images")
densities_path = os.path.join(path_to_data, r"train_data/GT_density_map")
# Output folders, read later to build the training set.
cropped_images_path = os.path.join(path_to_data, r"train_data/images_cropped")
cropped_densities_path = os.path.join(path_to_data, r"train_data/GT_density_map_cropped")

crop_size = 224
crops_per_image = 9
# One count per un-mirrored crop, laid out image after image; the entries of
# images that are skipped because they are too small stay at 0.
counts = [0]*crops_per_image*nb_train
for num in range(1,nb_train+1):
    if num%25==0 :
        print(num)
    img = cv2.imread(os.path.join(images_path, r"IMG_%s.jpg"%str(num)), flags=cv2.IMREAD_COLOR)
    dens_map = np.load(os.path.join(densities_path, r"GT_density_IMG_%s.npy"%str(num)))
    h,w = img.shape[0],img.shape[1]
    # Skip images that cannot hold a full crop in both dimensions.
    if w < crop_size or h < crop_size:
        continue

    # The patch buffer follows the shape and dtype of the decoded image, so no
    # separate grayscale case is needed.
    cropped_img = np.zeros((crops_per_image, crop_size, crop_size) + img.shape[2:], dtype=img.dtype)
    cropped_map = np.zeros((crops_per_image, crop_size, crop_size)) # 9 density maps

    # 4 crops at the 4 corners of the image and of the corresponding density map
    cropped_img[0], cropped_img[1] = img[:crop_size,:crop_size], img[:crop_size,-crop_size:]
    cropped_img[2], cropped_img[3] = img[-crop_size:,:crop_size], img[-crop_size:,-crop_size:]
    cropped_map[0], cropped_map[1] = dens_map[:crop_size,:crop_size], dens_map[:crop_size,-crop_size:]
    cropped_map[2], cropped_map[3] = dens_map[-crop_size:,:crop_size], dens_map[-crop_size:,-crop_size:]

    # 5 random crops, same window for the image and the density map.
    # NOTE: no seed is set before this point, so these windows differ between runs.
    x=np.random.randint(0,w-crop_size+1,5)
    y=np.random.randint(0,h-crop_size+1,5)
    for i in range(4,crops_per_image):
        top, left = y[i-4], x[i-4]
        cropped_img[i] = img[top:top+crop_size, left:left+crop_size]
        cropped_map[i] = dens_map[top:top+crop_size, left:left+crop_size]

    for i in range(crops_per_image):
        cv2.imwrite(os.path.join(cropped_images_path, r"cropped_IMG_%s_%s_0.jpg"%(str(num),str(i))),
                    cropped_img[i])
        cv2.imwrite(os.path.join(cropped_images_path, r"cropped_IMG_%s_%s_1.jpg"%(str(num),str(i))),
                    cv2.flip(cropped_img[i],1))
        np.save(os.path.join(cropped_densities_path, r"GT_density_cropped_IMG_%s_%s_0"%(str(num),str(i))),
                cropped_map[i])
        np.save(os.path.join(cropped_densities_path, r"GT_density_cropped_IMG_%s_%s_1"%(str(num),str(i))),
                np.fliplr(cropped_map[i]))
        # Image `num` (1-based) owns the slots (num-1)*9 ... (num-1)*9+8.
        counts[(num-1)*crops_per_image + i] = np.sum(cropped_map[i])
counts_train=np.asarray(counts)
np.save(os.path.join(path_to_data, r"train_data/count_train"), counts_train)

In [None]:
# Crop the test set. Change paths accordingly.
# For every image at least 224x224 in size, nine 224x224 patches are cut from
# the image and from its density map (4 corners + 5 random positions). Each
# patch is written twice: as is (suffix _0) and mirrored left-right (suffix _1).
images_path = os.path.join(path_to_data, r"test_data/images")
densities_path = os.path.join(path_to_data, r"test_data/GT_density_map")
# Output folders, read later to build the test set.
cropped_images_path = os.path.join(path_to_data, r"test_data/images_cropped")
cropped_densities_path = os.path.join(path_to_data, r"test_data/GT_density_map_cropped")

crop_size = 224
crops_per_image = 9
# One count per un-mirrored crop, laid out image after image; the entries of
# images that are skipped because they are too small stay at 0.
counts = [0]*crops_per_image*nb_test
for num in range(1,nb_test+1):
    if num%25==0 :
        print(num)
    img = cv2.imread(os.path.join(images_path, r"IMG_%s.jpg"%str(num)), flags=cv2.IMREAD_COLOR)
    dens_map = np.load(os.path.join(densities_path, r"GT_density_IMG_%s.npy"%str(num)))
    h,w = img.shape[0],img.shape[1]
    # Skip images that cannot hold a full crop in both dimensions.
    if w < crop_size or h < crop_size:
        continue

    # The patch buffer follows the shape and dtype of the decoded image, so no
    # separate grayscale case is needed.
    cropped_img = np.zeros((crops_per_image, crop_size, crop_size) + img.shape[2:], dtype=img.dtype)
    cropped_map = np.zeros((crops_per_image, crop_size, crop_size)) # 9 density maps

    # 4 crops at the 4 corners of the image and of the corresponding density map
    cropped_img[0], cropped_img[1] = img[:crop_size,:crop_size], img[:crop_size,-crop_size:]
    cropped_img[2], cropped_img[3] = img[-crop_size:,:crop_size], img[-crop_size:,-crop_size:]
    cropped_map[0], cropped_map[1] = dens_map[:crop_size,:crop_size], dens_map[:crop_size,-crop_size:]
    cropped_map[2], cropped_map[3] = dens_map[-crop_size:,:crop_size], dens_map[-crop_size:,-crop_size:]

    # 5 random crops, same window for the image and the density map.
    # NOTE: no seed is set before this point, so these windows differ between runs.
    x=np.random.randint(0,w-crop_size+1,5)
    y=np.random.randint(0,h-crop_size+1,5)
    for i in range(4,crops_per_image):
        top, left = y[i-4], x[i-4]
        cropped_img[i] = img[top:top+crop_size, left:left+crop_size]
        cropped_map[i] = dens_map[top:top+crop_size, left:left+crop_size]

    for i in range(crops_per_image):
        cv2.imwrite(os.path.join(cropped_images_path, r"cropped_IMG_%s_%s_0.jpg"%(str(num),str(i))),
                    cropped_img[i])
        cv2.imwrite(os.path.join(cropped_images_path, r"cropped_IMG_%s_%s_1.jpg"%(str(num),str(i))),
                    cv2.flip(cropped_img[i],1))
        np.save(os.path.join(cropped_densities_path, r"GT_density_cropped_IMG_%s_%s_0"%(str(num),str(i))),
                cropped_map[i])
        # The _1 image is mirrored left-right, so its density map must be mirrored too.
        np.save(os.path.join(cropped_densities_path, r"GT_density_cropped_IMG_%s_%s_1"%(str(num),str(i))),
                np.fliplr(cropped_map[i]))
        # Image `num` (1-based) owns the slots (num-1)*9 ... (num-1)*9+8.
        counts[(num-1)*crops_per_image + i] = np.sum(cropped_map[i])
counts_test=np.asarray(counts)
np.save(os.path.join(path_to_data, r"test_data/count_test"), counts_test)

### Splitting the training set into 90% training and 10% validation

In [None]:
# Write the `counts_train` array to validation_data/count_validation.npy.
# NOTE(review): `counts_train` is the complete array computed while cropping the
# training set, and it is saved here before any crop has been moved to the
# validation folders - confirm that this is the intended content of the file.
np.save(os.path.join(path_to_data, r"validation_data/count_validation"), counts_train)

In [None]:
# List the cropped training images that are candidates for the validation split.
# os.listdir returns entries in arbitrary order, so the list is sorted: the
# seeded random draw that indexes into it then selects the same files on every
# machine and every run.
# Density-map file names are derived from the image names, so no separate
# listing of the density-map folder is needed.
crop_images = sorted(os.listdir(os.path.join(path_to_data, r"train_data/images_cropped")))
nb_train_crop = len(crop_images)
print(nb_train_crop)

In [None]:
# Fix the seed, then draw 10% of the crop positions without replacement;
# these positions designate the crops that go to the validation set.
np.random.seed(42)
nb_validation_draw = int(0.1*nb_train_crop)
index = np.random.choice(nb_train_crop, nb_validation_draw, replace=False)

In [None]:
# Move every selected crop (image + matching density map) from the training
# folders to the validation folders.
# The files are moved as they are instead of being decoded and written again,
# so validation JPEGs are not compressed a second time.
for i in index:
    image_name = crop_images[i]
    # "cropped_<id>.jpg" -> "GT_density_cropped_<id>.npy"
    name_dens_map = "GT_density_cropped_" + image_name.split('_',1)[1].split('.')[0] + ".npy"
    os.replace(os.path.join(path_to_data, r"train_data/images_cropped/%s"%image_name),
               os.path.join(path_to_data, r"validation_data/images_cropped/%s"%image_name))
    # The density map is addressed through the name derived from the image,
    # which guarantees that image and map stay paired.
    os.replace(os.path.join(path_to_data, r"train_data/GT_density_map_cropped/%s"%name_dens_map),
               os.path.join(path_to_data, r"validation_data/GT_density_map_cropped/%s"%name_dens_map))

In [None]:
# List the cropped images of the three splits and print how many each one
# holds (training, validation, test - one number per line).
train_crop_images, validation_crop_images, test_crop_images = (
    os.listdir(os.path.join(path_to_data, folder))
    for folder in (
        r"train_data/images_cropped",
        r"validation_data/images_cropped",
        r"test_data/images_cropped",
    )
)
nb_train_crop, nb_validation_crop, nb_test_crop = map(
    len, (train_crop_images, validation_crop_images, test_crop_images)
)
print(nb_train_crop, nb_validation_crop, nb_test_crop, sep="\n")