In [1]:
import pandas as pd
import pickle
import numpy as np
import os
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
import multiprocessing

In [2]:
pimagepath = os.getcwd() + "/HO/pos_sub/"
os.makedirs(pimagepath, exist_ok=True)
nimagepath = os.getcwd() + "/HO/neg_sub/"
os.makedirs(nimagepath, exist_ok=True)

In [3]:
data_path = os.path.dirname(os.getcwd()) + "/data_cv.pkl"
with open(data_path, 'rb') as file:
    myvar = pickle.load(file)

HO_P_list = myvar[2]
HO_N_list = myvar[3]

In [4]:
def sliding_window(image, stride, imgSize):
    height, width, _ = image.shape
    img = []
    a1 = list(range(0, height-imgSize+stride, stride))
    a2 = list(range(0, width-imgSize+stride, stride))
    if (a1[-1]+imgSize != height):
        a1[-1] = height-imgSize
    if (a2[-1]+imgSize != width):
        a2[-1] = width-imgSize
    for y in a1:
        for x in a2:
            im1 = image[y:y+imgSize, x:x+imgSize, :]
            img.append(np.array(im1))
    return img

In [5]:
def handle_one_image(path, sType):
    image_data = []
    im = Image.open(path)
    im = np.array(im)
    if (im.shape[0] >= SIZE and im.shape[1] >= SIZE):
        img = sliding_window(im, STRIDE, SIZE)
        for i in range(len(img)):
            if(img[i].shape[2] >=3):
                image_data.append(img[i])
        return image_data, sType
    else:
        # indicate that no images are available
        return [], sType

In [6]:
SIZE = 128
STRIDE = 64
pimage_dict = {}
nimage_dict = {}
psubimage_counter = 0
pimage_counter = 0
nsubimage_counter = 0
nimage_counter = 0

In [7]:
def gen_sub_img(posl, negl):
    global pimage_dict
    global nimage_dict
    global psubimage_counter
    global pimage_counter
    global nsubimage_counter
    global nimage_counter
    print("Creating Sub images")
    # Create a list of tuples consisting of the file path, and the class
    # dictionary info for each of the cl arguments
    args = []
    for img in posl:
        path = os.getcwd() + "/positive/" + img
        args.append((path, "PS_"))
    for img in negl:
        path = os.getcwd() + "/negative/" + img
        args.append((path, "NS_"))
            
    num_workers = multiprocessing.cpu_count()  

    with multiprocessing.Pool(processes = num_workers) as pool:   # or however many processes
        image_counter = 0
        
        # Use multiprocessing to call handle_on_image(pathname, info)
        # and return the results in order
        for images, sType in pool.starmap(handle_one_image, args):
            image_list = []
            # Images is a list of returned images.  info is the class_dictionary info that we passed
            if(sType == "PS_"):
                for image in images:
                    image_list.append(psubimage_counter)
                    file_name = str(pimagepath + sType + str(psubimage_counter) + ".png")
                    plt.imsave(file_name, image)
                    psubimage_counter += 1
                pimage_dict[pimage_counter] = image_list
                pimage_counter += 1
            else:
                for image in images:
                    image_list.append(nsubimage_counter)
                    file_name = str(nimagepath + sType + str(nsubimage_counter) + ".png")
                    plt.imsave(file_name, image)
                    nsubimage_counter += 1
                nimage_dict[nimage_counter] = image_list
                nimage_counter += 1
    print("Sliding window process finished")

In [8]:
data_partition = 10
divp = int(len(HO_P_list)/data_partition)
divn = int(len(HO_N_list)/data_partition)
fromidx = 0
toidx = 0
for i in range(data_partition):
    fromidx, toidx = i*divp, (i+1)*divp
    P = HO_P_list[fromidx : toidx]
    fromidx, toidx =  i*divn, (i+1)*divn
    N = HO_N_list[fromidx : toidx]
    gen_sub_img(P, N)
    print("Sub-image generation finished for data part -> {}".format(i+1))

Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 1
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 2
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 3
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 4
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 5
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 6
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 7
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 8
Creating Sub images
Sliding window process finished
Sub-image generation finished for data part -> 9
Creating Sub images
Sliding window process finished
Sub-image generation finished for data 

In [9]:
len(nimage_dict.keys())

4500

In [10]:
pli = [len(pimage_dict[k]) for k in pimage_dict.keys()]
nli = [len(nimage_dict[k]) for k in nimage_dict.keys()]

In [11]:
pli = [len(pimage_dict[k]) for k in pimage_dict.keys() if len(pimage_dict[k]) != 0]
nli = [len(nimage_dict[k]) for k in nimage_dict.keys() if len(nimage_dict[k]) != 0]

In [12]:
np.mean(pli)

109.04944405015377

In [13]:
np.mean(nli)

70.19310517112166

In [14]:
print("Mean : positive -> {} ; negative - > {}".format(np.mean(pli), np.mean(nli)))
print("SD : positive -> {} ; negative - > {}".format(np.std(pli), np.std(nli)))
print("MAX : positive -> {} ; negative - > {}".format(np.max(pli), np.max(nli)))
print("MIN : positive -> {} ; negative - > {}".format(np.min(pli), np.min(nli)))

Mean : positive -> 109.04944405015377 ; negative - > 70.19310517112166
SD : positive -> 179.95671263153318 ; negative - > 142.99156615490338
MAX : positive -> 3264 ; negative - > 6624
MIN : positive -> 2 ; negative - > 2


In [15]:
import json
with open("pimage_dict_ho.json", "w") as outfile:
    json.dump(pimage_dict, outfile)
    
with open("nimage_dict_ho.json", "w") as outfile:
    json.dump(nimage_dict, outfile)    

In [16]:
pli = [len(pimage_dict[k]) for k in pimage_dict.keys() if len(pimage_dict[k]) != 0]
nli = [len(nimage_dict[k]) for k in nimage_dict.keys() if len(nimage_dict[k]) != 0]
print(len(pli))
print(len(nli))
print(sum(pli))
print(sum(nli))

4227
4003
460952
280983


In [17]:
thresh = 100
pli = [len(pimage_dict[k]) for k in pimage_dict.keys() if len(pimage_dict[k]) <= thresh]
nli = [len(nimage_dict[k]) for k in nimage_dict.keys() if len(nimage_dict[k]) <= thresh]
print(len(pli))
print(len(nli))
print(sum(pli))
print(sum(nli))

3275
3806
127085
125702


In [18]:
import sys
import pickle

In [19]:
minDiff = sys.maxsize
a = 0
b = 0
idx = 0 
for i in range(0, 201, 1):
    thresh = i
    pli = [len(pimage_dict[k]) for k in pimage_dict.keys() if len(pimage_dict[k]) <= thresh]
    nli = [len(nimage_dict[k]) for k in nimage_dict.keys() if len(nimage_dict[k]) <= thresh]
    if (minDiff > (sum(pli) - sum(nli))):
        minDiff = abs(sum(pli) - sum(nli))
        idx, a, b = i, sum(pli), sum(nli)
print(a)
print(b)
print(idx)

125385
124902
99


In [20]:
thresh = 97
pli = [len(pimage_dict[k]) for k in pimage_dict.keys() if len(pimage_dict[k]) <= thresh]
nli = [len(nimage_dict[k]) for k in nimage_dict.keys() if len(nimage_dict[k]) <= thresh]
print(len(pli))
print(len(nli))
print(sum(pli))
print(sum(nli))

3208
3772
120456
122342


In [21]:
image_keys_p = [k for k in pimage_dict.keys() if len(pimage_dict[k]) <= thresh]
image_keys_n = [k for k in nimage_dict.keys() if len(nimage_dict[k]) <= thresh]
sub_p = [pimage_dict[k] for k in pimage_dict.keys() if len(pimage_dict[k]) <= thresh]
sub_n = [nimage_dict[k] for k in nimage_dict.keys() if len(nimage_dict[k]) <= thresh]

In [22]:
sub_p = [item for sublist in sub_p for item in sublist]
sub_n = [item for sublist in sub_n for item in sublist]

In [23]:
with open('equal_all_ho.pkl', 'wb') as file:
    pickle.dump([image_keys_p, image_keys_n, sub_p, sub_n], file)