In [12]:
import cv2 as cv
import numpy as np
import os
import pandas as pd

In [13]:
# Function for opening image
def get_img(img_path, grayscale=True):
    """Read an image from disk, optionally converting it to grayscale.

    Parameters
    ----------
    img_path : str
        Path of the image file handed to ``cv.imread``.
    grayscale : bool, default True
        When True the image is converted with ``cv.COLOR_BGR2GRAY`` before
        being returned; otherwise it is returned exactly as read.

    Returns
    -------
    numpy.ndarray
        The loaded (and possibly converted) image.

    Raises
    ------
    FileNotFoundError
        If ``cv.imread`` could not load the file (missing or unreadable).
    """
    img = cv.imread(img_path)
    if img is None:
        # cv.imread reports failure by returning None instead of raising,
        # which would otherwise surface later as an opaque cvtColor error
        # (or as a silent None when grayscale=False).
        raise FileNotFoundError("Could not read image: {!r}".format(img_path))
    if grayscale:
        return cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    return img

# Function for cropping image
def crop_img(img, radius=547, half_width=600):
    """Keep a centred disc of the image and trim the columns around it.

    Pixels outside a filled circle of ``radius`` pixels centred on the image
    are set to 0, then the result is cut to the columns lying within
    ``half_width`` pixels of the horizontal centre. All rows are kept.

    Parameters
    ----------
    img : numpy.ndarray
        Grayscale ``(H, W)`` or colour ``(H, W, C)`` image.
    radius : int, default 547
        Radius in pixels of the circular region that is kept.
    half_width : int, default 600
        Number of columns kept on each side of the horizontal centre.

    Returns
    -------
    numpy.ndarray
        The masked image, at most ``2 * half_width`` columns wide.
    """
    height, width = img.shape[:2]
    center_x = width // 2
    center_y = height // 2

    # The mask must be single-channel, so size it from the spatial
    # dimensions only; img.shape would add a channel axis for colour input.
    mask = np.zeros((height, width), dtype=np.uint8)

    # Filled (thickness -1) white disc marking the region to keep
    region_to_keep = cv.ellipse(mask, (center_x, center_y), (radius, radius), 0, 0, 360, 255, -1)
    masked_img = cv.bitwise_or(img, img, mask=region_to_keep)

    # Clamp the left edge at 0: a negative slice start would wrap around and
    # select columns from the right-hand side of a narrow image.
    left = max(center_x - half_width, 0)
    return masked_img[:, left:center_x + half_width]

# Gaussian Blur Function
def gaussian_blur(img, kernel_a, kernel_b):
    """Blur ``img`` with a Gaussian kernel of size ``(kernel_a, kernel_b)``.

    The sigma argument is passed as 0.
    """
    kernel_size = (kernel_a, kernel_b)
    blurred = cv.GaussianBlur(img, kernel_size, 0)
    return blurred

# Remove the background from the fry image
def img_diff(img, bg_img):
    """Return the per-element absolute difference between ``img`` and ``bg_img``."""
    difference = cv.absdiff(img, bg_img)
    return difference

# Image Segmentation Using Thresholding Function
def threshold_img(diff_img, type='OTSU', block=0, c=0):
    """Binarise an image with Otsu or adaptive Gaussian thresholding.

    Parameters
    ----------
    diff_img : numpy.ndarray
        Image to threshold.
    type : str, default 'OTSU'
        ``'OTSU'`` selects a global Otsu threshold; any other value selects
        adaptive Gaussian thresholding.
    block : int, default 0
        Forwarded as the block size of ``cv.adaptiveThreshold``. OpenCV
        requires an odd value greater than 1, so the default is only usable
        together with ``'OTSU'``, where it is ignored.
    c : int or float, default 0
        Forwarded as the constant ``C`` of ``cv.adaptiveThreshold``; ignored
        for ``'OTSU'``.

    Returns
    -------
    numpy.ndarray
        The thresholded image, using 255 as the "on" value in both modes.
    """
    if type == 'OTSU':
        _, thresh_img = cv.threshold(diff_img, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
        return thresh_img
    # 255 is the largest value an 8-bit image can hold and matches the Otsu
    # branch above (and the value histogram_counter treats as white).
    return cv.adaptiveThreshold(diff_img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, block, c)

# Histogram Counter
def histogram_counter(thres_img, pixel_to_count='white'):
    """Count the pixels of one colour in a thresholded image.

    ``pixel_to_count='white'`` counts elements equal to 255; any other value
    counts elements equal to 0.
    """
    target_value = 255 if pixel_to_count == 'white' else 0
    matches = thres_img == target_value
    return np.count_nonzero(matches)

# Contour Detection
def contour_detection(thres_img):
    """Return the contours found in a thresholded image.

    Contours are retrieved with ``cv.RETR_TREE`` and compressed with
    ``cv.CHAIN_APPROX_SIMPLE``.

    Parameters
    ----------
    thres_img : numpy.ndarray
        Binary image to search.

    Returns
    -------
    sequence of numpy.ndarray
        The contours exactly as returned by ``cv.findContours``.
    """
    found = cv.findContours(thres_img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    # OpenCV 4.x returns (contours, hierarchy) while 3.x returns
    # (image, contours, hierarchy); the contours are second from last in both.
    return found[-2]

In [14]:
# Collect the photo file names and their paths for every label folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order reproducible between runs and machines.
photos_by_label = {}
for label in (100, 200, 300, 400):
    path = './IMAGES/{}/'.format(label)
    # Only files whose name starts with 'my_photo' are kept
    photo_names = sorted(f for f in os.listdir(path) if f.startswith('my_photo'))
    photos_by_label[label] = (photo_names, [path + f for f in photo_names])

# Path to the images (100)
list_images_100, list_paths_100 = photos_by_label[100]

# Path to the images (200)
list_images_200, list_paths_200 = photos_by_label[200]

# Path to the images (300)
list_images_300, list_paths_300 = photos_by_label[300]

# Path to the images (400)
list_images_400, list_paths_400 = photos_by_label[400]

Initialize DataFrames

In [15]:
# Empty template frame with the feature columns shared by every label
feature_columns = ['white_pixels', 'contours', 'label']
df = pd.DataFrame(columns=feature_columns)

# Folder for the exported CSV; no error if it already exists
export_dir = './exported_df'
os.makedirs(export_dir, exist_ok=True)

In [16]:
# Image with 100
# The background is the same for every photo, so load and crop it once
# instead of re-reading it from disk on each iteration.
bg_img = crop_img(get_img('./IMAGES/100/background.jpg'))

records_100 = []
for img_path in list_paths_100:
    # Load the photo (grayscale) and crop it the same way as the background
    img = crop_img(get_img(img_path))

    # Image Difference
    diff_img = img_diff(img, bg_img)

    # Adaptive Gaussian Threshold
    thresh_img = threshold_img(diff_img, type="ADAPTIVE", block=37, c=5)

    # Histogram (White Pixel) Counter
    white_pixels = histogram_counter(thresh_img)

    # Length of Contours
    contours = contour_detection(thresh_img)
    records_100.append({'white_pixels': white_pixels, 'contours': len(contours), 'label': 100})

# Build the frame once from the collected rows; passing the columns
# explicitly keeps the schema even when no photo was found.
df_100 = pd.DataFrame(records_100, columns=['white_pixels', 'contours', 'label'])
df_100.head()

Unnamed: 0,white_pixels,contours,label
0,1269378,827,100
1,1269766,808,100
2,1267853,875,100
3,1268965,808,100
4,1270473,812,100


In [17]:
# Image with 200
# The background is the same for every photo, so load and crop it once
# instead of re-reading it from disk on each iteration.
# NOTE(review): the background is read from the 100 folder, not the 200
# folder — confirm a single shared background is intended.
bg_img = crop_img(get_img('./IMAGES/100/background.jpg'))

records_200 = []
for img_path in list_paths_200:
    # Load the photo (grayscale) and crop it the same way as the background
    img = crop_img(get_img(img_path))

    # Image Difference
    diff_img = img_diff(img, bg_img)

    # Adaptive Gaussian Threshold
    thresh_img = threshold_img(diff_img, type="ADAPTIVE", block=37, c=5)

    # Histogram (White Pixel) Counter
    white_pixels = histogram_counter(thresh_img)

    # Length of Contours
    contours = contour_detection(thresh_img)
    records_200.append({'white_pixels': white_pixels, 'contours': len(contours), 'label': 200})

# Build the frame once from the collected rows; passing the columns
# explicitly keeps the schema even when no photo was found.
df_200 = pd.DataFrame(records_200, columns=['white_pixels', 'contours', 'label'])
df_200.head()

Unnamed: 0,white_pixels,contours,label
0,1224473,2486,200
1,1225664,2463,200
2,1225652,2478,200
3,1225462,2413,200
4,1225731,2401,200


In [18]:
# Image with 300
# The background is the same for every photo, so load and crop it once
# instead of re-reading it from disk on each iteration.
# NOTE(review): the background is read from the 100 folder, not the 300
# folder — confirm a single shared background is intended.
bg_img = crop_img(get_img('./IMAGES/100/background.jpg'))

records_300 = []
for img_path in list_paths_300:
    # Load the photo (grayscale) and crop it the same way as the background
    img = crop_img(get_img(img_path))

    # Image Difference
    diff_img = img_diff(img, bg_img)

    # Adaptive Gaussian Threshold
    thresh_img = threshold_img(diff_img, type="ADAPTIVE", block=37, c=5)

    # Histogram (White Pixel) Counter
    white_pixels = histogram_counter(thresh_img)

    # Length of Contours
    contours = contour_detection(thresh_img)
    records_300.append({'white_pixels': white_pixels, 'contours': len(contours), 'label': 300})

# Build the frame once from the collected rows; passing the columns
# explicitly keeps the schema even when no photo was found.
df_300 = pd.DataFrame(records_300, columns=['white_pixels', 'contours', 'label'])
df_300.head()

Unnamed: 0,white_pixels,contours,label
0,1200940,2590,300
1,1199481,2664,300
2,1202799,2631,300
3,1201549,2731,300
4,1203382,2732,300


In [19]:
# Image with 400
# The background is the same for every photo, so load and crop it once
# instead of re-reading it from disk on each iteration.
# NOTE(review): the background is read from the 100 folder, not the 400
# folder — confirm a single shared background is intended.
bg_img = crop_img(get_img('./IMAGES/100/background.jpg'))

records_400 = []
for img_path in list_paths_400:
    # Load the photo (grayscale) and crop it the same way as the background
    img = crop_img(get_img(img_path))

    # Image Difference
    diff_img = img_diff(img, bg_img)

    # Adaptive Gaussian Threshold
    thresh_img = threshold_img(diff_img, type="ADAPTIVE", block=37, c=5)

    # Histogram (White Pixel) Counter
    white_pixels = histogram_counter(thresh_img)

    # Length of Contours
    contours = contour_detection(thresh_img)
    records_400.append({'white_pixels': white_pixels, 'contours': len(contours), 'label': 400})

# Build the frame once from the collected rows; passing the columns
# explicitly keeps the schema even when no photo was found.
df_400 = pd.DataFrame(records_400, columns=['white_pixels', 'contours', 'label'])
df_400.head()

Unnamed: 0,white_pixels,contours,label
0,1165059,3060,400
1,1164861,3150,400
2,1162460,3250,400
3,1164710,3307,400
4,1163232,3476,400


In [20]:
# Stack the per-label frames into one frame with a fresh 0..n-1 index
label_frames = [df_100, df_200, df_300, df_400]
df = pd.concat(label_frames, ignore_index=True)
df.head()

Unnamed: 0,white_pixels,contours,label
0,1269378,827,100
1,1269766,808,100
2,1267853,875,100
3,1268965,808,100
4,1270473,812,100


In [21]:
# Randomize the Dataframe
# frac=1 draws every row once, i.e. a full shuffle. The fixed random_state
# makes the shuffled order (and therefore the exported CSV) reproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df.head()

Unnamed: 0,white_pixels,contours,label
0,1265864,798,100
1,1170137,3168,400
2,1199623,2749,300
3,1269252,847,100
4,1199448,2769,300


In [22]:
# Write the shuffled training set to CSV, leaving out the index column
output_csv = './exported_df/training.csv'
df.to_csv(output_csv, index=False)