# Dataset is a folder called 'poultry_diseases' which has 4 folders:
- cocci
- healthy
- ncd
- salmo

Each folder contains JPG images for that category: coccidiosis (cocci), healthy birds (healthy), Newcastle disease (ncd) and salmonella (salmo).

### Step 1: First figure out how many images per category
- How many images are in each directory
- Dimensions of the images
- Colour images load as 3-dimensional arrays (height, width, 3 colour channels); OpenCV stores the channels in BGR order
- Grayscale images are 2-dimensional arrays (height, width) with a single channel

In [1]:
import numpy as np
import cv2
import os
import warnings
warnings.filterwarnings("ignore")

#import time
#from PIL import Image

In [2]:
# Peek at four example file names from the Newcastle disease folder
ncd_file_names = os.listdir("poultry_diseases/ncd/")
ncd_file_names[1:5]

['ncd.270.jpg', 'ncd.264.jpg', 'ncd.81.jpg', 'ncd.258.jpg']

In [3]:
# Load one sample image and inspect how it is represented in memory
img = cv2.imread("poultry_diseases/ncd/ncd.0.jpg")
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than with a TypeError from len().
if img is None:
    raise FileNotFoundError("could not read poultry_diseases/ncd/ncd.0.jpg")
print(type(img))
print(len(img))  # length of the first axis
print(img.shape, end='\n\n')
# first row of the image
print(type(img[0]), len(img[0]), img[0])
# first pixel of the first row
print(type(img[0][0]), len(img[0][0]), img[0][0])
# A single channel value is a scalar without len(), so the length shown is
# that of the enclosing pixel. Type and value refer to the same element.
print(type(img[0][0][1]), len(img[0][0]), img[0][0][1])

<class 'numpy.ndarray'>
1152
(1152, 2048, 3)

<class 'numpy.ndarray'> 2048 [[ 65  77  79]
 [ 64  76  78]
 [ 64  77  79]
 ...
 [177 176 172]
 [178 176 175]
 [178 176 175]]
<class 'numpy.ndarray'> 3 [65 77 79]
<class 'numpy.uint8'> 3 77


In [4]:
def generate_summary(disease_type, root="poultry_diseases"):
    """Return a text summary of the images stored for one category.

    Every entry in ``<root>/<disease_type>/`` is loaded with ``cv2.imread``
    and its array shape is recorded. The summary reports the number of
    images read, the mean and median of the first two shape entries
    (vertical and horizontal length) and the unique shapes found.

    Args:
        disease_type: Name of the category sub-folder, e.g. ``'ncd'``.
        root: Folder that contains the category sub-folders.

    Returns:
        A multi-line string. If no entry could be read as an image, only
        the image count (0) and a short notice are included. If some
        entries could not be read, a final line reports how many were
        skipped.
    """
    folder = os.path.join(root, disease_type)
    shapes = []
    skipped = 0
    for file_name in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, file_name))
        # cv2.imread returns None (it does not raise) for anything it
        # cannot decode; skip those entries instead of crashing on .shape.
        if img is None:
            skipped += 1
            continue
        shapes.append(img.shape)

    if not shapes:
        # An empty array has no columns to index, so report and stop here.
        output = "image count: 0\nno readable images found"
    else:
        shapes = np.array(shapes)
        output = (
            f"image count: {len(shapes)}\n"
            f"mean vertical length: {np.mean(shapes[:, 0])}\n"
            f"mean horizontal length: {np.mean(shapes[:, 1])}\n"
            f"median vertical length: {np.median(shapes[:, 0])}\n"
            f"median horizontal length: {np.median(shapes[:, 1])}\n"
            f"Unique shapes in dataset:\n"
            f"{np.unique(shapes, axis=0)}"
        )

    if skipped:
        output += f"\nunreadable files skipped: {skipped}"
    return output

In [5]:
# Summarise the Newcastle disease and healthy folders, one after the other
for heading, folder_name in (
    ('\nSummary of all Newcastle disease images:', 'ncd'),
    ('\nSummary of all healthy poultry images:', 'healthy'),
):
    print(heading)
    print(generate_summary(folder_name))


Summary of all Newcastle disease images:
image count: 376
mean vertical length: 1706.723404255319
mean horizontal length: 2113.7021276595747
median vertical length: 1152.0
median horizontal length: 2048.0
Unique shapes in dataset:
[[1152 2048    3]
 [1440 3200    3]
 [3200 1440    3]]

Summary of all healthy poultry images:
image count: 2057
mean vertical length: 1565.1142440447254
mean horizontal length: 2122.819640252795
median vertical length: 1152.0
median horizontal length: 2048.0
Unique shapes in dataset:
[[1152 2048    3]
 [2048 1152    3]
 [3120 4160    3]
 [4160 3120    3]]


In [6]:
# Summarise the coccidiosis and salmonella folders, one after the other
for heading, folder_name in (
    ('\nSummary of all coccidiosis disease images:', 'cocci'),
    ('\nSummary of all salmonella disease images:', 'salmo'),
):
    print(heading)
    print(generate_summary(folder_name))


Summary of all coccidiosis disease images:
image count: 2103
mean vertical length: 2726.870185449358
mean horizontal length: 2028.0589633856396
median vertical length: 3200.0
median horizontal length: 1440.0
Unique shapes in dataset:
[[1440 3200    3]
 [2448 3264    3]
 [3200 1440    3]
 [3264 2448    3]
 [4160 3120    3]]

Summary of all salmonella disease images:
image count: 2276
mean vertical length: 2216.2390158172234
mean horizontal length: 2102.699472759227
median vertical length: 2048.0
median horizontal length: 2048.0
Unique shapes in dataset:
[[1152 2048    3]
 [1440 3200    3]
 [2048 1152    3]
 [2448 3264    3]
 [3200 1440    3]
 [3264 2448    3]
 [4160 3120    3]]


### Step 2: Crop the image
- We saw that the lowest means and medians of the image dimensions were above 1000
- In fact, the smallest side among the unique shapes is 1152 pixels, so every image is more than 1000 pixels in both width and height
- So we felt a square crop of 800x800 pixels was a good crop size
- Cropping also reduces the number of pixels per image, so the flattened 1D array at the end is shorter and has the same length for every image

In [7]:
"""Attempt central 800x800 pixel square crop for test image"""
img = cv2.imread("poultry_diseases/ncd/ncd.0.jpg")
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError("could not read poultry_diseases/ncd/ncd.0.jpg")

crop_size = 800  # side length of the square crop, in pixels
half_crop = crop_size // 2
# Calculate center position
center_y = img.shape[0] // 2
center_x = img.shape[1] // 2
# Crop boundaries: half the crop size in each direction from the center.
# They are clamped to the image so that an image smaller than the crop
# cannot produce a negative start index, which numpy slicing would
# interpret as counting from the end of the axis.
start_y = max(center_y - half_crop, 0)
start_x = max(center_x - half_crop, 0)
end_y = min(center_y + half_crop, img.shape[0])
end_x = min(center_x + half_crop, img.shape[1])
cropped_img = img[start_y:end_y, start_x:end_x]

print("Original image vertical length:", img.shape[0])
print("Horizontal image length:", img.shape[1])
print("Original image dimensions:", img.shape)
print("Cropped image dimensions:", cropped_img.shape)

cv2.imshow('original', img)
cv2.imshow('cropped', cropped_img)
while True:
    k = cv2.waitKey(1) & 0xFF
    # stop when Escape (key code 27) is pressed or when either window is
    # no longer reported as visible
    if k == 27 or \
    cv2.getWindowProperty('original', cv2.WND_PROP_VISIBLE) < 1 or \
    cv2.getWindowProperty('cropped', cv2.WND_PROP_VISIBLE) < 1:
        break
cv2.destroyAllWindows()

Original image vertical length: 1152
Horizontal image length: 2048
Original image dimensions: (1152, 2048, 3)
Cropped image dimensions: (800, 800, 3)


2025-03-28 09:55:29.098 Python[96057:6902270] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-28 09:55:29.098 Python[96057:6902270] +[IMKInputSession subclass]: chose IMKInputSession_Modern


### Step 3: Grayscale image
- Read in image
- Apply grayscale using OpenCV

In [None]:
# Convert the original (uncropped) test image to grayscale
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Show the colour and grayscale versions in two separate windows
window_images = {'original': img, 'grayscale': gray_img}
for window_name, window_image in window_images.items():
    cv2.imshow(window_name, window_image)

# Poll the keyboard until Escape is pressed or a window is no longer
# reported as visible
while True:
    k = cv2.waitKey(1) & 0xFF
    if k == 27:  # Escape key
        break
    if any(
        cv2.getWindowProperty(name, cv2.WND_PROP_VISIBLE) < 1
        for name in window_images
    ):
        break
cv2.destroyAllWindows()

### Step 4: Flatten image into a 1 dimensional array

In [None]:
# Work on the cropped image here, not the full-size original
gray_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
# ravel and flatten both produce a 1D array; ravel avoids a copy when it
# can, while flatten always copies
preprocessed_img = np.ravel(gray_img)
# confirm that the result has a single dimension
print(preprocessed_img.shape)

(640000,)


: 