In [None]:
# Mount Google Drive at /content/drive; the dataset paths used by the later
# cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import glob as glob
import cv2
import os
import numpy as np
import csv
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import imgaug.augmenters as iaa
from keras.preprocessing.image import ImageDataGenerator
from xgboost import XGBClassifier

In [None]:
# Single place to change when the dataset folder moves; both label files are
# read from this directory.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/Blood Disease Classification'

train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))

In [None]:
directory = '/content/drive/MyDrive/Colab Notebooks/Blood Disease Classification/Images'

# create a list of filepaths for all JPEG images in the directory
jpeg_files = [os.path.join(directory, filename) for filename in os.listdir(directory)
              if os.path.isfile(os.path.join(directory, filename))
              and filename.lower().endswith('.jpg')]
# set copy of the same paths, used only for fast membership tests below
jpeg_file_set = set(jpeg_files)

# create a dictionary to map filepaths to image data
jpeg_images = {filepath: None for filepath in jpeg_files}


def _image_id_to_filepath(image_id):
    """Return the expected JPEG path for a numeric image id from the CSV files."""
    return os.path.join(directory, f"BloodImage_{image_id:05d}.jpg")


# create a new column 'Filepath' in the train dataframe
train['Filepath'] = train['Image'].apply(_image_id_to_filepath)
# exclude any rows with invalid filepaths; .copy() detaches the result from the
# original frame so later column assignments write to a real frame, not a slice
train = train[train['Filepath'].isin(jpeg_file_set)].copy()

# create a new column 'Filepath' in the test dataframe
test['Filepath'] = test['Image'].apply(_image_id_to_filepath)
test = test[test['Filepath'].isin(jpeg_file_set)].copy()

# check that all original columns are preserved in the train and test dataframes
print(train.columns)
print(test.columns)

Index(['Image', 'Category', 'Filepath'], dtype='object')
Index(['Image', 'Filepath'], dtype='object')


In [None]:
jpeg_images = {filepath: None for filepath in jpeg_files}

# Only decode files that a train/test row actually points at; other JPEGs in
# the folder keep their None placeholder instead of occupying memory.
needed_files = set(train['Filepath']) | set(test['Filepath'])

# load JPEG images and convert to numpy arrays
for filepath in jpeg_files:
    if filepath not in needed_files:
        continue
    with Image.open(filepath) as img:
        # force 3 channels: the cv2.cvtColor(..., COLOR_BGR2GRAY) calls in the
        # preprocessing helpers need a colour image, not a single-channel one
        jpeg_images[filepath] = np.array(img.convert('RGB'))

# replace the numeric 'Image' id column in the train dataframe with the pixel arrays
train['Image'] = train['Filepath'].map(jpeg_images)

# same replacement for the test dataframe
test['Image'] = test['Filepath'].map(jpeg_images)

# check that all original columns are preserved in the train and test dataframes
print(train.columns)
print(test.columns)

# print the shape of the first image in the train dataframe
# (.iloc is positional, so this still works if the row labelled 0 was filtered out)
print(train['Image'].iloc[0].shape)

Index(['Image', 'Category', 'Filepath'], dtype='object')
Index(['Image', 'Filepath'], dtype='object')
(480, 640, 3)


In [None]:
def create_mask_for_image(image):
    """Build a flattened binary mask for a colour image.

    The image is converted to grayscale, binarised with an inverted Otsu
    threshold (pixels at or below the threshold become 255, the rest 0), and
    cleaned with a 3x3 rectangular morphological closing.

    Returns the mask as a 1-D array (the 2-D mask passed through reshape(-1)).
    """
    # NOTE(review): this notebook loads images through PIL, which normally yields
    # RGB channel order, while the conversion below assumes BGR - confirm whether
    # COLOR_RGB2GRAY was intended.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # closing (dilate then erode) fills small holes in the thresholded regions
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    return closed.reshape(-1)

def deskew_image(image):
    """Rotate `image` about its centre by an angle estimated from its mask.

    The angle comes from the minimum-area rectangle around the non-zero pixels
    of create_mask_for_image(image). The output has the same width and height
    as the input; borders exposed by the rotation are filled by replicating
    edge pixels. If the mask has no non-zero pixel, a copy of the input is
    returned unchanged.
    """
    (h, w) = image.shape[:2]

    # create_mask_for_image returns the mask flattened; restore the 2-D layout so
    # np.where yields (row, col) pairs rather than a single index column
    mask = np.asarray(create_mask_for_image(image)).reshape(h, w)

    coords = np.column_stack(np.where(mask > 0))
    if coords.shape[0] == 0:
        # no foreground pixels: there is nothing to estimate an angle from
        return image.copy()

    # cast to float32, a point dtype cv2.minAreaRect accepts
    angle = cv2.minAreaRect(coords.astype(np.float32))[-1]
    # NOTE(review): this correction assumes minAreaRect reports angles in
    # [-90, 0); newer OpenCV releases are believed to use a different range -
    # verify against the installed version.
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    # apply an affine rotation around the image centre to deskew the image
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

def gray_image(image):
    """Return the grayscale version of `image` with pixels outside the mask zeroed.

    The mask comes from create_mask_for_image(image); the result is a 2-D
    array with the same height and width as the input.
    """
    # create_mask_for_image returns a flattened mask, but cv2.bitwise_and needs
    # the mask to have the image's height x width, so restore that shape
    mask = np.asarray(create_mask_for_image(image)).reshape(image.shape[:2])

    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # apply the mask to the grayscale image
    output = cv2.bitwise_and(gray, gray, mask=mask)
    return output

def thresh_image(image):
    """Return the inverted-Otsu binarisation of `image`, restricted to its mask.

    The mask comes from create_mask_for_image(image); the result is a 2-D
    array with the same height and width as the input.
    """
    # create_mask_for_image returns a flattened mask, but cv2.bitwise_and needs
    # the mask to have the image's height x width, so restore that shape
    mask = np.asarray(create_mask_for_image(image)).reshape(image.shape[:2])

    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # apply a threshold to the grayscale image
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # apply the mask to the thresholded image
    # NOTE(review): the mask is a closing of this same threshold, so this step
    # presumably leaves `thresh` unchanged - confirm whether that is intended
    output = cv2.bitwise_and(thresh, thresh, mask=mask)
    return output

def rnoise_image(image):
    """Return a 3x3 median-filtered copy of `image` with pixels outside the mask zeroed.

    The mask comes from create_mask_for_image(image).
    """
    # create_mask_for_image returns a flattened mask, but cv2.bitwise_and needs
    # the mask to have the image's height x width, so restore that shape
    mask = np.asarray(create_mask_for_image(image)).reshape(image.shape[:2])

    # apply a median filter to remove salt and pepper noise from the image
    median = cv2.medianBlur(image, 3)

    # apply the mask to the denoised image
    output = cv2.bitwise_and(median, median, mask=mask)
    return output

def dilate_image(image):
    """Return `image` dilated once with a 3x3 kernel, with pixels outside the mask zeroed.

    The mask comes from create_mask_for_image(image).
    """
    # create_mask_for_image returns a flattened mask, but cv2.bitwise_and needs
    # the mask to have the image's height x width, so restore that shape
    mask = np.asarray(create_mask_for_image(image)).reshape(image.shape[:2])

    # apply a dilation transformation to the image
    kernel = np.ones((3,3), np.uint8)
    dilated = cv2.dilate(image, kernel, iterations=1)

    # apply the mask to the dilated image
    output = cv2.bitwise_and(dilated, dilated, mask=mask)
    return output

def erode_image(image):
    """Return `image` eroded once with a 3x3 kernel, with pixels outside the mask zeroed.

    The mask comes from create_mask_for_image(image). This mirrors
    dilate_image: previously the mask was computed but never applied and the
    function fell off the end, returning None.
    """
    # create_mask_for_image returns a flattened mask, but cv2.bitwise_and needs
    # the mask to have the image's height x width, so restore that shape
    mask = np.asarray(create_mask_for_image(image)).reshape(image.shape[:2])

    # apply an erosion transformation to the image
    kernel = np.ones((3,3), np.uint8)
    eroded = cv2.erode(image, kernel, iterations=1)

    # apply the mask to the eroded image
    output = cv2.bitwise_and(eroded, eroded, mask=mask)
    return output

In [None]:
# Add one derived column per preprocessing helper. Only the flattened binary
# mask is computed; the other variants below are disabled (commented out).
train['create_mask_for_image'] = train['Image'].apply(create_mask_for_image)
# train['deskew_image'] = train['Image'].apply(deskew_image)
# train['gray_image'] = train['Image'].apply(gray_image)
# train['thresh_image'] = train['Image'].apply(thresh_image)
# train['rnoise_image'] = train['Image'].apply(rnoise_image)
# train['dilate_image'] = train['Image'].apply(dilate_image)
#train['erode_image'] = train['Image'].apply(erode_image)

In [None]:
# preview the first rows, including the new mask column
train.head()

Unnamed: 0,Image,Category,Filepath,create_mask_for_image
0,"[[[182, 167, 172], [178, 163, 168], [172, 155,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ..."
1,"[[[173, 156, 128], [171, 154, 128], [169, 151,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ..."
2,"[[[178, 184, 170], [177, 183, 169], [179, 183,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[[[173, 164, 155], [172, 163, 154], [172, 163,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ..."
4,"[[[184, 190, 178], [181, 187, 175], [181, 187,...",EOSINOPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [None]:
def normalize(image):
  """Standardise an array to zero mean and unit standard deviation.

  The mean and standard deviation are taken over all elements of `image`.
  A constant input (standard deviation of 0) would otherwise divide by zero
  and produce NaN/inf, so in that case an all-zero array is returned.
  """
  mean = np.mean(image)
  std = np.std(image)
  centered = image - mean
  if std == 0:
    # every element equals the mean; return clean zeros instead of 0/0
    return np.zeros_like(centered)
  return centered / std

# Standardise each row's mask vector independently (per-row mean and std).
mask_column = train['create_mask_for_image']
train['create_mask_for_image'] = mask_column.apply(normalize)
# The columns for the disabled preprocessing variants (deskew_image, gray_image,
# thresh_image, rnoise_image, dilate_image, erode_image) are not created in this
# notebook, so there is nothing else to normalise here.

In [None]:
# preview the first rows after the mask column has been normalised
train.head()

Unnamed: 0,Image,Category,Filepath,create_mask_for_image
0,"[[[182, 167, 172], [178, 163, 168], [172, 155,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[1.260450299708076, 1.260450299708076, 1.26045..."
1,"[[[173, 156, 128], [171, 154, 128], [169, 151,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[1.0282973495892227, 1.0282973495892227, 1.028..."
2,"[[[178, 184, 170], [177, 183, 169], [179, 183,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[-0.9115331804209794, -0.9115331804209794, -0...."
3,"[[[173, 164, 155], [172, 163, 154], [172, 163,...",NEUTROPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[1.1885485058813887, 1.1885485058813887, 1.188..."
4,"[[[184, 190, 178], [181, 187, 175], [181, 187,...",EOSINOPHIL,/content/drive/MyDrive/Colab Notebooks/Blood D...,"[-0.9764872831977968, -0.9764872831977968, -0...."


In [None]:
# Encode the string class names in 'Category' as integer labels.
le = LabelEncoder()
le.fit(train['Category'])
y = le.transform(train['Category'])

# Feature frame: everything except the label and the file path.
X = train.drop(columns=['Category', 'Filepath'])

In [None]:
# preview the feature frame (label and path columns removed)
X.head()

Unnamed: 0,Image,create_mask_for_image
0,"[[[182, 167, 172], [178, 163, 168], [172, 155,...","[1.260450299708076, 1.260450299708076, 1.26045..."
1,"[[[173, 156, 128], [171, 154, 128], [169, 151,...","[1.0282973495892227, 1.0282973495892227, 1.028..."
2,"[[[178, 184, 170], [177, 183, 169], [179, 183,...","[-0.9115331804209794, -0.9115331804209794, -0...."
3,"[[[173, 164, 155], [172, 163, 154], [172, 163,...","[1.1885485058813887, 1.1885485058813887, 1.188..."
4,"[[[184, 190, 178], [181, 187, 175], [181, 187,...","[-0.9764872831977968, -0.9764872831977968, -0...."


In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def reshape_array(arr):
    """
    Flatten an array of any dimensionality into a 1-D numpy array.

    Accepts numpy arrays as well as array-likes such as nested lists. An input
    that is already 1-D comes back with the same contents.
    """
    return np.asarray(arr).reshape(-1)

# Flatten the per-row arrays in BOTH splits so training and validation rows
# share the same 1-D layout. The .copy() calls make the splits independent
# frames before columns are assigned into them.
X_train = X_train.copy()
X_val = X_val.copy()
for split_frame in (X_train, X_val):
    for column in ('Image', 'create_mask_for_image'):
        split_frame[column] = split_frame[column].apply(reshape_array)

In [None]:
# display the training features after flattening (each cell holds a 1-D array)
X_train

Unnamed: 0,Image,create_mask_for_image
127,"[187, 192, 172, 184, 189, 169, 184, 189, 169, ...","[-0.9394776533542456, -0.9394776533542456, -0...."
66,"[177, 192, 189, 175, 190, 187, 174, 189, 186, ...","[-1.0086178615685901, -1.0086178615685901, -1...."
73,"[160, 138, 151, 159, 137, 150, 159, 137, 150, ...","[1.1113762456916993, 1.1113762456916993, 1.111..."
96,"[159, 138, 143, 159, 139, 141, 161, 141, 143, ...","[1.0935798859204062, 1.0935798859204062, 1.093..."
119,"[177, 185, 170, 177, 185, 170, 177, 185, 170, ...","[-1.0011790805316338, -1.0011790805316338, -1...."
...,...,...
106,"[174, 147, 164, 171, 146, 165, 171, 146, 165, ...","[1.3681691852444804, 1.3681691852444804, 1.368..."
14,"[168, 138, 140, 167, 137, 139, 166, 136, 136, ...","[1.1253321952840831, 1.1253321952840831, 1.125..."
92,"[182, 177, 174, 179, 173, 173, 178, 172, 172, ...","[1.2071238067361598, 1.2071238067361598, 1.207..."
179,"[169, 159, 158, 168, 158, 159, 167, 157, 158, ...","[1.1039653038086867, 1.1039653038086867, 1.103..."


In [None]:
# check which container type holds the training labels
type(y_train)

numpy.ndarray

In [None]:
# check which container type holds the training features
type(X_train)

pandas.core.frame.DataFrame

In [None]:
# preview the first training rows
X_train.head()

Unnamed: 0,Image,create_mask_for_image
127,"[187, 192, 172, 184, 189, 169, 184, 189, 169, ...","[-0.9394776533542456, -0.9394776533542456, -0...."
66,"[177, 192, 189, 175, 190, 187, 174, 189, 186, ...","[-1.0086178615685901, -1.0086178615685901, -1...."
73,"[160, 138, 151, 159, 137, 150, 159, 137, 150, ...","[1.1113762456916993, 1.1113762456916993, 1.111..."
96,"[159, 138, 143, 159, 139, 141, 161, 141, 143, ...","[1.0935798859204062, 1.0935798859204062, 1.093..."
119,"[177, 185, 170, 177, 185, 170, 177, 185, 170, ...","[-1.0011790805316338, -1.0011790805316338, -1...."


In [None]:
# def reshape_array(arr):
#     """
#     Reshape a 3D numpy array to a 2D numpy array.
#     """
#     return arr.reshape(-1)

# X_train = X_train.apply(reshape_array)
# X_val = X_val.apply(reshape_array)

In [None]:
# Seed the forest so repeated runs of the notebook give the same model; 42
# matches the seed already used for the train/validation split.
rf = RandomForestClassifier(random_state=42)

In [None]:
# Scratch copies of the training features/labels as numpy arrays.
# NOTE(review): apart from `a` being reshaped in the next cell, neither name is
# referenced by the later cells visible here - looks like leftover debugging.
a = np.array(X_train)
b = np.array(y_train)

In [None]:
# collapse the scratch array into a single row
# NOTE(review): the result is not used by any later cell visible here
a = a.reshape(1,-1)

In [None]:
# b = b.reshape(-1,1)
# b

In [None]:
# sanity check: features and labels must have the same number of rows
print(X_train.shape)
print(y_train.shape)

(178, 2)
(178,)


In [None]:
# display the training features
# NOTE(review): this repeats an earlier display of X_train; .head() would be enough
X_train

Unnamed: 0,Image,create_mask_for_image
127,"[187, 192, 172, 184, 189, 169, 184, 189, 169, ...","[-0.9394776533542456, -0.9394776533542456, -0...."
66,"[177, 192, 189, 175, 190, 187, 174, 189, 186, ...","[-1.0086178615685901, -1.0086178615685901, -1...."
73,"[160, 138, 151, 159, 137, 150, 159, 137, 150, ...","[1.1113762456916993, 1.1113762456916993, 1.111..."
96,"[159, 138, 143, 159, 139, 141, 161, 141, 143, ...","[1.0935798859204062, 1.0935798859204062, 1.093..."
119,"[177, 185, 170, 177, 185, 170, 177, 185, 170, ...","[-1.0011790805316338, -1.0011790805316338, -1...."
...,...,...
106,"[174, 147, 164, 171, 146, 165, 171, 146, 165, ...","[1.3681691852444804, 1.3681691852444804, 1.368..."
14,"[168, 138, 140, 167, 137, 139, 166, 136, 136, ...","[1.1253321952840831, 1.1253321952840831, 1.125..."
92,"[182, 177, 174, 179, 173, 173, 178, 172, 172, ...","[1.2071238067361598, 1.2071238067361598, 1.207..."
179,"[169, 159, 158, 168, 158, 159, 167, 157, 158, ...","[1.1039653038086867, 1.1039653038086867, 1.103..."


In [None]:
# display the full integer-encoded label vector produced by the LabelEncoder
y

array([3, 3, 3, 3, 0, 0, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3,
       3, 3, 0, 0, 3, 3, 2, 0, 3, 1, 1, 3, 3, 3, 2, 3, 3, 3, 2, 3, 0, 3,
       0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 1, 1, 3, 3, 3, 2, 0, 3, 3, 3, 0,
       0, 1, 0, 3, 3, 3, 3, 3, 0, 3, 0, 1, 3, 0, 0, 3, 0, 3, 3, 3, 3, 0,
       2, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 0, 3, 0, 1, 3, 0, 3, 3,
       0, 0, 3, 3, 3, 1, 3, 3, 3, 2, 1, 0, 3, 3, 3, 0, 1, 3, 3, 3, 0, 0,
       1, 3, 3, 2, 0, 0, 0, 0, 3, 3, 1, 3, 1, 0, 3, 3, 3, 3, 3, 0, 3, 3,
       3, 0, 2, 3, 0, 2, 3, 0, 3, 3, 0, 3, 0, 3, 3, 3, 2, 3, 3, 3, 1, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       0, 3, 3, 1, 0, 0, 1, 2, 0, 3, 0, 0, 0, 3, 3, 1, 3, 3, 3, 3, 3, 0,
       0, 0, 3])

In [None]:
# display the training features
# NOTE(review): this repeats an earlier display of X_train; .head() would be enough
X_train

Unnamed: 0,Image,create_mask_for_image
127,"[187, 192, 172, 184, 189, 169, 184, 189, 169, ...","[-0.9394776533542456, -0.9394776533542456, -0...."
66,"[177, 192, 189, 175, 190, 187, 174, 189, 186, ...","[-1.0086178615685901, -1.0086178615685901, -1...."
73,"[160, 138, 151, 159, 137, 150, 159, 137, 150, ...","[1.1113762456916993, 1.1113762456916993, 1.111..."
96,"[159, 138, 143, 159, 139, 141, 161, 141, 143, ...","[1.0935798859204062, 1.0935798859204062, 1.093..."
119,"[177, 185, 170, 177, 185, 170, 177, 185, 170, ...","[-1.0011790805316338, -1.0011790805316338, -1...."
...,...,...
106,"[174, 147, 164, 171, 146, 165, 171, 146, 165, ...","[1.3681691852444804, 1.3681691852444804, 1.368..."
14,"[168, 138, 140, 167, 137, 139, 166, 136, 136, ...","[1.1253321952840831, 1.1253321952840831, 1.125..."
92,"[182, 177, 174, 179, 173, 173, 178, 172, 172, ...","[1.2071238067361598, 1.2071238067361598, 1.207..."
179,"[169, 159, 158, 168, 158, 159, 167, 157, 158, ...","[1.1039653038086867, 1.1039653038086867, 1.103..."


In [None]:
# sanity check: features and labels must have the same number of rows
print(X_train.shape)
print(y_train.shape)

(178, 2)
(178,)


In [None]:
# number of feature columns in the frame (not the width of the final matrix)
n_features = X_train.shape[1]

# Each cell of X_train holds a whole array, so reshaping the frame itself only
# gives an (n_rows, n_columns) array of objects that an estimator cannot
# consume. Instead, turn every column into a real (n_rows, n_values) numeric
# matrix and join the columns side by side.
# float32 halves the memory of the default float64.
# assumes every row's array in a given column has the same length (all images
# share one resolution) - np.stack raises otherwise
image_features = [
    np.stack([np.asarray(cell, dtype=np.float32).reshape(-1) for cell in X_train[column]])
    for column in X_train.columns
]
X_for_RF = np.hstack(image_features)

assert X_for_RF.shape[0] == y_train.shape[0], "feature/label row count mismatch"

In [None]:
# sanity check: the matrix given to the forest must have one row per label
print(X_for_RF.shape)
print(y_train.shape)

(178, 2)
(178,)


In [None]:
# display the array that will be passed to the random forest
X_for_RF

In [None]:
# Train the random forest on the assembled feature array.
# NOTE(review): the recorded output of this cell is a ValueError, and the shape
# printed for X_for_RF earlier was (178, 2) - presumably each element is still
# a whole array rather than a number; confirm X_for_RF is a numeric 2-D matrix.
rf.fit(X_for_RF,y_train)

ValueError: ignored

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# X_train stores one whole array per cell, which an estimator cannot consume
# directly. Flatten every cell and lay the columns side by side to get a
# numeric (n_rows, n_values) matrix.
# assumes every row's array in a given column has the same length
X_train_matrix = np.hstack([
    np.stack([np.asarray(cell, dtype=np.float32).reshape(-1) for cell in X_train[column]])
    for column in X_train.columns
])

# seeded so repeated runs give the same model (same seed as the split)
model3 = ExtraTreesClassifier(random_state=42)
model3.fit(X_train_matrix, y_train)

In [None]:
# A DataFrame has no .tolist(), and the model needs a numeric matrix anyway:
# flatten every cell of the validation frame and join the columns in the same
# order as the training frame's columns.
X_val_matrix = np.hstack([
    np.stack([np.asarray(cell, dtype=np.float32).reshape(-1) for cell in X_val[column]])
    for column in X_train.columns
])

y_pred = model3.predict(X_val_matrix)
print(classification_report(y_val, y_pred))

In [None]:
# flattened pixel vector for every test image
test1 = test['Image'].apply(reshape_array)

# The training frame has two feature columns (raw pixels, normalised mask), so
# build the same two blocks, in the same order, for the test images.
# presumably model3 was fitted on both columns of X_train - verify before use
test_masks = test['Image'].apply(create_mask_for_image).apply(normalize)
X_test_matrix = np.hstack([
    np.stack([np.asarray(cell, dtype=np.float32) for cell in test1]),
    np.stack([np.asarray(cell, dtype=np.float32).reshape(-1) for cell in test_masks]),
])

final_pred = model3.predict(X_test_matrix)

# The test set has no 'Category' column, so there is nothing to score against
# (y_val belongs to the validation rows, not to these images). Report the
# predictions themselves, decoded back to class names.
test_predictions = pd.DataFrame({
    'Filepath': test['Filepath'].to_numpy(),
    'Category': le.inverse_transform(final_pred),
})
print(test_predictions['Category'].value_counts())

In [None]:
# display the flattened test image vectors
test1