# White Blood Cell Image Classification
### By [Anthony Medina](https://www.linkedin.com/in/anthony-medina-math/)

# Modeling Notebook
1. Notebook Objectives
2. Imports
3. Final Pre-Building Checks
4. Model 1 Neural Network
5. Model 2 Random Forest
6. Model 3 Gradient Boosting Machine
7. Model Results Analysis
8. Model Choice
9. Next Steps

### 1. Notebook Objectives

This notebook houses the model building, the evaluation of each model, and the selection of the model with the best recall score.

### 2. Imports

In [17]:
import os

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [5]:
# Read the cleaned dataset written by the earlier cleaning step and preview
# its first five rows.
df = pd.read_csv(filepath_or_buffer='../cleaned_data/cleaned_data.csv')
df.head(n=5)

Unnamed: 0,cell_key,image_array
0,0,[0.01176471 0. 0. ... 0. ...
1,0,[0. 0. 0. ... 0.760784...
2,0,[0. 0. 0. ... 0. 0. 0.]
3,0,[0. 0. 0. ... 0.764705...
4,0,[0. 0. 0. ... 0. 0. 0.]


In [6]:
# Check how each CSV column was parsed. NOTE(review): if image_array shows up
# as `object`, the arrays were presumably read back as strings rather than
# numeric arrays -- confirm before using this frame for modeling.
df.dtypes

cell_key        int64
image_array    object
dtype: object

In [39]:
# df['image_array'] = df['image_array'].apply(lambda x: np.fromstring(x[1:-1], sep='\n'))

In [44]:
# I added this block because importing my clean data was a nightmare.
# It rebuilds the modeling data directly from the raw image folders and
# produces three objects used by the cells below:
#   df - one row per image with its cell name and rescaled pixel array
#   X  - array of the rescaled images (model input)
#   y  - array of the matching cell-name labels

# Upper bound on how many image files are read from each class folder.
MAX_IMAGES_PER_CLASS = 100
# Folder that holds one sub-folder of images per cell type.
IMAGE_ROOT = '../raw_data/organized_data_set/images/'

column_names = ['cell_name', 'image_array']

# The 4 different types of images, one sub-folder each.
cell_names = ['neutrophil', 'monocyte', 'lymphocyte', 'eosinophil']

images = []
labels = []
for index, cell_name in enumerate(cell_names):
    print(index, cell_name)
    directory_path = IMAGE_ROOT + cell_name
    count = 0
    # Sort the listing: os.listdir returns entries in arbitrary order, which
    # would make the selected subset differ between runs and machines.
    for filename in sorted(os.listdir(directory_path)):
        if count >= MAX_IMAGES_PER_CLASS:
            break  # limit reached; no need to walk the rest of the folder
        file_path = os.path.abspath(os.path.join(directory_path, filename))
        if not os.path.isfile(file_path):
            continue  # skip sub-directories without using up the budget
        count += 1

        image = mpimg.imread(file_path)          # First it's an image
        first_array = np.asarray(image)          # Then it's an array
        float_array = first_array.astype('float32')  # Now it's an array of floats
        # Integer-typed images hold 0-255 values and need rescaling to 0-1.
        # imread already returns 0-1 floats for PNG files, so those are kept.
        if np.issubdtype(first_array.dtype, np.integer):
            rescaled_array = float_array / 255.0
        else:
            rescaled_array = float_array

        # Store the *rescaled* array so that X really is normalised.
        images.append(rescaled_array)
        labels.append(cell_name)

# Build the frame once from records instead of appending row by row.
records = [
    {'cell_name': label, 'image_array': array}
    for label, array in zip(labels, images)
]
df = pd.DataFrame(records, columns=column_names)

X = np.array(images)
y = np.array(labels)

0 neutrophil
1 monocyte
2 lymphocyte
3 eosinophil


In [45]:
# Convert the string cell names in `y` into integer class ids. The fitted
# encoder is kept so the ids can be mapped back to names later with
# encoder.inverse_transform. LabelEncoder is imported in the imports cell.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

In [46]:
# Hold out 20% of the images as a test set; the fixed random_state keeps the
# split identical between runs. train_test_split is already imported in the
# imports cell at the top of the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [48]:
def create_model(input_shape=(240, 320, 3), num_classes=None):
    """Build and compile a small convolutional network for the cell images.

    The network is: a 3x3 convolution with 32 filters (ReLU), 2x2 max
    pooling, a flatten step, a 64-unit ReLU dense layer, and a softmax
    output layer.

    Parameters
    ----------
    input_shape : tuple of int, default (240, 320, 3)
        Shape of one input sample, passed to the first layer.
    num_classes : int, optional
        Number of units in the softmax output layer. When omitted it is
        derived from the number of distinct values in the notebook-level
        ``y_encoded`` array, which is what the original version always did.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The model, compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and an accuracy metric.
    """
    if num_classes is None:
        # Fall back to the labels prepared earlier in the notebook.
        num_classes = len(set(y_encoded))

    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))  # one unit per class

    # The sparse loss takes integer class ids, so no one-hot encoding is needed.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

# Build a fresh network and train it for 10 epochs, holding back 20% of the
# training data for validation.
model = create_model()
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Train on 256 samples, validate on 64 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x16f3733d588>

In [1]:
# Score the trained network on the held-out test split. `evaluate` returns the
# loss followed by the compiled metrics (accuracy here), so the message must
# print `accuracy`; the previous version referenced an undefined name.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test loss: {loss}, Test accuracy: {accuracy}")

NameError: name 'model' is not defined