# Image classification using deep learning

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Training a modified LeNet network

In [2]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image

import tensorflow as tf
from tensorflow import keras
from keras import layers, models, optimizers
# NOTE(review): the name `ImageDataGenerator` is rebound by the
# `bcd.data.make_dataset` import further down, so the Keras class imported on
# the next line is not reachable under this name in the rest of the notebook.
from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from bcd.utils.paths import data_raw_dir, models_dir
# Project generator; this binding replaces the Keras `ImageDataGenerator` above.
from bcd.data.make_dataset import ImageDataGenerator

# Activate seaborn's default plot theme for the figures in this notebook.
sns.set_theme()

## Load dataset metadata

In [3]:
# Name of the label column used as the prediction target.
target = "cancer"

# Read the metadata table (one row per image) from the raw-data directory.
dataset = pd.read_csv(data_raw_dir("train.csv"))

# Each image file is named "<patient_id>_<image_id>.png".
patient_ids = dataset["patient_id"].astype(str)
image_ids = dataset["image_id"].astype(str)
dataset["image_name"] = patient_ids + "_" + image_ids + ".png"

display(dataset)

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,implant,machine_id,prediction_id,cancer,image_name
0,2,10006,462822612,L,CC,61.0,0,29,10006_L,0,10006_462822612.png
1,2,10006,1459541791,L,MLO,61.0,0,29,10006_L,0,10006_1459541791.png
2,2,10006,1864590858,R,MLO,61.0,0,29,10006_R,0,10006_1864590858.png
3,2,10006,1874946579,R,CC,61.0,0,29,10006_R,0,10006_1874946579.png
4,2,10011,220375232,L,CC,55.0,0,21,10011_L,0,10011_220375232.png
...,...,...,...,...,...,...,...,...,...,...,...
54701,1,9973,1729524723,R,MLO,43.0,0,49,9973_R,0,9973_1729524723.png
54702,1,9989,63473691,L,MLO,60.0,0,216,9989_L,0,9989_63473691.png
54703,1,9989,1078943060,L,CC,60.0,0,216,9989_L,0,9989_1078943060.png
54704,1,9989,398038886,R,MLO,60.0,0,216,9989_R,0,9989_398038886.png


In [4]:
# Hold out 20% of the rows as a test set. Stratifying on the target keeps the
# proportion of positive `cancer` labels the same in both splits, so the test
# set cannot end up with a skewed positive class by chance.
# NOTE(review): rows are per image and several rows share a patient_id, so a
# patient's images can still land on both sides of this split; consider a
# group-aware split if patient-level leakage matters.
X_train, X_test, y_train, y_test = train_test_split(
    dataset["image_name"],
    dataset[target],
    test_size=0.2,
    stratify=dataset[target],
    random_state=123,
)

## Create image data generator

In [5]:
# Target (height, width) of the images fed to the network.
IMG_SIZE = (256, 256)

# Number of images per batch produced by the data generator.
BATCH_SIZE = 32

# NOTE(review): EPOCHS and VALIDATION_SPLIT are not referenced by the cells
# visible in this notebook (the fit call passes a literal epoch count).
EPOCHS = 20
VALIDATION_SPLIT = 0.2

In [6]:
# Images are addressed as "train/<label>/<image_name>", and each relative
# path is passed through data_raw_dir.
train_relative = "train/" + y_train.astype(str) + "/" + X_train
test_relative = "train/" + y_test.astype(str) + "/" + X_test

x_train_paths = [data_raw_dir(rel_path) for rel_path in train_relative]
x_test_paths = [data_raw_dir(rel_path) for rel_path in test_relative]

# Batch generator over the training images and their labels.
train_generator = ImageDataGenerator(
    x_train_paths,
    y_train.to_numpy(),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)


## Simple neural network

In [7]:
# NOTE: this whole cell is commented out and does not run. It sketches a
# dense-only baseline (flatten -> Dense(128) -> dropout -> output layer).
# NOTE(review): the last layer is Dense(10) while the compile call below uses
# binary_crossentropy; presumably it should be a single sigmoid unit, so
# confirm before re-enabling this cell.
# def simple_nn():
#     model = tf.keras.models.Sequential([
#         tf.keras.layers.Flatten(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 1)),
#         tf.keras.layers.Dense(128, activation='relu'),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(10)
#     ])
    
#     return model

# model = simple_nn()
# model.compile(
#     optimizer='adam',
#     loss='binary_crossentropy',
#     metrics=['accuracy']
# )

# # Train the model
# history = model.fit(train_generator, epochs=5)

## Modified LeNet

In [8]:
def modified_lenet():
    """Build an uncompiled LeNet-style CNN with a single sigmoid output.

    The network takes single-channel images of size
    ``(IMG_SIZE[0], IMG_SIZE[1], 1)`` and stacks two convolution + max-pooling
    stages followed by two dense layers and a one-unit sigmoid output.

    Returns:
        The ``Sequential`` model; the caller is responsible for compiling it.
    """
    input_shape = (IMG_SIZE[0], IMG_SIZE[1], 1)

    # Convolutional stages: 5x5 kernels, each followed by 2x2 max pooling.
    feature_layers = [
        layers.Conv2D(6, (5, 5), activation="relu", input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(16, (5, 5), activation="relu"),
        layers.MaxPooling2D((2, 2)),
    ]

    # Dense head ending in one sigmoid unit (binary output).
    head_layers = [
        layers.Flatten(),
        layers.Dense(120, activation="relu"),
        layers.Dense(84, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]

    return models.Sequential(feature_layers + head_layers)

# Build the network and configure it for binary classification.
model = modified_lenet()
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Fit on the training generator for 5 epochs and keep the returned history.
# NOTE(review): the epoch count is a literal here rather than the EPOCHS
# constant defined earlier; confirm which value is intended.
history = model.fit(train_generator, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Save model

In [None]:
# Disabled: uncomment to save the trained model to models_dir('lenet5.h5').
# model.save(models_dir('lenet5.h5'))