In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# 1. Prepare data

In [2]:
# Google Drive file ID of the dataset archive (archive.zip) downloaded in the next cell.
FILE_ID = '1AhhYo_QWV9j6DCoisJJaZwJoB_b9sTQX'

In [3]:
!gdown 1AhhYo_QWV9j6DCoisJJaZwJoB_b9sTQX

Downloading...
From: https://drive.google.com/uc?id=1AhhYo_QWV9j6DCoisJJaZwJoB_b9sTQX
To: /content/archive.zip
100% 124M/124M [00:04<00:00, 25.7MB/s]


In [4]:
!unzip archive.zip

Archive:  archive.zip
  inflating: Data/test/adenocarcinoma/000108 (3).png  
  inflating: Data/test/adenocarcinoma/000109 (2).png  
  inflating: Data/test/adenocarcinoma/000109 (4).png  
  inflating: Data/test/adenocarcinoma/000109 (5).png  
  inflating: Data/test/adenocarcinoma/000112 (2).png  
  inflating: Data/test/adenocarcinoma/000113 (7).png  
  inflating: Data/test/adenocarcinoma/000114 (5).png  
  inflating: Data/test/adenocarcinoma/000114.png  
  inflating: Data/test/adenocarcinoma/000115 (4).png  
  inflating: Data/test/adenocarcinoma/000115 (8).png  
  inflating: Data/test/adenocarcinoma/000115.png  
  inflating: Data/test/adenocarcinoma/000116 (5).png  
  inflating: Data/test/adenocarcinoma/000116 (7).png  
  inflating: Data/test/adenocarcinoma/000116 (9).png  
  inflating: Data/test/adenocarcinoma/000117 (4).png  
  inflating: Data/test/adenocarcinoma/000117 (8).png  
  inflating: Data/test/adenocarcinoma/000117.png  
  inflating: Data/test/adenocarcinoma/000118 (5).png  


In [5]:
# Check the working directory: the extracted `Data` folder should sit next to archive.zip.
!ls

archive.zip  Data  sample_data


In [6]:
# List the dataset splits shipped in the archive.
!ls Data

test  train  valid


In [7]:
# List the class sub-folders of the training split (one folder per label).
!ls Data/train

adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib     normal
large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa  squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa


In [8]:
# List the class sub-folders of the validation split.
!ls Data/valid

adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib     normal
large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa  squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa


# 2. Load data

In [9]:
def _load_split(directory: str, shuffle: bool):
  """Load one dataset split as batches of 256x256 images with one-hot labels.

  Args:
    directory: Folder containing one sub-folder per class.
    shuffle: Whether to shuffle the samples (seeded for reproducibility).

  Returns:
    The dataset produced by `image_dataset_from_directory`.
  """
  return image_dataset_from_directory(
    directory,
    labels = 'inferred',
    label_mode = 'categorical',
    shuffle = shuffle,
    seed = 42,
    image_size = (256, 256),
    batch_size = 32,
  )


# The training split MUST be shuffled: labels are inferred from the folder names
# and, without shuffling, files are read in sorted order, so every batch would
# contain a single class and gradient updates would be heavily biased.
train_data = _load_split('./Data/train', shuffle = True)

# Validation and test splits keep a fixed order so that predictions can be
# aligned with file/label order during evaluation.
val_data = _load_split('./Data/valid', shuffle = False)
test_data = _load_split('./Data/test', shuffle = False)

Found 613 files belonging to 4 classes.
Found 72 files belonging to 4 classes.
Found 315 files belonging to 4 classes.


In [10]:
# Display the element spec of the training dataset (image batch and label batch).
train_data.take(10)

<_TakeDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 4), dtype=tf.float32, name=None))>

# 3. Create Model

In [11]:
def create_model(input_shape: tuple, output_shape: int) -> Model:
  """Build a plain CNN: five stacked 3x3 ReLU convolutions and a softmax head.

  Args:
    input_shape: Shape of one input sample, passed to `Input`.
    output_shape: Number of units in the final softmax `Dense` layer.

  Returns:
    An uncompiled Keras `Model` mapping the input tensor to class probabilities.
  """
  inputs = Input(shape = input_shape)

  # Convolution widths grow to 128 filters and shrink back; no padding or
  # pooling is used, so each layer trims the spatial size by 2 pixels.
  features = inputs
  for n_filters in (32, 64, 128, 64, 32):
    features = Conv2D(n_filters, (3, 3), activation = 'relu')(features)

  flat = Flatten()(features)
  probabilities = Dense(units = output_shape, activation = 'softmax')(flat)
  return Model(inputs, probabilities)

In [12]:
# Baseline CNN for 256x256 RGB inputs and 4 output classes.
model = create_model(input_shape = (256, 256, 3), output_shape = 4)

In [13]:
# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [14]:
# Print the layer-by-layer output shapes and parameter counts of the baseline CNN.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 conv2d_1 (Conv2D)           (None, 252, 252, 64)      18496     
                                                                 
 conv2d_2 (Conv2D)           (None, 250, 250, 128)     73856     
                                                                 
 conv2d_3 (Conv2D)           (None, 248, 248, 64)      73792     
                                                                 
 conv2d_4 (Conv2D)           (None, 246, 246, 32)      18464     
                                                                 
 flatten (Flatten)           (None, 1936512)           0     

# 4. Train model

In [15]:
# Train the baseline CNN. The validation split is evaluated after every epoch so
# that `history` also records val_loss / val_accuracy and overfitting is visible.
history = model.fit(
    train_data,
    validation_data = val_data,
    epochs = 10,  # You can adjust the number of epochs
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# ImageNet-pretrained ResNet50 without its classification head (backbone only).
resnet_50 = tf.keras.applications.resnet50.ResNet50(
    weights = 'imagenet',
    include_top = False,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [17]:
# Keep the backbone weights trainable so that training fine-tunes the pretrained
# layers instead of freezing them (set to False to train only the new head).
resnet_50.trainable = True

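**TODO:** Explain why the ResNet50 backbone is left fully trainable (all pretrained layers are fine-tuned) instead of being frozen.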

In [19]:
def create_from_pretrain_model(pre_train_model, input_shape: tuple, output_shape: int) -> Model:
  """Attach a global-average-pooling + softmax head to a pretrained backbone.

  Args:
    pre_train_model: Callable Keras model used as the feature extractor.
    input_shape: Shape of one input sample, passed to `Input`.
    output_shape: Number of units in the final softmax `Dense` layer.

  Returns:
    An uncompiled Keras `Model` mapping the input tensor to class probabilities.

  NOTE(review): the input tensor is fed to the backbone as-is; no
  model-specific preprocessing is applied here — confirm the caller handles it.
  """
  inputs = Input(shape = input_shape)
  pooled = GlobalAveragePooling2D()(pre_train_model(inputs))
  predictions = Dense(units = output_shape, activation = 'softmax')(pooled)
  return Model(inputs, predictions)

In [20]:
# ResNet50-based classifier for 256x256 RGB inputs and 4 output classes.
custom_resnet_50 = create_from_pretrain_model(
    pre_train_model = resnet_50,
    input_shape = (256, 256, 3),
    output_shape = 4,
)

In [21]:
# Print the architecture and trainable / non-trainable parameter counts.
custom_resnet_50.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, None, None, 2048   23587712  
                             )                                   
                                                                 
 global_average_pooling2d (  (None, 2048)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense_1 (Dense)             (None, 4)                 8196      
                                                                 
Total params: 23595908 (90.01 MB)
Trainable params: 23542788 (89.81 MB)
Non-trainable params: 53120 (207.50 KB)
_________________________________________________________________


In [22]:
# Same loss/optimizer/metric setup as the baseline CNN.
# NOTE(review): 0.001 may be a large step size when every pretrained layer is
# trainable — confirm this learning rate is intended for fine-tuning.
custom_resnet_50.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [24]:
# Fine-tune the ResNet50-based model. The validation split is evaluated after
# every epoch so that `history` records val_loss / val_accuracy; with 100 epochs
# this is the only way to see when the model starts to overfit.
history = custom_resnet_50.fit(
    train_data,
    validation_data = val_data,
    epochs = 100,  # You can adjust the number of epochs
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78