A large portion of the code (blocks 1-32) was taken from:
 https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/01_Potato_PlantVillageEDA.ipynb

In [181]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns

from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from keras.applications import ConvNeXtTiny

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [182]:
# Make runs repeatable. `set_random_seed` seeds Python's `random`, NumPy and
# TensorFlow in one call, so a separate `np.random.seed(1)` is not needed.
from tensorflow.keras.utils import set_random_seed
set_random_seed(1)

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset archive stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [183]:
# List the project folder on Drive to confirm the dataset archive is there.
!ls /content/drive/MyDrive/HackAthon

hotdog-nothotdog.zip


In [None]:
unzip_path = f'/content/drive/MyDrive/HackAthon/hotdog-nothotdog.zip'
!unzip -q "$unzip_path"

In [184]:
# Options shared by all three directory iterators: images are resized to
# 256x256, labels are 0/1 (binary), and the same seed is used everywhere.
common_flow_args = dict(target_size=(256, 256), class_mode='binary', seed=42)

# One generator holds back 20% of /content/train as the validation subset.
datagen = ImageDataGenerator(validation_split=0.2)

# Training subset (shuffled, which is the iterator's default).
train_ds = datagen.flow_from_directory(
    '/content/train', batch_size=32, subset='training', **common_flow_args
)

# Validation subset, kept in a fixed order.
val_ds = datagen.flow_from_directory(
    '/content/train', batch_size=32, subset='validation', shuffle=False,
    **common_flow_args
)

# Held-out test images from their own directory, also in a fixed order.
test_ds = datagen.flow_from_directory(
    '/content/test', shuffle=False, **common_flow_args
)

Found 2400 images belonging to 2 classes.
Found 600 images belonging to 2 classes.
Found 644 images belonging to 2 classes.


In [None]:
# Show which integer label each class name was assigned in the training iterator.
train_ds.class_indices

{'hotdog': 0, 'nothotdog': 1}

In [None]:
# Same check for the test iterator; the mapping should match the training one.
test_ds.class_indices

{'hotdog': 0, 'nothotdog': 1}

In [None]:
# Pre-trained ConvNeXt-Tiny backbone used as a fixed feature extractor.
# `include_top=False` drops the ImageNet classification head; the `classes`
# argument only configures that head, so it is not passed here.
convnet = ConvNeXtTiny(include_top=False, weights="imagenet")

# Freeze the backbone so only the layers stacked on top of it are trained.
convnet.trainable = False

In [None]:
# Early-stopping callback with a patience of 5 epochs.
es = EarlyStopping(patience = 5)

# model1: frozen ConvNeXt backbone followed by a small convolutional head.
# No Rescaling layer is added; images reach the backbone as loaded.
model1 = Sequential([
    layers.Input(shape=(256, 256, 3)),
    convnet,
    # Convolutional and pooling layers on top of the backbone features
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.5),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    # Dense layer
    Dense(64, activation='relu'),
    # Single sigmoid unit for the binary output
    Dense(1, activation='sigmoid'),
])

model1.compile(optimizer="adam", loss="binary_crossentropy", metrics=["acc"])

In [190]:
from tensorflow.keras.metrics import binary_accuracy, AUC, Precision, Recall

In [186]:
# Confirm the input shape model1 expects (batch dimension shown as None).
model1.input_shape

(None, 256, 256, 3)

In [203]:
# Metric identifiers passed to `compile(metrics=...)` for the models below.
# The export cells lower-case each entry to look up the matching
# `history` / `val_` history keys, so order here fixes the export order.
standard_metrics = ['binary_accuracy', 'AUC', 'Precision', 'Recall']

In [187]:
# Print the layer-by-layer summary of model1.
# NOTE(review): the output recorded below lists only convnext_tiny -> Flatten ->
# Dense -> Dense and none of the Conv2D / Dropout / MaxPooling2D layers that
# model1 is built with above, so it appears to come from an earlier definition.
# Re-run this cell after rebuilding model1 to refresh it.
model1.summary()

Model: "sequential_38"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 convnext_tiny (Functional)  (None, None, None, 768)   27820128  
                                                                 
 flatten_33 (Flatten)        (None, 49152)             0         
                                                                 
 dense_53 (Dense)            (None, 64)                3145792   
                                                                 
 dense_54 (Dense)            (None, 1)                 65        
                                                                 
Total params: 30,965,985
Trainable params: 3,145,857
Non-trainable params: 27,820,128
_________________________________________________________________


In [204]:
# Train model1 for up to 20 epochs, validating on val_ds after each epoch and
# letting the early-stopping callback end the run sooner.
h1 = model1.fit(
    x=train_ds,
    epochs=20,
    validation_data=val_ds,
    callbacks=[es],
)

Epoch 1/20

KeyboardInterrupt: ignored

In [201]:
# Fresh early-stopping callback (patience of 5 epochs) for this run.
es = EarlyStopping(patience = 5)

# model2: frozen ConvNeXt backbone with a plain dense head (no extra
# convolutional layers and no Rescaling layer).
model2 = Sequential([
    layers.Input(shape=(256, 256, 3)),
    convnet,
    Flatten(),
    # Dense layer
    Dense(64, activation='relu'),
    # Single sigmoid unit for the binary output
    Dense(1, activation='sigmoid'),
])

# Track the shared metric set so the results can be exported later.
model2.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=standard_metrics,
)

In [202]:
# Train model2 for up to 20 epochs with validation on val_ds; the
# early-stopping callback may end training before epoch 20.
h2 = model2.fit(
    x=train_ds,
    epochs=20,
    validation_data=val_ds,
    callbacks=[es],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


In [229]:
# Last-epoch value of every tracked metric for model2, interleaved as
# [train, val, train, val, ...] in the order given by standard_metrics.
export = [
    h2.history[key][-1]
    for metric in standard_metrics
    for key in (metric.lower(), f'val_{metric.lower()}')
]
export

[1.0,
 0.9700000286102295,
 1.0,
 0.9931887984275818,
 1.0,
 0.9607843160629272,
 1.0,
 0.9800000190734863]

In [256]:
# One-row summary table of model2's final-epoch metrics.
# Column order must match the interleaved [train, val, ...] order of `export`.
# Fix: 'train_Recall' previously carried a trailing space, which ended up in
# the DataFrame column name and in the exported CSV header.
metric_columns = [
    'train_accuracy', 'val_accuracy',
    'train_AUC', 'val_AUC',
    'train_Precision', 'val_Precision',
    'train_Recall', 'val_Recall',
]
df = pd.DataFrame(data=[export], columns=metric_columns, index=['convnet'])
df

Unnamed: 0,train_accuracy,val_accuracy,train_AUC,val_AUC,train_Precision,val_Precision,train_Recall,val_Recall
convnet,1.0,0.97,1.0,0.993189,1.0,0.960784,1.0,0.98


In [241]:
# Write the one-row metrics table to model2.csv in the current working directory.
df.to_csv('model2.csv')

In [None]:
# Save model1 to tiny_conv_model.h5 in the current working directory.
# NOTE(review): the metrics exported above were read from h2 (model2), and the
# recorded model1.fit run was interrupted -- confirm model1 is the model that
# is meant to be saved here.
model1.save('tiny_conv_model.h5')

---
## InceptionV3

In [243]:
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input

In [244]:
# Separate generator for the InceptionV3 experiment, again holding back 20%
# of the training directory as the validation subset.
datagen1 = ImageDataGenerator(validation_split=0.2)

In [245]:
# InceptionV3 is fed 299x299 images here, so the data is read again at that
# size. These options are shared by all three iterators.
inception_flow_args = dict(
    target_size=(299, 299),  # Resize
    batch_size=32,
    class_mode='binary',
    seed=42,
)

# Training subset (shuffled, which is the iterator's default).
train_ds_inception = datagen1.flow_from_directory(
    '/content/train', subset='training', **inception_flow_args
)

# Validation subset, kept in a fixed order.
val_ds_inception = datagen1.flow_from_directory(
    '/content/train', subset='validation', shuffle=False, **inception_flow_args
)

# Held-out test images, also in a fixed order.
test_ds_inception = datagen1.flow_from_directory(
    '/content/test', shuffle=False, **inception_flow_args
)

Found 2400 images belonging to 2 classes.
Found 600 images belonging to 2 classes.
Found 644 images belonging to 2 classes.


In [247]:
# Early stopping with a patience of 4 epochs
early_stopping = EarlyStopping(patience=4)

# Load InceptionV3 pre-trained on ImageNet, without its classification head
inception = InceptionV3(include_top=False, weights='imagenet', input_shape=(299, 299, 3))

# Freeze every backbone layer so only the new head is trained
for layer in inception.layers:
    layer.trainable = False

# Input layer
inputs = layers.Input(shape=(299, 299, 3))

# Apply InceptionV3's own input preprocessing inside the model
x = preprocess_input(inputs)

# training=False keeps the backbone in inference mode during fit
x = inception(x, training=False)

# Classification head: flatten -> dense -> batch norm -> dropout -> sigmoid
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid')(x)

# Assemble the model
model_inception = Model(inputs=inputs, outputs=outputs)

# Compile with the shared metric set so results are comparable across models
model_inception.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=standard_metrics)

# Fit the model to the data.
# `batch_size` is not passed: train_ds_inception already yields batches
# (batch_size=32 in flow_from_directory), and Keras documents that
# `batch_size` must not be specified for generator / Sequence inputs.
res_cnn_inception = model_inception.fit(train_ds_inception,
                    epochs=20,
                    validation_data=val_ds_inception,
                    verbose=1,
                    callbacks=[early_stopping])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


In [248]:
# Last-epoch value of every tracked metric for the InceptionV3 model,
# interleaved as [train, val, train, val, ...] in standard_metrics order.
export2 = [
    res_cnn_inception.history[key][-1]
    for metric in standard_metrics
    for key in (metric.lower(), f'val_{metric.lower()}')
]
export2

[0.9975000023841858,
 0.9783333539962769,
 0.9999784827232361,
 0.9976389408111572,
 0.9983305335044861,
 0.9584664702415466,
 0.996666669845581,
 1.0]

In [254]:
# One-row summary table of the InceptionV3 model's final-epoch metrics.
# Column order must match the interleaved [train, val, ...] order of `export2`.
# Fix: 'train_Recall' previously carried a trailing space, which ended up in
# the DataFrame column name and in the exported CSV header.
metric_columns = [
    'train_accuracy', 'val_accuracy',
    'train_AUC', 'val_AUC',
    'train_Precision', 'val_Precision',
    'train_Recall', 'val_Recall',
]
df2 = pd.DataFrame(data=[export2], columns=metric_columns, index=['cnn'])
df2.head(1)

Unnamed: 0,train_accuracy,val_accuracy,train_AUC,val_AUC,train_Precision,val_Precision,train_Recall,val_Recall
cnn,0.9975,0.978333,0.999978,0.997639,0.998331,0.958466,0.996667,1.0


In [259]:
# Re-display the ConvNeXt (model2) metrics row for side-by-side comparison
# with the InceptionV3 row shown above.
df.head(1)

Unnamed: 0,train_accuracy,val_accuracy,train_AUC,val_AUC,train_Precision,val_Precision,train_Recall,val_Recall
convnet,1.0,0.97,1.0,0.993189,1.0,0.960784,1.0,0.98


In [260]:
# Write the InceptionV3 metrics row to model_cnn.csv in the current working directory.
df2.to_csv('model_cnn.csv')