In [5]:
import os
import numpy as np
import pandas as pd
import json
import tensorflow as tf
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import EfficientNetB0

from sklearn.model_selection import train_test_split


from PIL import Image

In [3]:
IMAGE_DIR = '/Users/mjs/Desktop/Dev/TACO_MJS/data/all_images'

# Set the batch size and image size
BATCH_SIZE = 16
IMG_SIZE = (224, 224)

# Load the image file names and labels from a CSV file
df = pd.read_csv('./data/labels.csv')



In [8]:
image_size = 224
batch_size = 6
epochs = 10
learning_rate = 0.0001


df = pd.read_csv('./data/labels.csv')

labels = list(df.columns)[1:]

# Split data into train and validation sets
train_df, val_df = train_test_split(df, test_size=0.2)

# Define data generators for train and validation sets
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.005,
    zoom_range=[0.9, 1.4],
    horizontal_flip=True,
    vertical_flip=False,
    brightness_range=(.8,1.2),
    fill_mode='nearest',
)

datagen_val = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
)

train_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,
    directory='/Users/mjs/Desktop/Dev/TACO/data/all_images',
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(image_size, image_size), 
    batch_size=batch_size,
    shuffle=True,
)

val_generator = datagen_val.flow_from_dataframe(
    dataframe=val_df,
    directory='/Users/mjs/Desktop/Dev/TACO/data/all_images',
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(image_size, image_size), 
    batch_size=batch_size,
    shuffle=False,
)

Found 1200 validated image filenames.
Found 300 validated image filenames.


In [10]:

base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))

# Freeze all the layers in the base model
for layer in base_model.layers:
    layer.trainable = True

# Add a custom classification head
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
predictions = Dense(len(labels), activation='sigmoid')(x)

# Create the final model by combining the base model with the custom head
model = tf.keras.models.Model(inputs=base_model.input, outputs=predictions)

# Compile the model
optimizer = Adam(lr=LEARNING_RATE)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Define callbacks
#checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)

# Train the model
# Train the model
history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    #callbacks=[checkpoint]
                   )

# Convert the history object to a dictionary
history_dict = history.history
# Convert any numpy arrays to lists so they can be serialized by JSON
for key, val in history_dict.items():
    if isinstance(val, np.ndarray):
        history_dict[key] = val.tolist()
# Save the history dictionary to a JSON file
with open('history.json', 'w') as outfile:
    json.dump(history_dict, outfile)

Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 

In [129]:
# Evaluate the model on the validation set
loss, accuracy = model.evaluate(val_generator)
print(f'Validation loss: {loss:.3f}')
print(f'Validation accuracy: {accuracy:.3f}')

Validation loss: 0.122
Validation accuracy: 0.006
