In [None]:
### Setup the Environment

# !pip install numpy pandas tensorflow keras opencv-python scikit-learn matplotlib
# !pip install torch torchvision


In [1]:
### Load Annotations and Prepare the Dataset

import os
import pandas as pd
import numpy as np
import cv2
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load annotations
annotations_path = './train_coco_annotations.csv'
train_folder = './data/train/'

# Load the CSV file
annotations = pd.read_csv(annotations_path)

# Prepare data
image_paths = []
labels = []

for _, row in annotations.iterrows():
    image_id = row['FileName']
    category = row['Category'] # 0 as holds, 1 as volumns
    image_path = os.path.join(train_folder, f"{image_id}")  # Adjust extension if needed .jpg

    if os.path.exists(image_path):
        image_paths.append(image_path)
        labels.append(category)
    else:
        print(f"Image not found: {image_path}")

# Convert labels to numeric
unique_classes = list(set(labels))
class_to_idx = {cls: idx for idx, cls in enumerate(unique_classes)}
labels_numeric = [class_to_idx[label] for label in labels]

# # One-hot encode the labels
# labels_one_hot = to_categorical(labels_numeric, num_classes=len(unique_classes))

# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(image_paths, labels_numeric, test_size=0.2, random_state=42)

# Print summary
print(f"Number of training samples: {len(X_train)}")
print(f"Number of validation samples: {len(X_val)}")


Number of training samples: 103894
Number of validation samples: 25974


In [None]:
# # debugging codes
# print(f"Labels: {labels}")
# print(f"Unique Classes: {unique_classes}")
# print(f"Class to Index Mapping: {class_to_idx}")
# print(f"Labels Numeric: {labels_numeric}")

# for path in image_paths:
#     if not os.path.exists(path):
#         print(f"Missing file: {path}")

# print(f"Train Folder: {train_folder}")
# print(os.listdir(train_folder))

# print(f"Annotations Dataset Shape: {annotations.shape}")
# print(annotations.head())
# print(f"Columns in Dataset: {annotations.columns}")
# print(annotations['Category'].unique())  # Replace 'label_column' with the actual column name

# labels = annotations['Category'].tolist()  # Replace 'label_column' with the actual column name
# print(f"Extracted Labels: {labels}")



Labels: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [2]:
### Build a Data Loader

# Data generator function
def data_generator(image_paths, labels, batch_size, image_size=(224, 224)):
    while True:
        for start in range(0, len(image_paths), batch_size):
            end = min(start + batch_size, len(image_paths))
            batch_images = []
            batch_labels = labels[start:end]
            
            for image_path in image_paths[start:end]:
                image = cv2.imread(image_path)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, image_size) / 255.0  # Normalize
                batch_images.append(image)
            
            yield np.array(batch_images), np.array(batch_labels)


In [None]:
# ### Define and Train the Model (this model takes 10 hours)

# from tensorflow.keras.applications import ResNet50
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
# from tensorflow.keras.optimizers import Adam

# # Define the model
# base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=Input(shape=(224, 224, 3)))
# x = GlobalAveragePooling2D()(base_model.output)
# x = Dense(512, activation='relu')(x)
# output = Dense(len(unique_classes), activation='softmax')(x)

# model = Model(inputs=base_model.input, outputs=output)

# # Freeze the base model for transfer learning
# for layer in base_model.layers:
#     layer.trainable = False

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# # One-hot encode the labels
# y_train = to_categorical(y_train, num_classes=2)  # Assuming 2 classes
# y_val = to_categorical(y_val, num_classes=2)

# # Train the model
# batch_size = 32
# train_gen = data_generator(X_train, y_train, batch_size=batch_size)
# val_gen = data_generator(X_val, y_val, batch_size=batch_size)

# steps_per_epoch = len(X_train) // batch_size
# validation_steps = len(X_val) // batch_size

# model.fit(train_gen,
#           steps_per_epoch=steps_per_epoch,
#           validation_data=val_gen,
#           validation_steps=validation_steps,
#           epochs=10)


Epoch 1/10
[1m 939/3246[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m1:04:07[0m 2s/step - accuracy: 0.9220 - loss: 0.2937

KeyboardInterrupt: 

In [None]:
### Model with improved efficiency

from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ReduceLROnPlateau
import tensorflow as tf
import numpy as np

# Enable mixed precision training for faster computation
from tensorflow.keras.mixed_precision import Policy, set_global_policy
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('mixed_float16')
# mixed_precision.set_policy(policy)

from tensorflow.keras.mixed_precision import Policy, set_global_policy

# Enable mixed precision training
policy = Policy('mixed_float16')
set_global_policy(policy)


# Define the model with a smaller base (MobileNetV2)
base_model = MobileNetV2(weights='imagenet', include_top=False, input_tensor=Input(shape=(128, 128, 3)))
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(512, activation='relu')(x)
output = Dense(2, activation='softmax')(x)  # Assuming 2 classes (holds and volumes)

model = Model(inputs=base_model.input, outputs=output)

# Freeze the base model for initial training
for layer in base_model.layers:
    layer.trainable = False

# Compile the model with mixed precision and Adam optimizer
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Convert labels to one-hot encoding
y_train = to_categorical(y_train, num_classes=2)
y_val = to_categorical(y_val, num_classes=2)

# Preprocess images and create TensorFlow datasets
def preprocess_images(image_paths, labels):
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    def load_and_preprocess_image(path, label):
        image = tf.io.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, (128, 128))
        image = tf.keras.applications.mobilenet_v2.preprocess_input(image)
        return image, label
    dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(32).prefetch(tf.data.AUTOTUNE)
    return dataset

# Create train and validation datasets
train_dataset = preprocess_images(X_train, y_train)
val_dataset = preprocess_images(X_val, y_val)

# Learning rate scheduler for dynamic adjustment
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)

# Train the model
model.fit(train_dataset,
          validation_data=val_dataset,
          epochs=10,
          callbacks=[lr_scheduler])

# Unfreeze the base model for fine-tuning
for layer in base_model.layers:
    layer.trainable = True

# Recompile with a smaller learning rate for fine-tuning
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fine-tune the model
model.fit(train_dataset,
          validation_data=val_dataset,
          epochs=5,
          callbacks=[lr_scheduler])


  base_model = MobileNetV2(weights='imagenet', include_top=False, input_tensor=Input(shape=(128, 128, 3)))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/10
[1m  53/3247[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25:49[0m 485ms/step - accuracy: 0.8478 - loss: 0.9250

In [4]:
# !pip install --upgrade tensorflow
from tensorflow.keras.mixed_precision import Policy, set_global_policy

# Enable mixed precision training
policy = Policy('mixed_float16')
set_global_policy(policy)


In [None]:
### Debug codes
# print(f"y_train shape: {len(y_train)}")
# print(f"y_val shape: {len(y_val)}")
# print(f"y_train shape: {y_train.shape}")
# print(f"y_val shape: {y_val.shape}")
# print(f"Model output shape: {model.output_shape}")

# import numpy as np

# # Convert to NumPy arrays
# y_train = np.array(y_train)
# y_val = np.array(y_val)

# # Debug the shapes
# print(f"y_train shape: {y_train.shape}")
# print(f"y_val shape: {y_val.shape}")
# print(f"Model output shape: {model.output_shape}")



Model output shape: (None, 2)
y_train shape: (103894,)
y_val shape: (25974,)
Model output shape: (None, 2)


In [None]:
# for batch_X, batch_y in train_gen:
#     print(f"Batch X shape: {batch_X.shape}, Batch y shape: {batch_y.shape}")
#     break

# print(f"Unique Classes: {unique_classes}")
# print(f"Number of Unique Classes: {len(unique_classes)}")

# print(f"Steps per epoch: {steps_per_epoch}")
# print(f"Validation steps: {validation_steps}")


Batch X shape: (32, 224, 224, 3), Batch y shape: (32,)
Unique Classes: [0, 1]
Number of Unique Classes: 2
Steps per epoch: 3246
Validation steps: 811


In [None]:
### Save and Evaluate the Model

# Save the model
model.save('trained_model.h5')

# Evaluate
val_loss, val_acc = model.evaluate(val_gen, steps=validation_steps)
print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_acc}")


In [None]:
### Test the Model

# Load test images
test_annotations_path = './test_coco_annotation.csv'
test_folder = './data/test/'
test_annotations = pd.read_csv(test_annotations_path)

test_image_paths = [os.path.join(test_folder, f"{row['image_id']}.jpg") for _, row in test_annotations.iterrows()]

# Generate predictions
for image_path in test_image_paths:
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224)) / 255.0
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    
    prediction = model.predict(image)
    predicted_class = unique_classes[np.argmax(prediction)]
    print(f"Image: {image_path}, Predicted Class: {predicted_class}")
