# Import all the required dependencies

In [None]:
# Standard library
import os

# TensorFlow's native logging level is read when the library is first loaded,
# so this must be set before the first `tensorflow` / `tensorflow.keras` import
# below; setting it afterwards has no effect on the already-loaded runtime.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Third-party
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2


In [None]:
from tensorflow.keras.applications import EfficientNetB0


In [None]:
import kagglehub

# Kaggle dataset handle; no version is pinned, so the latest version is fetched.
dataset_handle = "zeyad1mashhour/driver-inattention-detection-dataset"

# dataset_download() returns the local location of the downloaded files,
# which is echoed so the reader can see where the data landed.
path = kagglehub.dataset_download(dataset_handle)
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/zeyad1mashhour/driver-inattention-detection-dataset?dataset_version_number=1...


100%|██████████| 373M/373M [00:03<00:00, 104MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/zeyad1mashhour/driver-inattention-detection-dataset/versions/1


In [None]:

# Root directory of the dataset. Reuse the location returned by
# kagglehub.dataset_download() (`path`, set in the download cell) instead of a
# hard-coded cache directory, so the notebook keeps working when the cache
# lives elsewhere or the dataset version changes.
dataset_path = path

# One sub-directory per split; each is expected to hold the images together
# with an "_annotations.txt" file that is read further down.
train_folder = os.path.join(dataset_path, "train")
valid_folder = os.path.join(dataset_path, "valid")
test_folder = os.path.join(dataset_path, "test")

# Size every image is resized to before being fed to the network.
target_size = (224, 224)
# Number of output classes (width of the final softmax layer).
num_classes = 6



# Data processing


In [None]:
def read_annotations(file_path):
    """Parse an annotation text file into a DataFrame.

    Each line is split on whitespace. The first token is taken as the image
    name and the second token must be five comma-separated integers
    (``x_min,y_min,x_max,y_max,class_id``). Any further tokens on the line are
    ignored. Lines with fewer than two tokens, with non-integer values, or
    without exactly five values are skipped silently.

    Args:
        file_path: Path of the annotation file to read.

    Returns:
        pandas.DataFrame with columns ``image_name``, ``x_min``, ``y_min``,
        ``x_max``, ``y_max`` and ``class_id``; one row per accepted line.
    """
    column_names = ["image_name", "x_min", "y_min", "x_max", "y_max", "class_id"]
    rows = []

    with open(file_path, "r") as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()
            if len(tokens) >= 2:
                try:
                    values = [int(field) for field in tokens[1].split(",")]
                except ValueError:
                    # Non-numeric field: treat the whole line as malformed.
                    values = None

                if values is not None and len(values) == 5:
                    rows.append([tokens[0], *values])

    return pd.DataFrame(rows, columns=column_names)


def preprocess_image(image_path, target_size):
    """Read one image from disk and prepare it for the network.

    The image is loaded with OpenCV, converted from BGR to RGB channel order,
    resized to ``target_size`` and passed through the EfficientNet
    ``preprocess_input`` function.

    Args:
        image_path: Path of the image file.
        target_size: Size handed directly to ``cv2.resize``.

    Returns:
        The preprocessed image, or ``None`` when ``cv2.imread`` returned
        ``None`` for the path.
    """
    bgr_pixels = cv2.imread(image_path)
    if bgr_pixels is None:
        return None

    rgb_resized = cv2.resize(
        cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB),
        target_size,
    )
    return preprocess_input(rgb_resized)


def one_hot_encode_labels(labels, num_classes):
    """One-hot encode integer class ids into a fixed-width matrix.

    Column ``i`` always corresponds to class id ``i``, and the result always
    has ``num_classes`` columns. This keeps the encoding identical across
    splits even when a split does not contain every class (fitting a separate
    encoder per split would produce fewer, differently-ordered columns).

    Args:
        labels: Sequence of integer class ids in ``[0, num_classes)``.
        num_classes: Total number of classes, i.e. the output width.

    Returns:
        numpy.ndarray of shape ``(len(labels), num_classes)`` and dtype
        float64; an empty input yields shape ``(0, num_classes)``.

    Raises:
        ValueError: If any label is outside ``[0, num_classes)``.
    """
    class_ids = np.asarray(labels, dtype=np.int64).reshape(-1)

    # Fail loudly instead of letting negative ids wrap around via indexing.
    if class_ids.size and (class_ids.min() < 0 or class_ids.max() >= num_classes):
        raise ValueError(
            f"class ids must lie in [0, {num_classes}); "
            f"got range [{class_ids.min()}, {class_ids.max()}]"
        )

    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(num_classes, dtype=np.float64)[class_ids]


def preprocess_dataset(annotations, folder_path, num_classes):
    """Load every annotated image of a split and one-hot encode its labels.

    Images are resized to the module-level ``target_size``. Rows whose image
    cannot be loaded (``preprocess_image`` returns ``None``) are dropped, so
    the outputs may have fewer entries than ``annotations`` has rows.

    Args:
        annotations: DataFrame with at least ``image_name`` and ``class_id``
            columns.
        folder_path: Directory that the image names are relative to.
        num_classes: Forwarded to ``one_hot_encode_labels``.

    Returns:
        Tuple ``(images, one_hot_labels)``: the stacked image array and the
        matching one-hot label matrix.
    """
    loaded_images = []
    kept_class_ids = []

    for file_name, class_id in zip(annotations["image_name"], annotations["class_id"]):
        pixels = preprocess_image(os.path.join(folder_path, file_name), target_size)
        if pixels is not None:
            loaded_images.append(pixels)
            kept_class_ids.append(class_id)

    return np.array(loaded_images), one_hot_encode_labels(kept_class_ids, num_classes)


# Every split directory carries its own "_annotations.txt"; parse them in
# train / valid / test order.
train_annotations, valid_annotations, test_annotations = [
    read_annotations(os.path.join(split_folder, "_annotations.txt"))
    for split_folder in (train_folder, valid_folder, test_folder)
]

# Load the images of each split and build the matching one-hot label matrix.
train_images, train_labels = preprocess_dataset(
    train_annotations, train_folder, num_classes
)
valid_images, valid_labels = preprocess_dataset(
    valid_annotations, valid_folder, num_classes
)
test_images, test_labels = preprocess_dataset(
    test_annotations, test_folder, num_classes
)


In [None]:
# Sanity check: report image/label array shapes for each split
# (train, then test, then valid).
for split_name, split_images, split_labels in (
    ("Train", train_images, train_labels),
    ("Test", test_images, test_labels),
    ("Valid", valid_images, valid_labels),
):
    print(f"{split_name} images shape:", split_images.shape)
    print(f"{split_name} labels shape:", split_labels.shape)


Train images shape: (11948, 224, 224, 3)
Train labels shape: (11948, 6)
Test images shape: (985, 224, 224, 3)
Test labels shape: (985, 6)
Valid images shape: (1922, 224, 224, 3)
Valid labels shape: (1922, 6)


# Assigning the data to Train and Validation

In [None]:

# Short aliases used by the training cells; no data is copied here.
X_train, y_train = train_images, train_labels
X_val, y_val = valid_images, valid_labels

# EfficientNetB0 model

In [None]:
# ImageNet-pretrained EfficientNetB0 without its classification top, used as a
# feature extractor.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
# Freeze the backbone so only the layers added below are trained.
base_model.trainable = False

# Classification head stacked on the frozen backbone: pooled features ->
# 256-unit ReLU layer -> dropout -> softmax over `num_classes`.
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


# Training stage 1 with the EfficientNetB0 base frozen (only the new classification head is trained)

In [None]:

# --- Class weights -----------------------------------------------------------
# 'balanced' weights computed from the class ids in the training annotations.
train_class_ids = train_annotations['class_id']
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_class_ids),
    y=train_class_ids,
)

# Weights are keyed by their position in `class_weights` (0, 1, 2, ...), and any
# weight below 1.0 is scaled up by 1.5.
# NOTE(review): weights below 1.0 presumably belong to the more frequent
# classes, so this scaling softens the balancing — confirm it is intentional.
class_weight_dict = {
    index: (weight * 1.5 if weight < 1.0 else weight)
    for index, weight in enumerate(class_weights)
}

# --- Augmentation ------------------------------------------------------------
# Random rotations, shifts, zooms and horizontal flips for the training batches.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

# --- Warm-up -----------------------------------------------------------------
# Run a single random image through the model once before training starts.
dummy_data = tf.random.uniform((1, 224, 224, 3))
model.predict(dummy_data)

# --- Compile -----------------------------------------------------------------
stage1_optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=stage1_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# --- Callbacks ---------------------------------------------------------------
# Both watch the validation loss: the learning rate is cut to 20% after 3
# epochs without improvement (never below 1e-6), and training stops after 5
# such epochs, restoring the best weights seen.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# --- Fit ---------------------------------------------------------------------
# Augmented training batches of 16; validation uses the un-augmented arrays.
train_batches = datagen.flow(X_train, y_train, batch_size=16)
history = model.fit(
    train_batches,
    epochs=8,
    validation_data=(X_val, y_val),
    callbacks=[reduce_lr, early_stopping],
    class_weight=class_weight_dict,
)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15s/step


  self._warn_if_super_not_called()


Epoch 1/8
[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 236ms/step - accuracy: 0.5969 - loss: 1.3034 - val_accuracy: 0.8398 - val_loss: 0.4997 - learning_rate: 0.0010
Epoch 2/8
[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 182ms/step - accuracy: 0.7653 - loss: 0.7293 - val_accuracy: 0.8418 - val_loss: 0.4371 - learning_rate: 0.0010
Epoch 3/8
[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 178ms/step - accuracy: 0.7905 - loss: 0.6348 - val_accuracy: 0.8585 - val_loss: 0.3978 - learning_rate: 0.0010
Epoch 4/8
[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 179ms/step - accuracy: 0.7961 - loss: 0.6081 - val_accuracy: 0.8954 - val_loss: 0.3406 - learning_rate: 0.0010
Epoch 5/8
[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 177ms/step - accuracy: 0.8164 - loss: 0.5585 - val_accuracy: 0.8944 - val_loss: 0.3286 - learning_rate: 0.0010
Epoch 6/8
[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
# Persist the model after stage-1 training; the relative path resolves against
# the notebook's current working directory.
stage1_model_path = "stage1_model.keras"
model.save(stage1_model_path)
print("Stage 1 model saved!")


Stage 1 model saved!


In [None]:
from google.colab import drive

# The target path lives under /content/drive, so Google Drive has to be mounted
# before saving; mounting here keeps this cell working on a fresh kernel
# (Restart & Run All) regardless of where the separate mount cell sits.
drive.mount('/content/drive')

# NOTE(review): the file is named "stage2_model" although only stage-1 training
# is visible before this cell — confirm the intended filename.
model.save("/content/drive/MyDrive/stage2_model.keras")


In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem. Any cell that reads or writes
# under this directory needs the mount to have happened first.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive
