# **Import libraries**

In [None]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import glob
import math
import cv2

import argparse
import random
import gc
import os
os.environ["KERAS_BACKEND"] = "tensorflow"

from tqdm import tqdm
pd.options.display.max_colwidth = 1000
tqdm.pandas()

import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Add, Concatenate, Input
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D, LSTM, Reshape
from keras.layers import BatchNormalization, SeparableConv2D, DepthwiseConv2D, LeakyReLU, GlobalAveragePooling2D
from keras import optimizers
from keras import backend as K
from keras import layers

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.regularizers import L2
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Make sure we are able to handle large datasets
import resource

# getrlimit returns the (soft, hard) limits on open file descriptors; the soft
# limit is raised to the hard limit here.
# NOTE(review): presumably so that data loading can keep many image files open
# at once -- confirm.
low, high = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (high, high))

# Initialize constants and lists

In [None]:
# Class code -> human-readable activity name (insertion order is c0..c9).
activity_map = {
    'c0': 'Safe driving',
    'c1': 'Texting - right',
    'c2': 'Talking on the phone - right',
    'c3': 'Texting - left',
    'c4': 'Talking on the phone - left',
    'c5': 'Operating the radio',
    'c6': 'Drinking',
    'c7': 'Reaching behind',
    'c8': 'Hair and makeup',
    'c9': 'Talking to passenger',
}

# Class code -> integer label; the position of the code in activity_map.
class_mapping = {code: index for index, code in enumerate(activity_map)}

# Algorithm hyperparameters
num_epochs = 70
batch_size = 64
width = 256
temperature = 0.1

# Stronger augmentations for contrastive, weaker ones for supervised training
contrastive_augmentation = dict(min_area=0.75, brightness=0.5, jitter=0.2)
classification_augmentation = dict(min_area=0.8, brightness=0.3, jitter=0.1)

# Image geometry and generator batch size
IMG_DIM = 208
CHANNEL_SIZE = 3
BATCH_SIZE = 64

# **Data preprocess**

> **Fetching training driver_imgs_list**

In [None]:
# Root folder of the competition data (absolute path of the Kaggle input mount).
dir_path = '/kaggle/input/state-farm-distracted-driver-detection'

# Image index table; the cells below use its 'classname' and 'img' columns.
driver_imgs_list = pd.read_csv(os.path.join(dir_path, "driver_imgs_list.csv"))
# 'class' holds the readable activity name for each class code; replace()
# leaves any value that is not a key of activity_map unchanged.
driver_imgs_list['class'] = driver_imgs_list['classname'].replace(activity_map)
driver_imgs_list.head(2)

> **Fetching the list of training images**

In [None]:
# Collect every .jpg that sits one folder below imgs/train (one folder per class).
# NOTE(review): glob returns paths in arbitrary order; nothing below relies on it.
list_train_img = glob.glob(os.path.join(dir_path, "imgs", "train", "*",  "*.jpg"))
print("Total number of Train Images is -------->", len(list_train_img))

> **Adding the image path to the data frame**

In [None]:
# Index the globbed paths by file name once, so every CSV row is resolved with
# a dictionary lookup instead of a substring scan over all paths.
# Iterating in reverse makes the first path win if a file name occurs twice.
path_by_filename = {
    os.path.basename(img_path): img_path for img_path in reversed(list_train_img)
}

driver_imgs_list['ImgPath'] = driver_imgs_list['img'].map(path_by_filename)

# Fail loudly when a listed image is missing on disk instead of carrying NaN
# paths into the data pipeline.
missing_imgs = driver_imgs_list.loc[driver_imgs_list['ImgPath'].isna(), 'img']
if not missing_imgs.empty:
    raise FileNotFoundError(
        f"{len(missing_imgs)} image(s) listed in driver_imgs_list.csv were not "
        f"found under imgs/train, e.g. {missing_imgs.iloc[0]}"
    )

df = driver_imgs_list.copy()

# Free the intermediates; only `df` is used from here on.
del driver_imgs_list, list_train_img, path_by_filename, missing_imgs

In [None]:
# Show one row to check the 'class' and 'ImgPath' columns added above.
df.head(1)

## **Data sampling**

In [None]:
# Size of the per-class subset: a tenth of all rows divided by 1.5 (floored),
# i.e. roughly 1/15 of the data set for every class.
total_rows = len(df)
rows_to_keep = int((total_rows / 10) // 1.5)

# Draw `rows_to_keep` rows from every 'classname' group. The fixed seed makes
# the subset identical on each run, like the seeded train/val splits below.
# NOTE: sample() raises ValueError if a class has fewer rows than rows_to_keep,
# and re-running this cell samples from the already reduced frame.
df = df.groupby('classname', group_keys=False).sample(n=rows_to_keep, random_state=42)

df.head()

In [None]:
# Integer label of every row, looked up from its class code; an unknown code
# raises KeyError.
df['label'] = df['classname'].map(class_mapping.__getitem__)
df.head()

## **Data Overview**

In [None]:
# Bar plot and pie chart of the number of samples per class
class_freq_count = df.classname.value_counts()

class_freq_count.plot(kind='bar', label='index')
plt.title('Sample Per Class')
plt.show()

# value_counts() orders the classes by frequency, not as c0..c9, so every wedge
# takes its label from the index of the counts rather than from the order of
# activity_map.
pie_labels = [activity_map[class_code] for class_code in class_freq_count.index]
plt.pie(class_freq_count, autopct='%1.1f%%', shadow=True, labels=pie_labels)
plt.title('Sample % per class')
plt.show()

> **Creating a function that plots images of a given class from their image paths**

In [None]:
def draw_driver(imgs, df, classId='c0'):
    """Plot up to six images of one activity class on a 2x3 grid.

    Args:
        imgs: DataFrame whose 'img' column holds file names located in
            ``<dir_path>/imgs/train/<classId>/``. Rows after the sixth are
            ignored.
        df: Unused; kept so that existing calls keep working.
        classId: Class code; selects the image folder and, through
            ``activity_map``, the figure title.
    """
    fig, axis = plt.subplots(2, 3, figsize=(20, 7))
    panels = axis.ravel()
    class_dir = os.path.join(dir_path, "imgs", "train", classId)

    shown = 0
    for panel, (_, record) in zip(panels, imgs.iterrows()):
        # Draw on the panel itself; pyplot's implicit "current axes" is always
        # the last panel of the grid.
        panel.imshow(load_img(os.path.join(class_dir, record['img'])))
        shown += 1

    # Hide the frames of panels that received no image.
    for panel in panels[shown:]:
        panel.axis("off")

    fig.suptitle(activity_map[classId])
    plt.show()

In [None]:
# Plot the first six rows of df whose classname is 'c0'.
draw_driver(df[df.classname == 'c0'].head(6), df, classId='c0')

In [None]:
# Plot the first six rows of df whose classname is 'c1'.
draw_driver(df[df.classname == 'c1'].head(6), df, classId='c1')

## **Pretrain - Finetune split:**
- Pretrain 0.8 - Finetune 0.2
- Pretrain: 0.8 train, 0.2 val
- Pretrain_train: 0.5 unlabeled, 0.5 labeled
- Finetune: 0.8 train, 0.2 val

In [None]:
def _stratified_split(frame, test_size, seed=42):
    """Shuffle ``frame`` and split it in two, stratified on its 'class' column.

    Returns the pair produced by ``train_test_split``: the part holding
    ``1 - test_size`` of the rows first, the ``test_size`` part second.
    """
    return train_test_split(frame, test_size=test_size, random_state=seed,
                            shuffle=True, stratify=frame['class'])


# pretrain and finetune split
pretrain_dataset, finetune_dataset = _stratified_split(df, test_size=0.2)

# pretrain split
pretrain_train, pretrain_val = _stratified_split(pretrain_dataset, test_size=0.2)

# half of the pretraining rows are treated as unlabeled, half as labeled
pretrain_unlabeled, pretrain_labeled = _stratified_split(pretrain_train, test_size=0.5)

# finetune split
finetune_train, finetune_val = _stratified_split(finetune_dataset, test_size=0.2)

In [None]:
# Row counts of the four splits, one per line, in this order:
# pretrain_train, pretrain_val, finetune_train, finetune_val
for split_frame in (pretrain_train, pretrain_val, finetune_train, finetune_val):
    print(len(split_frame))

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # NOTE(review): already imported in the first cell

# Define data generators
# No arguments are passed, so no rescaling or augmentation option is set here.
image_datagen = ImageDataGenerator()

In [None]:
def _finetune_flow(frame):
    """Return a batched iterator over ``frame`` for fine-tuning.

    Images are read from the 'ImgPath' column and resized to
    (IMG_DIM, IMG_DIM); the 'class' column becomes a one-hot target.
    """
    return image_datagen.flow_from_dataframe(
        dataframe=frame,
        x_col='ImgPath',
        y_col='class',
        target_size=(IMG_DIM, IMG_DIM),
        # Pin the class order to c0..c9 so the one-hot index equals the integer
        # 'label' from class_mapping and is the same for both splits; without
        # `classes` the index follows the alphabetical order of the names.
        classes=list(activity_map.values()),
        class_mode='categorical',
        batch_size=BATCH_SIZE)


finetune_train_dataset = _finetune_flow(finetune_train)
finetune_val_dataset = _finetune_flow(finetune_val)

> Function to convert a pd.DataFrame into a TensorFlow dataset

In [None]:
new_size = (IMG_DIM, IMG_DIM)
def data_conversion(df):
    """Load the images listed in ``df`` into a batched ``tf.data.Dataset``.

    Args:
        df: DataFrame with an 'ImgPath' column (image file paths) and a
            'label' column (class ids).

    Returns:
        A dataset of ``(images, labels)`` batches holding up to ``batch_size``
        elements each; every image is an RGB array resized to
        (IMG_DIM, IMG_DIM). All images are held in memory at once.
    """
    target_size = (IMG_DIM, IMG_DIM)  # (height, width) as expected by load_img

    # load_img (imported in the first cell) decodes, converts to RGB and resizes
    # in one step. The PIL `Image` module is not imported by this notebook, so
    # it cannot be used here.
    resized_images = [
        np.asarray(load_img(img_path, target_size=target_size, interpolation="bicubic"))
        for img_path in df['ImgPath']
    ]

    # Stack the images into one array: (n_images, IMG_DIM, IMG_DIM, 3)
    X = np.array(resized_images)

    # Labels in the same row order as the images
    y = tf.convert_to_tensor(df['label'].to_numpy())

    # Slice into (image, label) pairs, then group them into batches
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    return dataset.batch(batch_size)

In [None]:
# Turn each pretraining split into a batched dataset
pretrain_labeled_dataset = data_conversion(pretrain_labeled)
pretrain_unlabeled_dataset = data_conversion(pretrain_unlabeled)
pretrain_val_dataset = data_conversion(pretrain_val)

# Report the type of each converted split (labeled, unlabeled, validation)
for converted in (pretrain_labeled_dataset, pretrain_unlabeled_dataset, pretrain_val_dataset):
    print(type(converted))

> Merging pretrain_labeled and pretrain_unlabeled

In [None]:
# Pair the i-th unlabeled batch with the i-th labeled batch, so every element is
# ((unlabeled_images, unlabeled_labels), (labeled_images, labeled_labels)).
# Both parts come from data_conversion, so the "unlabeled" part still carries
# its labels. prefetch lets upcoming elements be prepared ahead of use.
train_dataset = tf.data.Dataset.zip(
        (pretrain_unlabeled_dataset, pretrain_labeled_dataset)
    ).prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Display the type of the zipped dataset.
type(train_dataset)

In [None]:
# Every element of train_dataset is
# ((unlabeled_images, unlabeled_labels), (labeled_images, labeled_labels)),
# so index [0][1] would be a label tensor rather than an image. Unpack by name
# and look at the first image of the unlabeled batch.
(unlabeled_images, unlabeled_labels), (labeled_images, labeled_labels) = next(iter(train_dataset))

img = unlabeled_images[0]
print(img.shape)
print(type(img))
print(img)

In [None]:
# First batch of the validation dataset.
# NOTE(review): despite its name, `img` is the whole (images, labels) tuple
# produced by data_conversion, not a single image.
img = next(iter(pretrain_val_dataset))
print(type(img))
print(img)

## **Image augmentation**

In [None]:
class RandomColorAffine(layers.Layer):
    """Random per-image colour transform, applied only when ``training`` is truthy.

    Each image is multiplied by its own 3x3 colour matrix: the identity scaled
    by a factor drawn from [1 - brightness, 1 + brightness], plus element-wise
    noise drawn from [-jitter, jitter]. The result is clipped to [0, 1].
    """

    def __init__(self, brightness=0, jitter=0, **kwargs):
        super().__init__(**kwargs)

        self.jitter = jitter
        self.brightness = brightness

    def get_config(self):
        """Return the base layer config extended with ``brightness`` and ``jitter``."""
        return {
            **super().get_config(),
            "brightness": self.brightness,
            "jitter": self.jitter,
        }

    def call(self, images, training=None):
        """Return ``images`` colour-transformed in training mode, unchanged otherwise.

        ``training`` defaults to None, so the transform is skipped unless a
        truthy value reaches this method.
        """
        if not training:
            return images

        num_images = tf.shape(images)[0]

        # One brightness factor per image, broadcast over the 3x3 matrix
        scale = 1 + tf.random.uniform(
            (num_images, 1, 1, 1), minval=-self.brightness, maxval=self.brightness
        )
        # One 3x3 noise matrix per image
        noise = tf.random.uniform(
            (num_images, 1, 3, 3), minval=-self.jitter, maxval=self.jitter
        )

        identity = tf.eye(3, batch_shape=[num_images, 1])
        color_transforms = identity * scale + noise
        return tf.clip_by_value(tf.matmul(images, color_transforms), 0, 1)



# Image augmentation module
def get_augmenter(min_area, brightness, jitter):
    """Build the augmentation pipeline as a ``keras.Sequential`` model.

    Args:
        min_area: Sets the zoom factor ``1 - sqrt(min_area)`` used for the
            translation and zoom layers.
        brightness: Forwarded to ``RandomColorAffine``.
        jitter: Forwarded to ``RandomColorAffine``.

    The layers run in this order: rescale by 1/255, horizontal flip,
    translation, zoom, colour transform.
    """
    zoom_factor = 1.0 - math.sqrt(min_area)
    max_shift = zoom_factor / 2
    zoom_range = (-zoom_factor, 0.0)

    rescale = layers.Rescaling(1 / 255)
    flip = layers.RandomFlip("horizontal")
    shift = layers.RandomTranslation(max_shift, max_shift)
    zoom = layers.RandomZoom(zoom_range, zoom_range)
    recolor = RandomColorAffine(brightness, jitter)

    return keras.Sequential([rescale, flip, shift, zoom, recolor])


def visualize_augmentations(num_images):
    """Show original, weakly and strongly augmented versions of a few images.

    Takes the first ``num_images`` images of the first unlabeled batch of
    ``train_dataset`` and draws a grid of four rows: the originals, one weakly
    augmented version and two independently drawn strongly augmented versions.

    Args:
        num_images: Number of grid columns (images taken from the batch).
    """
    # Sample a batch from a dataset
    images = next(iter(train_dataset))[0][0][:num_images]

    # Apply augmentations. training=True is passed explicitly because
    # RandomColorAffine.call defaults to training=None and skips its colour
    # transform unless a truthy value reaches it.
    image_rows = [
        images,
        get_augmenter(**classification_augmentation)(images, training=True),
        get_augmenter(**contrastive_augmentation)(images, training=True),
        get_augmenter(**contrastive_augmentation)(images, training=True),
    ]
    row_titles = [
        "Original:",
        "Weakly augmented:",
        "Strongly augmented 1st:",
        "Strongly augmented 2nd:",
    ]
    plt.figure(figsize=(12, 8), dpi=100)
    for row, image_row in enumerate(image_rows):
        for column, image in enumerate(image_row):
            plt.subplot(4, num_images, row * num_images + column + 1)
            plt.imshow(image)
            # Only the first panel of each row carries the row title
            if column == 0:
                plt.title(row_titles[row], loc="left")
            plt.axis("off")
    plt.tight_layout()


visualize_augmentations(num_images=4)
