In [1]:
import cv2
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import random

In [2]:
# Edge length, in pixels, that every test image is resized to.
img_size = 224

# Folder that holds the raw test-set images.
base_dir_test = 'D:/JATHURSH/SEMI-06/project/state-farm-distracted-driver-detection/imgs/test'

# CSV describing the test images: an `img` file-name column followed by
# per-class score columns (see the preview rendered below this cell).
test_labels_path = 'D:/JATHURSH/Jupyter_notebook_projects/DistractionDetection/test_labels.csv'
test_labels_df = pd.read_csv(filepath_or_buffer=test_labels_path)

# Preview the first five rows as the cell output.
test_labels_df.head(n=5)

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_1.jpg,1.035209e-15,4.319924e-15,3.324257e-19,1.107643e-21,6.408071e-13,1.0,2.489089e-11,4.737493e-14,2.398869e-13,7.066944e-13
1,img_10.jpg,2.715543e-09,4.765534e-10,1.169061e-10,1.461482e-12,4.745943e-09,0.999998,5.764518e-07,1.367466e-08,7.585934e-09,1.36683e-06
2,img_100.jpg,0.9998548,4.281003e-07,4.204876e-07,1.878015e-06,7.985772e-07,2.208164e-06,1.951224e-07,2.601749e-08,4.45145e-05,9.47056e-05
3,img_1000.jpg,1.760117e-15,4.474376e-23,1.053715e-16,2.119143e-13,1.344918e-16,2.082113e-15,5.24835e-14,7.964522e-15,1.0,5.700066e-13
4,img_100000.jpg,6.971782e-06,6.778057e-12,3.067891e-09,0.9999926,1.581636e-08,9.351293e-10,6.431943e-08,1.137067e-09,1.189417e-07,2.617498e-07


In [3]:
# Container for the (image array, class label) pairs collected further down.
testdata = []

# Fixed seed so that the "random" subset is identical on every
# Restart Kernel -> Run All; change it to draw a different subset.
SHUFFLE_SEED = 42

# `.tolist()` returns a fresh list, so shuffling it leaves the DataFrame untouched.
test_file_names = test_labels_df['img'].tolist()

# Use a dedicated seeded generator instead of the module-level one so the
# result does not depend on any other code that consumed random numbers.
random.Random(SHUFFLE_SEED).shuffle(test_file_names)

# Keep the first 2000 shuffled names (fewer if the CSV has fewer rows).
selected_test_file_names = test_file_names[:2000]

# Show the first few selected test file names as the cell output.
selected_test_file_names[:5]

['img_64392.jpg',
 'img_59461.jpg',
 'img_63307.jpg',
 'img_54850.jpg',
 'img_12272.jpg']

In [7]:
# Start from an empty list every time so that re-running this cell rebuilds
# the sample set instead of appending a second copy of every image.
testdata = []

# Class label per file = index of the highest-scoring class column, computed
# once for the whole frame instead of filtering the DataFrame for every image.
# NOTE(review): the labels are derived from the scores stored in the CSV —
# presumably model predictions rather than ground truth; confirm the source.
class_columns = [f'c{i}' for i in range(10)]
label_by_file = dict(zip(
    test_labels_df['img'],
    test_labels_df[class_columns].to_numpy().argmax(axis=1),
))

# Files that could not be decoded; reported after the loop.
skipped_files = []

# Loop through the selected file names
for image_file in selected_test_file_names:
    image_path = os.path.join(base_dir_test, image_file)

    # IMREAD_COLOR always yields a 3-channel image, so no grayscale handling is
    # needed; a missing or corrupt file comes back as None instead of raising.
    # NOTE(review): OpenCV returns channels in BGR order and no BGR->RGB
    # conversion happens here — confirm this matches how the training images
    # were loaded before changing it.
    img_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img_array is None:
        print(f"Warning: Unsupported image format for {image_file}")
        skipped_files.append(image_file)
        continue

    # Resize to the square input size used throughout the notebook
    new_img_array = cv2.resize(img_array, (img_size, img_size))

    # Append the image data and label to the testdata list
    testdata.append((new_img_array, label_by_file[image_file]))

if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable image(s)")

In [9]:
# Peek at the first two (resized image array, class label) tuples.
testdata[:2]

[(array([[[ 53,  64,  43],
          [ 53,  64,  44],
          [ 51,  62,  42],
          ...,
          [255, 254, 253],
          [255, 253, 254],
          [255, 252, 255]],
  
         [[ 49,  61,  39],
          [ 48,  59,  39],
          [ 48,  58,  41],
          ...,
          [255, 253, 254],
          [255, 253, 254],
          [255, 252, 255]],
  
         [[ 48,  59,  39],
          [ 46,  57,  36],
          [ 47,  57,  40],
          ...,
          [255, 252, 255],
          [255, 253, 255],
          [255, 253, 254]],
  
         ...,
  
         [[  6,   8,   8],
          [  6,   8,   8],
          [  6,   8,   8],
          ...,
          [ 41,  37,  36],
          [ 30,  26,  25],
          [ 27,  20,  23]],
  
         [[  6,   8,   8],
          [  6,   8,   8],
          [  6,   8,   8],
          ...,
          [ 66,  64,  63],
          [ 29,  27,  27],
          [ 26,  20,  25]],
  
         [[  6,   8,   8],
          [  6,   8,   8],
          [  6,   8,   8

In [13]:
# Number of (image, label) pairs collected so far.
len(testdata)

2000

In [14]:
# Split the (image, label) tuples into two parallel collections.
X = [features for features, _ in testdata]
y = [labels for _, labels in testdata]

# Convert both to numpy arrays. The image batch is reshaped to
# (N, img_size, img_size, 3): three colour channels, because the
# pretrained model takes 3-channel input.
X = np.asarray(X).reshape((-1, img_size, img_size, 3))
y = np.asarray(y)

In [15]:
# Peek at the first two images and their labels after conversion to arrays.
X[:2], y[:2]

(array([[[[ 53,  64,  43],
          [ 53,  64,  44],
          [ 51,  62,  42],
          ...,
          [255, 254, 253],
          [255, 253, 254],
          [255, 252, 255]],
 
         [[ 49,  61,  39],
          [ 48,  59,  39],
          [ 48,  58,  41],
          ...,
          [255, 253, 254],
          [255, 253, 254],
          [255, 252, 255]],
 
         [[ 48,  59,  39],
          [ 46,  57,  36],
          [ 47,  57,  40],
          ...,
          [255, 252, 255],
          [255, 253, 255],
          [255, 253, 254]],
 
         ...,
 
         [[  6,   8,   8],
          [  6,   8,   8],
          [  6,   8,   8],
          ...,
          [ 41,  37,  36],
          [ 30,  26,  25],
          [ 27,  20,  23]],
 
         [[  6,   8,   8],
          [  6,   8,   8],
          [  6,   8,   8],
          ...,
          [ 66,  64,  63],
          [ 29,  27,  27],
          [ 26,  20,  25]],
 
         [[  6,   8,   8],
          [  6,   8,   8],
          [  6,   8,   8],
   

In [16]:
# free up memory
# A bare `del` raises NameError when this cell is run a second time, so the
# already-deleted case is tolerated on purpose.
try:
    del testdata
except NameError:
    pass

# normalize
# preprocess_input may overwrite a floating-point input in place, and the raw
# X is still needed afterwards (it is saved unmodified later in the notebook).
# Passing an explicit float copy keeps X intact regardless of its dtype.
X_test_preprocessed = tf.keras.applications.mobilenet_v2.preprocess_input(
    X.astype(tf.keras.backend.floatx())
)

In [18]:
# Show the shape of the preprocessed batch and its first image.
X_test_preprocessed.shape, X_test_preprocessed[:1]

((2000, 224, 224, 3),
 array([[[[-0.58431375, -0.4980392 , -0.6627451 ],
          [-0.58431375, -0.4980392 , -0.654902  ],
          [-0.6       , -0.5137255 , -0.67058825],
          ...,
          [ 1.        ,  0.99215686,  0.9843137 ],
          [ 1.        ,  0.9843137 ,  0.99215686],
          [ 1.        ,  0.9764706 ,  1.        ]],
 
         [[-0.6156863 , -0.52156866, -0.69411767],
          [-0.62352943, -0.5372549 , -0.69411767],
          [-0.62352943, -0.54509807, -0.6784314 ],
          ...,
          [ 1.        ,  0.9843137 ,  0.99215686],
          [ 1.        ,  0.9843137 ,  0.99215686],
          [ 1.        ,  0.9764706 ,  1.        ]],
 
         [[-0.62352943, -0.5372549 , -0.69411767],
          [-0.6392157 , -0.5529412 , -0.7176471 ],
          [-0.6313726 , -0.5529412 , -0.6862745 ],
          ...,
          [ 1.        ,  0.9764706 ,  1.        ],
          [ 1.        ,  0.9843137 ,  1.        ],
          [ 1.        ,  0.9843137 ,  0.99215686]],
 
      

In [19]:
# Save preprocessed X_test and y_test
save_dir = 'D:/JATHURSH/Jupyter_notebook_projects/DistractionDetection/saved_data/train_val_test_preprocessed/'

# np.save does not create missing folders; make sure the target exists so the
# save does not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)
np.save(os.path.join(save_dir, 'X_test_preprocessed.npy'), X_test_preprocessed)

In [20]:
# Store the class labels next to the preprocessed images.
np.save(file=f'{save_dir}y_test.npy', arr=y)

In [21]:
# Keep an un-normalised copy of the test split (raw resized images + labels).
save_dir_test = 'D:/JATHURSH/Jupyter_notebook_projects/DistractionDetection/saved_data/test_split/'

# np.save does not create missing folders; make sure the target exists first.
os.makedirs(save_dir_test, exist_ok=True)
np.save(os.path.join(save_dir_test, 'X_test.npy'), X)
np.save(os.path.join(save_dir_test, 'y_test.npy'), y)