# V2LG RA Project

## - Aakash Yadav

# Mounting Dataset

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!unzip "/content/gdrive/MyDrive/imageRanking/human_activity_retrieval_dataset.zip" -d "dataset"

In [3]:
cd /content/dataset/human_activity_retrieval_dataset

/content/dataset/human_activity_retrieval_dataset


In [4]:
ls

[0m[01;34mgallery[0m/  [01;34mquery_images[0m/  test_image_info.json  [01;34mtrain[0m/  train_image_info.json


# Importing Libraries

In [5]:
import tensorflow as tf
import json
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from collections import Counter
import cv2
import imgaug as ia
import imgaug.augmenters as iaa
import random
from PIL import Image

# Data Exploration

In [6]:
# Load the training JSON file
train_json_path = 'train_image_info.json'
with open(train_json_path, 'r') as json_file:
    train_data = json.load(json_file)

# Load the test JSON file
test_json_path = 'test_image_info.json'
with open(test_json_path, 'r') as json_file:
    test_data = json.load(json_file)


In [7]:
labels = list(train_data.values())

# Count the occurrences of each label
label_counts = Counter(labels)
for label, count in label_counts.items():
    print(f"Class: {label}, Count: {count}")

Class: sitting, Count: 763
Class: using_laptop, Count: 763
Class: hugging, Count: 762
Class: sleeping, Count: 761
Class: drinking, Count: 763
Class: clapping, Count: 763
Class: dancing, Count: 760
Class: cycling, Count: 763
Class: calling, Count: 763
Class: laughing, Count: 763
Class: eating, Count: 764
Class: fighting, Count: 764
Class: listening_to_music, Count: 764
Class: running, Count: 764
Class: texting, Count: 763


- 15 classes, with approximately same number of images, so balanced dataset

In [8]:
# Calculate the sum of all label counts
total_count = sum(label_counts.values())
print(f"Total Count: {total_count}")

Total Count: 11443


In [9]:
train_folder_path = 'train/'

# Lists to store image widths and heights
widths = []
heights = []

# Iterate through each image in train_data
for image_filename in train_data.keys():
    image_path = os.path.join(train_folder_path, image_filename)

    with Image.open(image_path) as img:
        image_size = img.size
        widths.append(image_size[0])  # width
        heights.append(image_size[1])  # height

# Calculate minimum and maximum values for widths and heights
min_width = min(widths)
max_width = max(widths)
min_height = min(heights)
max_height = max(heights)

# Print the results
print(f"Minimum Width: {min_width}")
print(f"Maximum Width: {max_width}")
print(f"Minimum Height: {min_height}")
print(f"Maximum Height: {max_height}")


Minimum Width: 84
Maximum Width: 478
Minimum Height: 84
Maximum Height: 318


- Images vary a lot in their size, so resizing is required for better implementation of a CNN model.

# Resizing the Images

In [10]:
target_size = (256, 256)
for folder_path in ["gallery/", "train/", "query_images/"]:
    for filename in os.listdir(folder_path):
        # Read the original image
        image_path = os.path.join(folder_path, filename)
        img = cv2.imread(image_path)

        # Resize the image
        resized_img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

        # Save the resized image with the same filename
        cv2.imwrite(image_path, resized_img)

# Definig the CNN Architecture

In [11]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [12]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(15, activation='softmax'))
model.summary()
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 127, 127, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 30, 30, 128)       0

# Creating Class Data Folders

In [13]:
if not os.path.exists('class_data'):
    os.makedirs('class_data')
class_data_dict = {}
for filename, label in train_data.items():
    class_subfolder_path = os.path.join('class_data', label)
    if not os.path.exists(class_subfolder_path):
        os.makedirs(class_subfolder_path)
    image_path = os.path.join('train/', filename)
    new_image_path = os.path.join(class_subfolder_path, filename)
    os.rename(image_path, new_image_path)
    if label not in class_data_dict:
        class_data_dict[label] = [filename]
    else:
        class_data_dict[label].append(filename)

# Loading Images from "train" Folder

In [18]:
from keras.utils import to_categorical

image_paths = []
labels = []
for filename, label in train_data.items():
    image_paths.append(os.path.join('class_data', label, filename))
    labels.append(label)
from sklearn.preprocessing import LabelEncoder

# Create a label encoder
encoder = LabelEncoder()

encoder.fit(labels)
encoded_labels = encoder.transform(labels)

# Create the image data generator
train_datagen = ImageDataGenerator(rescale=1./255)

# Load the training data
train_generator = train_datagen.flow_from_directory(
    'class_data',
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical'
)

Found 11443 images belonging to 15 classes.


In [19]:
# Train the model
model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a482f47ee30>

In [20]:
# Save the Model
model.save('cnn_model.keras')

In [32]:
from sklearn.metrics import average_precision_score, label_ranking_average_precision_score
from sklearn.preprocessing import LabelEncoder
from keras.applications.resnet50 import preprocess_input

labels = list(test_data.values())

# Use LabelEncoder to convert string labels to numerical labels
label_encoder = LabelEncoder()
numerical_labels = label_encoder.fit_transform(labels)

# Load and preprocess gallery and query images
def load_and_preprocess_image(folder, filename):
    img = cv2.imread(os.path.join(folder, filename))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, convert to RGB
    img_array = preprocess_input(np.expand_dims(img, axis=0))
    return img_array

gallery_images = [load_and_preprocess_image("gallery", filename) for filename in os.listdir("gallery")]
query_images = [load_and_preprocess_image("query_images", filename) for filename in os.listdir("query_images")]

# Stack the image arrays
gallery_images = np.vstack(gallery_images)
query_images = np.vstack(query_images)

# Get the model predictions for query images on the gallery
query_predictions = model.predict(query_images)

# Calculate Mean Average Precision (mAP) at K={1, 10, 50} and mean rank
k_values = [1, 10, 50]
for k in k_values:
    average_precision = 0.0
    mean_rank = 0.0
    for i in range(len(query_predictions)):
        # Sort the gallery images based on similarity scores
        sorted_indices = np.argsort(query_predictions[i])[::-1]

        # Get the top k predictions
        top_k_indices = sorted_indices[:k]

        # Check if the true label is in the top k predictions
        if numerical_labels[i] in numerical_labels[top_k_indices]:
            precision = 1.0
            rank = np.where(numerical_labels[top_k_indices] == numerical_labels[i])[0][0] + 1
        else:
            precision = 0.0
            rank = k + 1

        average_precision += precision / k
        mean_rank += rank

    mean_average_precision = average_precision / len(query_predictions)
    mean_rank /= len(query_predictions)

    print(f"mAP@{k}: {mean_average_precision:.4f}")
    print(f"Mean Rank: {mean_rank:.2f}")


mAP@1: 0.4467
Mean Rank: 1.55
mAP@10: 0.0447
Mean Rank: 6.53
mAP@50: 0.0089
Mean Rank: 28.67
