In [26]:
import os
import shutil

# Root of the dataset and the source folders holding the firearm and knives images
dataset_path = "./dataset"
firearm_path = os.path.join(dataset_path, "firearm_dataset")
knives_path = os.path.join(dataset_path, "knives_dataset")

# Class names; they also name the labeled output sub-folders below
firearm_label = "firearm"
knives_label = "knives"

# Labeled output folders: <dataset_path>/labeled2/<class name>
labeled_path = os.path.join(dataset_path, "labeled2")
firearm_labeled_path = os.path.join(labeled_path, firearm_label)
knives_labeled_path = os.path.join(labeled_path, knives_label)

# Create the labeled output folders. makedirs also creates any missing parent
# (labeled_path, dataset_path), and exist_ok=True makes re-running the cell a
# no-op instead of relying on a separate check-then-create step.
for class_dir in (firearm_labeled_path, knives_labeled_path):
    os.makedirs(class_dir, exist_ok=True)

# Integer class encoding that is written into each image's label file
label_encoding = {firearm_label: 0, knives_label: 1}

# Label and encode the firearm images: copy each .jpg into the labeled folder
# and write its integer class label to a companion .txt file.
for filename in os.listdir(firearm_path):
    if filename.endswith(".jpg"):
        src_path = os.path.join(firearm_path, filename)
        dst_path = os.path.join(firearm_labeled_path, filename)
        shutil.copy(src_path, dst_path)
        label = label_encoding[firearm_label]
        # Save the label in a .txt file named after the image. splitext strips only
        # the final extension, so a name with extra dots (e.g. "a.b.jpg") keeps its
        # full stem instead of colliding with other images on "a.txt".
        label_path = os.path.join(firearm_labeled_path, os.path.splitext(filename)[0] + ".txt")
        with open(label_path, "w") as f:
            f.write(str(label))
        # As part of data cleaning, read the label back and check it was saved correctly
        with open(label_path, "r") as f:
            saved_label = int(f.read())
        if saved_label != label:
            print("Error: Label mismatch for", filename)

# Label and encode the knives images: copy each .bmp into the labeled folder
# and write its integer class label to a companion .txt file.
for filename in os.listdir(knives_path):
    if filename.endswith(".bmp"):
        src_path = os.path.join(knives_path, filename)
        dst_path = os.path.join(knives_labeled_path, filename)
        shutil.copy(src_path, dst_path)
        label = label_encoding[knives_label]
        # Save the label in a .txt file named after the image. splitext strips only
        # the final extension, so a name with extra dots (e.g. "a.b.bmp") keeps its
        # full stem instead of colliding with other images on "a.txt".
        label_path = os.path.join(knives_labeled_path, os.path.splitext(filename)[0] + ".txt")
        with open(label_path, "w") as f:
            f.write(str(label))
        # As part of data cleaning, read the label back and check it was saved correctly
        with open(label_path, "r") as f:
            saved_label = int(f.read())
        if saved_label != label:
            print("Error: Label mismatch for", filename)


In [27]:
# As part of data cleaning, this cell scans the firearm and knives source datasets
# for byte-identical duplicate images; when a duplicate is found, that file is deleted.
import hashlib

# Maps the SHA-256 digest of a file's bytes -> path of the first file seen with that content
image_dict = {}

# Scan the firearm images for duplicates. Iterate in sorted order: os.listdir returns
# entries in arbitrary order, which would make the surviving copy differ between runs.
for filename in sorted(os.listdir(firearm_path)):
    if filename.endswith(".jpg"):
        file_path = os.path.join(firearm_path, filename)
        with open(file_path, "rb") as f:
            image_bytes = f.read()
        image_hash = hashlib.sha256(image_bytes).hexdigest()
        if image_hash in image_dict:
            # Same bytes as an earlier file: report the pair and delete this later one
            print(f"Duplicate image found: {file_path} and {image_dict[image_hash]}")
            os.remove(file_path)  # remove the duplicate file
        else:
            image_dict[image_hash] = file_path

# Scan the knives images for duplicates, reusing image_dict so content already seen
# earlier also counts. Iterate in sorted order: os.listdir returns entries in
# arbitrary order, which would make the surviving copy differ between runs.
for filename in sorted(os.listdir(knives_path)):
    if filename.endswith(".bmp"):
        file_path = os.path.join(knives_path, filename)
        with open(file_path, "rb") as f:
            image_bytes = f.read()
        image_hash = hashlib.sha256(image_bytes).hexdigest()
        if image_hash in image_dict:
            # Same bytes as an earlier file: report the pair and delete this later one
            print(f"Duplicate image found: {file_path} and {image_dict[image_hash]}")
            os.remove(file_path)  # remove the duplicate file
        else:
            image_dict[image_hash] = file_path


        

In [3]:
# Merging the labeled knives and firearm folders

# Path to the merged labeled output folder
merged_labeled_path = os.path.join(dataset_path, "labeled", "merged1")

# Create the merged output folder. The path is nested under "labeled", so makedirs is
# required: os.mkdir raises FileNotFoundError when that parent folder does not exist.
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(merged_labeled_path, exist_ok=True)

# Copy the labeled firearm images (.jpg/.jpeg) and their .txt label files
# into the merged labeled output folder; any other file is skipped.
firearm_merge_suffixes = (".jpg", ".jpeg", ".txt")
for filename in os.listdir(firearm_labeled_path):
    if not filename.endswith(firearm_merge_suffixes):
        continue
    src_path = os.path.join(firearm_labeled_path, filename)
    dst_path = os.path.join(merged_labeled_path, filename)
    shutil.copy(src_path, dst_path)

# Copy the labeled knives images (.bmp) and their .txt label files
# into the merged labeled output folder; any other file is skipped.
# NOTE(review): shutil.copy replaces an existing destination file, so a knives file
# whose name matches one already in the merged folder would overwrite it — confirm
# file names are unique across the two classes.
knives_merge_suffixes = (".bmp", ".txt")
for filename in os.listdir(knives_labeled_path):
    if not filename.endswith(knives_merge_suffixes):
        continue
    src_path = os.path.join(knives_labeled_path, filename)
    dst_path = os.path.join(merged_labeled_path, filename)
    shutil.copy(src_path, dst_path)



In [13]:

# Count every file (images and label files alike) found under the merged folder,
# including any nested sub-folders
num_files = 0
for walk_root, walk_dirs, walk_files in os.walk(merged_labeled_path):
    num_files += len(walk_files)

print(f"There are {num_files} files in the merged folder")

There are 13090 files in the merged folder


In [29]:
import cv2

# Target size handed to cv2.resize for every image
IMG_SIZE = (224, 224)

# Labeled output folders, one per class, under <dataset_path>/labeled2
labeled_path = os.path.join(dataset_path, "labeled2")
firearm_labeled_path, knives_labeled_path = (
    os.path.join(labeled_path, class_name) for class_name in ("firearm", "knives")
)

# File extension (without the dot) used when re-saving the converted images
output_format = "jpg"

# Two-element label vectors written into the label files, one per class.
# NOTE: this rebinds firearm_label, which the labeling cell defines as the string "firearm".
firearm_label, knife_label = [1, 0], [0, 1]

# Normalize the firearm images and labels: resize each .jpg in place to IMG_SIZE
# and rewrite its label file with the firearm label vector.
for filename in os.listdir(firearm_labeled_path):
    if filename.endswith(".jpg"):
        # Load the image
        img_path = os.path.join(firearm_labeled_path, filename)
        img = cv2.imread(img_path)
        # cv2.imread signals an unreadable/corrupt file by returning None rather than
        # raising; skip it so cv2.resize does not crash the whole cell.
        if img is None:
            print(f"Warning: could not read image, skipping: {img_path}")
            continue
        # Resize the image
        img = cv2.resize(img, IMG_SIZE)
        # Save the resized image over the original file
        output_path = img_path
        if not cv2.imwrite(output_path, img):
            print(f"Warning: could not write image: {output_path}")
            continue
        # Normalize the label. splitext strips only the final extension, so names with
        # extra dots (e.g. "a.b.jpg") do not collide on a shared "a.txt".
        label_path = os.path.join(firearm_labeled_path, os.path.splitext(filename)[0] + ".txt")
        with open(label_path, 'w') as f:
            f.write(str(firearm_label))
    
# Normalize the knives images and labels: resize each .bmp to IMG_SIZE, re-save it
# in output_format, delete the original BMP, and rewrite its label file with the
# knife label vector.
for filename in os.listdir(knives_labeled_path):
    if filename.endswith(".bmp"):
        # Load the image
        img_path = os.path.join(knives_labeled_path, filename)
        img = cv2.imread(img_path)
        # cv2.imread signals an unreadable/corrupt file by returning None rather than
        # raising; skip it so cv2.resize does not crash the whole cell.
        if img is None:
            print(f"Warning: could not read image, skipping: {img_path}")
            continue
        # Resize the image
        img = cv2.resize(img, IMG_SIZE)
        # splitext strips only the final extension, so "a.b.bmp" and "a.c.bmp" are
        # saved as distinct files instead of both overwriting "a.jpg".
        stem = os.path.splitext(filename)[0]
        # Save the converted image
        output_path = os.path.join(knives_labeled_path, stem + "." + output_format)
        if not cv2.imwrite(output_path, img):
            # Keep the original BMP when the converted copy could not be written,
            # otherwise the image would be lost entirely.
            print(f"Warning: could not write image, keeping original: {img_path}")
            continue
        # Remove the original BMP image now that the converted copy exists
        os.remove(img_path)
        # Normalize the label
        label_path = os.path.join(knives_labeled_path, stem + ".txt")
        with open(label_path, 'w') as f:
            f.write(str(knife_label))


In [31]:
import os
import random
import shutil
from sklearn.model_selection import train_test_split

# Labeled input folders, one per class
labeled_path = "./dataset/labeled2"
firearm_labeled_path = os.path.join(labeled_path, "firearm")
knives_labeled_path = os.path.join(labeled_path, "knives")

# Share of the data that goes to training; the remainder goes to testing
train_ratio = 0.65
test_ratio = 1 - train_ratio

# Destination folders for the two datasets
output_train_path = "./dataset/training_dataset_65split"
output_test_path = "./dataset/testing_dataset_65split"

# Make sure both destination folders exist before anything is copied into them
for split_dir in (output_train_path, output_test_path):
    if not os.path.exists(split_dir):
        os.makedirs(split_dir)

# Split the firearm samples into training and testing datasets.
# The labeled folder holds each image next to its .txt label file, so the files are
# grouped by base name and whole groups are split: an image and its label always land
# in the same dataset instead of being shuffled independently of each other.
firearm_images = sorted(os.listdir(firearm_labeled_path))  # sorted: listdir order is arbitrary
firearm_groups = {}
for filename in firearm_images:
    firearm_groups.setdefault(os.path.splitext(filename)[0], []).append(filename)

# random_state pins the shuffle so re-running the cell reproduces the same split
firearm_stems_train, firearm_stems_test = train_test_split(
    sorted(firearm_groups), test_size=test_ratio, random_state=42
)
firearm_images_train = [name for stem in firearm_stems_train for name in firearm_groups[stem]]
firearm_images_test = [name for stem in firearm_stems_test for name in firearm_groups[stem]]

# Copy the training firearm images and labels to the output training folder
for filename in firearm_images_train:
    img_path = os.path.join(firearm_labeled_path, filename)
    output_img_path = os.path.join(output_train_path, filename)
    shutil.copy(img_path, output_img_path)

# Copy the testing firearm images and labels to the output testing folder
for filename in firearm_images_test:
    img_path = os.path.join(firearm_labeled_path, filename)
    output_img_path = os.path.join(output_test_path, filename)
    shutil.copy(img_path, output_img_path)
    

# Split the knives samples into training and testing datasets.
# The labeled folder holds each image next to its .txt label file, so the files are
# grouped by base name and whole groups are split: an image and its label always land
# in the same dataset instead of being shuffled independently of each other.
knives_images = sorted(os.listdir(knives_labeled_path))  # sorted: listdir order is arbitrary
knives_groups = {}
for filename in knives_images:
    knives_groups.setdefault(os.path.splitext(filename)[0], []).append(filename)

# random_state pins the shuffle so re-running the cell reproduces the same split
knives_stems_train, knives_stems_test = train_test_split(
    sorted(knives_groups), test_size=test_ratio, random_state=42
)
knives_images_train = [name for stem in knives_stems_train for name in knives_groups[stem]]
knives_images_test = [name for stem in knives_stems_test for name in knives_groups[stem]]

# Copy the training knives images and labels to the output training folder.
# NOTE(review): this folder is shared with the firearm files and shutil.copy replaces
# existing files — confirm file names are unique across the two classes.
for filename in knives_images_train:
    img_path = os.path.join(knives_labeled_path, filename)
    output_img_path = os.path.join(output_train_path, filename)
    shutil.copy(img_path, output_img_path)

# Copy the testing knives images and labels to the output testing folder
for filename in knives_images_test:
    img_path = os.path.join(knives_labeled_path, filename)
    output_img_path = os.path.join(output_test_path, filename)
    shutil.copy(img_path, output_img_path)
   

