In [3]:
import os
from collections import deque

import cv2
import numpy as np
import pandas as pd
import torch

In [22]:
def convert_greyscale(image_path, size=(128, 128)):
    '''
    Convert an image to a fixed-size greyscale image, overwriting the original file

    The image is resized first and converted to a single channel afterwards,
    then written back to the same path it was read from.

    Args:
        image_path: the location of the image
        size: target (width, height) in pixels passed to cv2.resize,
              128 * 128 by default
    Raises:
        ValueError: if the file cannot be read as an image
        OSError: if the converted image cannot be written back
    '''

    # load image; cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Cannot read image: {image_path}")

    # resize
    resized_image = cv2.resize(image, size)

    # convert to greyscale (imread's default mode yields a 3-channel BGR array)
    greyscale_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # save; cv2.imwrite reports failure through its boolean return value
    if not cv2.imwrite(image_path, greyscale_image):
        raise OSError(f"Cannot write image: {image_path}")

def get_images(data_path, extensions=('.jpg',)):
    '''
    Get the paths of all the image files directly under data_path (non-recursive)

    The extension check ignores case, so 'photo.JPG' is matched as well as
    'photo.jpg'. Directories are skipped even if their name ends with a
    matching extension.

    Args:
        data_path: the directory where the images are located
        extensions: a file-name suffix, or a tuple/list of suffixes, that
                    identifies an image; only '.jpg' by default
    Return: a list of path of images, each one being data_path joined with
            the file name
    '''
    # accept a single suffix given as a plain string
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith needs a tuple; lower-case both sides for a case-insensitive match
    suffixes = tuple(ext.lower() for ext in extensions)

    images_path = []
    for name in os.listdir(data_path):
        if not name.lower().endswith(suffixes):
            continue
        full_path = os.path.join(data_path, name)
        if os.path.isfile(full_path):
            images_path.append(full_path)
    return images_path

def get_directories(path):
    '''
    List the sub-directories located directly inside path

    Args:
        path: directory path
    Return : a list holding the path of every sub-directory (path joined
             with the entry name); plain files are left out
    '''
    # build the full path of every entry, then keep only the directories
    candidates = (os.path.join(path, entry) for entry in os.listdir(path))
    return list(filter(os.path.isdir, candidates))

def process_image_set(root_path):
    '''
    Process all the images in the dataset hierarchy

    Walks root_path breadth-first. In every directory visited, each image
    returned by get_images is passed to convert_greyscale, and every
    sub-directory returned by get_directories is queued for a later visit.
    Sub-directories are queued whether or not the current directory holds
    images, so a folder mixing images and sub-folders is fully processed.

    Args:
        root_path: the path of the dataset
    '''
    # deque gives O(1) removal from the front of the queue
    pending = deque([root_path])
    while pending:
        cur_path = pending.popleft()
        for image_path in get_images(cur_path):
            convert_greyscale(image_path)
        pending.extend(get_directories(cur_path))


In [28]:
# NOTE: this absolute path is specific to one machine; point it at the local
# copy of the dataset before running this cell.
dataset_path = "D:/Monash/Monash2024Sem1/FIT3161/logo_dataset/datasetcopy/trainandtest"
# Destructive step: every image that process_image_set finds is handed to
# convert_greyscale, which writes its result back over the source file.
process_image_set(dataset_path)