In [3]:
import torch
import numpy as np
import pandas as pd
import cv2
import os
import unicodedata

In [30]:
def convert_greyscale(image_path):
    '''
    Convert an image to 128 * 128 pixel greyscale image, overwrite the origianl

    Args:
        image_path: the location of the image
    '''

    # load image
    image = cv2.imread(image_path)

    # resize
    resized_image = cv2.resize(image, (128, 128))

    # convert to greyscale
    greyscale_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # save
    cv2.imwrite(image_path, greyscale_image)

def get_images(data_path):
    '''
    Get all the images path under the data_path

    Args: 
        data_path: the directory where the images are located
    Return: a list of path of images
    '''
    images_path = [os.path.join(data_path, f) for f in os.listdir(data_path) if f.endswith('.jpg') and os.path.isfile(os.path.join(data_path, f))]
    return images_path

def get_directories(path):
    '''
    Get all the directories under path

    Args:
        path: directory path
    Return: a list of directories
    '''
    directories = [os.path.join(path, d) for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    return directories

def process_image_set(root_path):
    '''
    Process all the images in the dataset hierachy

    Args:
        root_path: the path of the dataset
    '''
    dirs = [root_path]
    while dirs:
        cur_path = dirs.pop(0)
        image_paths = get_images(cur_path)
        if image_paths == []:
            dirs += get_directories(cur_path)
        else:
            for path in image_paths:
                convert_greyscale(path)


In [31]:
dataset_path = "D:/Monash/Monash2024Sem1/FIT3161/logo_dataset/datasetcopy/trainandtest"
process_image_set(dataset_path)

In [4]:
def emoji_convert(emoji):
    '''
    Convert a 2-byte emoji to a 16-bit binary array

    Args: 
        emoji: a 2-byte emoji

    Return: 
        the 16-bit binary representation of the input emoji
    '''
    
    code_point = ord(emoji)

    binary_representation = format(code_point, '016b')

    return binary_representation

def get_2byte_emojis():
    '''
    Get all 2-byte emojis

    Return:
        a list of emojis
    '''
    emojis = []
    
    # Loop through all code points in (U+0000 to U+FFFF)
    for code_point in range(0x0000, 0x10000):
        char = chr(code_point)
        # Check the category and name of the character
        try:
            name = unicodedata.name(char)
            if "EMOJI" in name or "BLACK" in name or "WHITE" in name or "HEART" in name or "CLOUD" in name:
                emojis.append(char)
        except ValueError:
            # Skip the codes without a name
            pass

    return emojis

In [6]:
emojis = get_2byte_emojis()
print(len(emojis))
for emoji in emojis:
    binary_array = emoji_convert(emoji)

    print(f"Emoji: {emoji}")
    print(f"16-bit Binary: {binary_array}")

429
Emoji: ᔈ
16-bit Binary: 0001010100001000
Emoji: ᖰ
16-bit Binary: 0001010110110000
Emoji: ᖱ
16-bit Binary: 0001010110110001
Emoji: ᖲ
16-bit Binary: 0001010110110010
Emoji: ᖳ
16-bit Binary: 0001010110110011
Emoji: ᖴ
16-bit Binary: 0001010110110100
Emoji: ᖵ
16-bit Binary: 0001010110110101
Emoji: ᖶ
16-bit Binary: 0001010110110110
Emoji: ᖷ
16-bit Binary: 0001010110110111
Emoji: ᖸ
16-bit Binary: 0001010110111000
Emoji: ᖹ
16-bit Binary: 0001010110111001
Emoji: ᖺ
16-bit Binary: 0001010110111010
Emoji: ᖻ
16-bit Binary: 0001010110111011
Emoji: ᖼ
16-bit Binary: 0001010110111100
Emoji: ᖽ
16-bit Binary: 0001010110111101
Emoji: ᖾ
16-bit Binary: 0001010110111110
Emoji: ᖿ
16-bit Binary: 0001010110111111
Emoji: ᙿ
16-bit Binary: 0001011001111111
Emoji: ⁌
16-bit Binary: 0010000001001100
Emoji: ⁍
16-bit Binary: 0010000001001101
Emoji: ℌ
16-bit Binary: 0010000100001100
Emoji: ℑ
16-bit Binary: 0010000100010001
Emoji: ℜ
16-bit Binary: 0010000100011100
Emoji: ℨ
16-bit Binary: 0010000100101000
Emoji: ℭ
16-