# Dataset Labelling for Scores using a VLM

In [1]:
import dotenv

# Populate os.environ from a local .env file before anything below reads it.
dotenv.load_dotenv()
import os
import json

from typing import Dict, List, Optional
from openai import AzureOpenAI

Helper function that encodes a local image file as a base64 data URL, the format in which images are passed to the VLM.

Code taken from Microsoft tutorials \
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/gpt-with-vision?tabs=rest

In [2]:
# Code taken from Microsoft tutorials
# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/gpt-with-vision?tabs=rest

import base64
from mimetypes import guess_type

# Function to encode a local image into data URL 
def local_image_to_data_url(image_path):
    """
    Encodes a local file as a base64 ``data:`` URL.

    args:
    - image_path: path of the file to encode

    returns:
    - a string of the form ``data:<mime type>;base64,<encoded bytes>``; the MIME type is
      guessed from the file extension and falls back to ``application/octet-stream``
    """
    guessed_type = guess_type(image_path)[0]
    # guess_type yields None for extensions it does not recognise
    mime_type = 'application/octet-stream' if guessed_type is None else guessed_type

    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()

    base64_encoded_data = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:{mime_type};base64,{base64_encoded_data}"

# Example usage
# image_path = './Hand_0000165.jpg'
# data_url = local_image_to_data_url(image_path)
# print("Data URL:", data_url)

VLM client setup

In [None]:
class LLM():
    """
    This class provides an API to obtain LLM outputs from an Azure OpenAI chat deployment.

    By default the model is prompted as a palmistry expert that returns scores as a JSON string.
    """

    def __init__(
            self,
            api_key: Optional[str] = None,
            api_version: str = "2024-06-01",
            azure_endpoint: str = "https://hkust.azure-api.net",
            deployment: str = "gpt-4o-mini",
        ):
        """
        Sets up an instance of the LLM class by configuring the LLM client.

        All arguments are optional; calling ``LLM()`` uses the original hard-coded settings.

        args:
        - api_key (str): API key for the endpoint; read from the ``OPENAI_API_KEY``
          environment variable when not given
        - api_version (str): Azure OpenAI API version passed to the client
        - azure_endpoint (str): base URL of the Azure OpenAI endpoint
        - deployment (str): deployment name, also sent as the ``model`` of every request
        """
        if api_key is None:
            api_key = os.getenv("OPENAI_API_KEY")

        # Kept on the instance so that requests use the same name the client was configured with.
        self.deployment = deployment

        self.openai_client = AzureOpenAI(
            api_key=api_key,
            api_version=api_version,
            azure_endpoint=azure_endpoint,
            azure_deployment=deployment,
        )


    def _get_sys_prompt(self) -> str:
        """
        Provides an easy way to obtain the system prompt using methods in this class.

        returns:
        - a string containing the system prompt to use the LLM as a palmistry expert.
        """
        system_prompt = """You are a palmistry expert. 
You will be presented with an image of a palm.
Please perform analysis on the image of the palm as instructed.

## Supplementary Information on Palmistry
Palm lines of interest include the *Life Line*, the *Heart Line*, the *Fate Line*, and the *Head Line*.
These lines represent respectively, *enthusiasm and strength*, *romantic life*, *fortune and luck*, and *smartness and potential*.
You are to read these lines in detail, and to provide insights in the form of scores.

### Heart Line
- Begins below the index finger = content with love life
- Begins below the middle finger = selfish when it comes to love
- Begins in-between the middle and index fingers = caring and understanding
- Is straight and short = less interest in romance
- Touches life line = heart is broken easily
- Is long and curvy = freely expresses emotions and feelings
- Is straight and parallel to the head line = good handle on emotions
- Is wavy = many relationships, absence of serious relationships
- Circle on the line = sad or depressed
- Broken line = emotional trauma

### Head Line
- Short line = prefers physical achievements over mental ones
- Curved, sloping line = creativity
- Curves downward = inclination towards literature and fantasy
- Curves upwards towards little finger = aptitude for math, business, and logic
- Separated from life line = adventure, enthusiasm for life
- Wavy line = short attention span
- Deep, long line = thinking is clear and focused
- Straight line = thinks realistically
- Broken head line = inconsistencies in thought or has varying interests
- Multiple crosses through head line = momentous decisions

### Life Line
- Runs close to thumb = often tired
- Curves completely around the thumb = good physical and mental health
- Forked upwards = positive attitude towards life
- Forked downwards = pessimist
- Curvy = plenty of energy
- Forms a semicircle = enthusiastic and courageous
- Long and deep = vitality
- Short and shallow = manipulated by others
- Swoops around in a semicircle = strength and enthusiasm
- Straight and close to the edge of the palm = cautious when it comes to relationships
- Ends at base of index finger = academic achievement
- Ends at base of pinky finger = success in business
- Ends at base of ring finger = sign of wealth
- Ends below the thumb = strong attachment with family
- Multiple life lines = extra vitality
- Circle in line = hospitalized or injured
- Break in line = sudden change of lifestyle
- No line = nervous

### Fate Line
- Deep line = strongly controlled by fate
- Unbroken and runs straight across = successful life ahead
- Breaks and changes of direction = prone to many changes in life from external forces
- Fork in the line = great amount of wealth ahead
- Starts joined to life line = self-made individual; develops aspirations early on
- Joins with life line somewhere in the middle = signifies a point at which one’s interests must be surrendered to those of others
- Starts at base of thumb and crosses life line = support offered by family and friends
- No line = comfortable but uneventful life ahead

## Scoring Instructions
Provide scores for the user for `strength`, `romantic`, `luck`, and `potential`.
These correspond to the above points that you were asked to pay attention to previously.
Your scores should be within 0 and 1, with 1 being the highest possible score.
Give your output in the form of a JSON string, with the score keys being `strength`, `romantic`, `luck` and `potential`.
Do *NOT* place ANY markdown backticks in your output, as the output will be directly parsed in a Python script.
        """
        return system_prompt


    def _build_messages(
            self,
            user_prompt: str,
            system_prompt: Optional[str] = None,
            image_data: Optional[str] = None
        ) -> list:
        """
        Builds the chat message list for one request without contacting the service.

        args:
        - user_prompt (str): user query towards the LLM
        - system_prompt (str): system prompt to use; the palmistry prompt is used when this
          is None or empty
        - image_data (str): image URL after being encoded; omitted from the request when
          None or empty

        returns:
        - a list with one system message followed by one user message
        """
        sys_prompt = system_prompt if system_prompt else self._get_sys_prompt()

        if image_data:
            # With an image attached, the user content is a list of typed parts.
            user_message = [
                {
                    "type": "text",
                    "text": user_prompt
                },
                {
                    "type": "image_url",
                    "image_url": {"url": image_data}
                }
            ]
        else:
            # Text-only requests send the prompt as a plain string.
            user_message = user_prompt

        return [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_message},
        ]


    def get_LLM_output(
            self,
            user_prompt: str,
            system_prompt: Optional[str] = None,
            image_data: Optional[str] = None
        ) -> Optional[str]:
        """
        Gets output from an LLM.

        args:
        - user_prompt (str): user query towards the LLM
        - system_prompt (str): system prompt that provides context and instructions to the LLM;
          defaults to the palmistry prompt
        - image_data (str): image URL after being encoded

        returns:
        - the message content of the first choice in the response
          (NOTE(review): the SDK types this field as optional, so it may be None - confirm)
        """
        messages = self._build_messages(user_prompt, system_prompt, image_data)

        output = self.openai_client.chat.completions.create(
            model=self.deployment,
            messages=messages,
        )

        return output.choices[0].message.content

Load dataset (first Kaggle dataset)

In [4]:
import pandas as pd

# Folder that holds the image files of the first Kaggle dataset.
dataset_directory = "./Hands_kaggle1/Hands"

# One metadata row per image file.
dataset_info = pd.read_csv("./Hands_kaggle1/HandInfo.csv")

# Trailing expression so the notebook renders the table.
dataset_info

Unnamed: 0,id,age,gender,skinColor,accessories,nailPolish,aspectOfHand,imageName,irregularities
0,0,27,male,fair,0,0,dorsal right,Hand_0000002.jpg,0
1,0,27,male,fair,0,0,dorsal right,Hand_0000003.jpg,0
2,0,27,male,fair,0,0,dorsal right,Hand_0000004.jpg,0
3,0,27,male,fair,0,0,dorsal right,Hand_0000005.jpg,0
4,0,27,male,fair,0,0,dorsal right,Hand_0000006.jpg,0
...,...,...,...,...,...,...,...,...,...
11071,1589,22,female,fair,0,0,palmar left,Hand_0011740.jpg,0
11072,1589,22,female,fair,0,0,palmar left,Hand_0011741.jpg,0
11073,1589,22,female,fair,0,0,palmar left,Hand_0011742.jpg,0
11074,1589,22,female,fair,0,0,palmar left,Hand_0011743.jpg,0


In [5]:
# Single client instance shared by the labelling cells below.
llm = LLM()

Labelling for initial Kaggle dataset \
https://www.kaggle.com/datasets/shyambhu/hands-and-palm-images-dataset?resource=download-directory

In [None]:
# Label one image per consecutive (subject id, hand aspect) run of the first dataset
# and save the results with one JSON object per line.
previd = ""
prevaspect = ""

labels = []

for _, row in dataset_info.iterrows():
    # Skip back-of-hand ("dorsal") photos.
    if "dorsal" in row['aspectOfHand']:
        continue

    # Skip rows repeating the subject id and aspect of the last labelled row, so that
    # only the first image of each run is sent to the model.
    if row['id'] == previd and row['aspectOfHand'] == prevaspect:
        continue

    previd = row['id']
    prevaspect = row['aspectOfHand']
    age = row['age']
    gender = row['gender']
    skincolor = row['skinColor']

    filename = row['imageName']

    user_prompt = f"""The following is an image of the user's palm.
The user's information is as follows:
Aspect of hand: {prevaspect}
Age: {age}
Gender: {gender}
Skin color: {skincolor}

Provide scores as you were instructed.
    """

    # Reuse the dataset folder declared when the metadata was loaded instead of
    # repeating the literal path.
    path = os.path.join(dataset_directory, filename)
    imagedata = local_image_to_data_url(path)

    scores = llm.get_LLM_output(user_prompt=user_prompt, image_data=imagedata)

    try:
        scores_dict = json.loads(scores)
    except (json.JSONDecodeError, TypeError) as e:
        # The reply was not valid JSON (or was not text at all): report it and keep the
        # raw reply so the image still gets a record that can be inspected later.
        print(f"An error occurred: {e}")
        scores_dict = scores

    # "scores" is stored as a JSON-encoded string inside the record, as before.
    label = {
        "image": filename,
        "scores": json.dumps(scores_dict)
    }
    labels.append(label)

# One JSON document per line; join avoids repeated string re-allocation.
labels_string = "".join(json.dumps(item) + "\n" for item in labels)

with open("Hands_kaggle1/labels.json", "w") as f:
    f.write(labels_string)

Labelling for new Kaggle dataset \
https://www.kaggle.com/datasets/feyiamujo/human-palm-images

In [None]:
# Recursively collect the path of every file below each gender folder of the second dataset.
all_files_female = [
    os.path.join(folder, name)
    for folder, _, names in os.walk("./Hands_kaggle2/FEMALE")
    for name in names
]

all_files_male = [
    os.path.join(folder, name)
    for folder, _, names in os.walk("./Hands_kaggle2/MALE")
    for name in names
]

In [None]:
# Label every image in all_files_female and save the results with one JSON object per line.
labels_female = []

# The prompt is the same for every image, so it is built once.
user_prompt = """The following is an image of the user's palm.
The user's information is as follows:
Gender: Female

Provide scores as you were instructed.
    """

for file in all_files_female:
    # Entries of all_files_female are already full paths (walk root joined with the
    # file name); prefixing the dataset folder again would point at a missing file.
    path = file
    imagedata = local_image_to_data_url(path)

    scores = llm.get_LLM_output(user_prompt=user_prompt, image_data=imagedata)

    try:
        scores_dict = json.loads(scores)
    except (json.JSONDecodeError, TypeError) as e:
        # The reply was not valid JSON (or was not text at all): report it and keep the
        # raw reply so the image still gets a record.
        print(f"An error occurred: {e}")
        scores_dict = scores

    # "scores" is stored as a JSON-encoded string inside the record, as before.
    label = {
        "image": file,
        "scores": json.dumps(scores_dict)
    }
    labels_female.append(label)

labels_string_female = "".join(json.dumps(item) + "\n" for item in labels_female)

with open("Hands_kaggle2/labels_female.json", "w") as f:
    # Write this cell's own results, not the first dataset's labels_string.
    f.write(labels_string_female)

In [None]:
# Label every image in all_files_male and save the results with one JSON object per line.
labels_male = []

# The prompt is the same for every image, so it is built once.
user_prompt = """The following is an image of the user's palm.
The user's information is as follows:
Gender: Male

Provide scores as you were instructed.
    """

for file in all_files_male:
    # Entries of all_files_male are already full paths (walk root joined with the
    # file name); prefixing the dataset folder again would point at a missing file.
    path = file
    imagedata = local_image_to_data_url(path)

    scores = llm.get_LLM_output(user_prompt=user_prompt, image_data=imagedata)

    try:
        scores_dict = json.loads(scores)
    except (json.JSONDecodeError, TypeError) as e:
        # The reply was not valid JSON (or was not text at all): report it and keep the
        # raw reply so the image still gets a record.
        print(f"An error occurred: {e}")
        scores_dict = scores

    # "scores" is stored as a JSON-encoded string inside the record, as before.
    label = {
        "image": file,
        "scores": json.dumps(scores_dict)
    }
    labels_male.append(label)

labels_string_male = "".join(json.dumps(item) + "\n" for item in labels_male)

with open("Hands_kaggle2/labels_male.json", "w") as f:
    # Write this cell's own results, not the first dataset's labels_string.
    f.write(labels_string_male)