In [1]:
import os
import sys
import time
import json
import torch
import base64
import string
import datasets
import argparse
import numpy as np
from utils import *
from tqdm import tqdm
from PIL import Image
from os import path, makedirs, getenv, mkdir
from huggingface_hub import login as hf_login

import openai
from openai import OpenAI

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


Unused kwargs: ['torch_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


In [5]:
def encode_image(image_path):
    """Return the contents of a file as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

In [6]:
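# Disabled alternative to encode_image: shrink the image to 25% of its size
# before encoding.
# NOTE(review): Image.tobytes() returns raw pixel data, not an encoded image
# file, so this variant would have to save the resized image to an in-memory
# buffer (img.save(buffer, format=...)) before base64-encoding it.
#def encode_image(image_path):
#    img=Image.open(image_path)
#    img=img.resize((int(0.25*img.size[0]), int(0.25*img.size[1])))
#    return base64.b64encode(img.tobytes()).decode('utf-8')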

In [7]:
def _sniff_image_mime(base64_image: str) -> str:
    """Guess an image MIME type from the first characters of base64 data."""
    # Each prefix is the base64 form of a file signature:
    # PNG (89 50 4E 47 0D 0A 1A 0A), JPEG (FF D8 FF) and GIF ("GIF87a"/"GIF89a").
    known_signatures = (
        ("iVBORw0KGg", "image/png"),
        ("/9j/", "image/jpeg"),
        ("R0lGOD", "image/gif"),
    )
    for prefix, mime_type in known_signatures:
        if base64_image.startswith(prefix):
            return mime_type
    # Unknown signature: keep the MIME type this function has always emitted.
    return "image/jpeg"


def format_for_finetuning(system_prompt: str,
                          user_input: str,
                          base64_image: str,
                          assistant_output: str) -> str:
    """
    Format data in JSON for fine-tuning an OpenAI chatbot model.

    The record contains four messages, in order: the system prompt, the user
    text, a user message carrying the image as a base64 data URL, and the
    expected assistant reply.

    Args:
        system_prompt: Text of the system message.
        user_input: Text of the first user message.
        base64_image: Base64-encoded image file contents (no ``data:`` prefix).
        assistant_output: Text of the assistant message (the training target).

    Returns:
        A single-line JSON string with a top-level ``"messages"`` list.
    """
    # The data URL must declare the real image format. The MIME type is derived
    # from the payload's file signature instead of assuming JPEG, because the
    # notebook feeds this function the contents of ``.png`` files.
    mime_type = _sniff_image_mime(base64_image)

    return json.dumps(
        {
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_input},
                {"role": "user", "content": [
                    {"type": "image_url",
                     "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                    }]
                },
                {"role": "assistant", "content": assistant_output}
            ]
        }
    )

In [8]:
# Notebook configuration, expressed as an argparse parser so the same options
# can be reused from a script. ``parse_args(args=[])`` ignores the real command
# line, so every option below takes its default value.
parser = argparse.ArgumentParser(description='Fine-tune a spatial-join model.')
parser.add_argument('--model_id', type=str, default='gpt-4o-mini-2024-07-18', help='The model ID to fine-tune.')
# Default to the OPENAI_API_KEY environment variable; without a default this
# option would always be None here because no arguments are parsed.
parser.add_argument('--OPENAI_API_KEY', type=str, default=getenv('OPENAI_API_KEY'), help='API key to finetune GPT-4o')
parser.add_argument('--dataset', type=str, default='beanham/spatial_join', help='The dataset to use for fine-tuning.')
parser.add_argument('--formatted_data_dir', type=str, help='The directory to save the formatted data to', default='formatted_data')
args = parser.parse_args(args=[])

# Log in to the Hugging Face Hub (called with no arguments).
hf_login()

# Create the output directory on first use. makedirs also creates missing
# parent directories, so a nested --formatted_data_dir works as well.
if not path.exists(args.formatted_data_dir):
    makedirs(args.formatted_data_dir, exist_ok=True)
    print(f'Created directory {args.formatted_data_dir}')

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
# Image folders, one per example category, all under the same root directory.
_IMG_ROOT = '../2024-spatial-join-exp/join_task_imgs/'
paths = {
    f'{key}_path': f'{_IMG_ROOT}{folder}/'
    for key, folder in (
        ('p', 'positive'),
        ('n', 'negative'),
        ('fp', 'false_positive'),
        ('fn', 'false_negative'),
    )
}

# System prompt used for every fine-tuning record.
# NOTE(review): the wording (including the spellings "parellele" and
# "explaination" and the whitespace-only line) is reproduced exactly, because
# this text becomes part of the training data.
system_message = (
    "\n"
    "You are a helpful geospatial analysis assistant! I will provide you with a pair of (sidewalk, road) information in GeoJSON format, along with a satellite image visualizing the sidewalk (red line) and road (blue line). Please help me identify whether the sidewalk is alongside the paired road, such that the sidewalk is adjacent and parellele to the road. If it is, please return 1; otherwise, return 0.\n"
    "    \n"
    "Please just return 0 or 1. No explaination needed.\n"
)

In [10]:
# ----------------------
# Load Data
# ----------------------
print('Downloading and preparing data...')
data = get_dataset_slices(args.dataset)
train, val = data['train'], data['val']

# index.txt is parsed as JSON; its 'train' and 'val' entries are later indexed
# by example position to obtain each example's image file name.
with open('../2024-spatial-join-exp/join_task_data/index.txt', 'r') as index_file:
    index = json.loads(index_file.read())
train_index, val_index = index['train'], index['val']

Downloading and preparing data...


In [11]:
# Build one fine-tuning record (a JSON string) per training example.
#
# The three columns are fetched once, before the loop. Writing
# ``train['sidewalk'][i]`` inside the loop re-evaluates the column lookup on
# every iteration; presumably ``train`` is a Hugging Face dataset, where that
# lookup reads the whole column each time — TODO confirm.
train_sidewalks = train['sidewalk']
train_roads = train['road']
train_labels = train['label']

train_formatted=[]
for i in tqdm(range(len(train))):
    sidewalk = "Sidewalk: "+str(train_sidewalks[i])
    road = "Road: "+str(train_roads[i])
    # NOTE(review): the two parts are joined without a separator; kept as-is
    # because this text is part of the training data.
    user_message=sidewalk+road
    img_name=train_index[i]
    # NOTE(review): only the positive/negative folders are used here; the
    # 'fp_path'/'fn_path' entries of `paths` are never read — confirm intended.
    if 'positive' in img_name:img_path=paths['p_path']+img_name+'.png'
    else:img_path=paths['n_path']+img_name+'.png'
    base64_image = encode_image(img_path)
    # NOTE(review): the target is "Lable: <label>" while the system prompt asks
    # for a bare 0 or 1; string left unchanged — confirm against inference code.
    label="Lable: "+str(train_labels[i])
    train_formatted.append(format_for_finetuning(system_message, user_message, base64_image, label))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2743/2743 [10:09<00:00,  4.50it/s]


In [12]:
# Build one fine-tuning record (a JSON string) per validation example.
#
# The three columns are fetched once, before the loop. Writing
# ``val['sidewalk'][i]`` inside the loop re-evaluates the column lookup on
# every iteration; presumably ``val`` is a Hugging Face dataset, where that
# lookup reads the whole column each time — TODO confirm.
val_sidewalks = val['sidewalk']
val_roads = val['road']
val_labels = val['label']

val_formatted=[]
for i in tqdm(range(len(val))):
    sidewalk = "Sidewalk: "+str(val_sidewalks[i])
    road = "Road: "+str(val_roads[i])
    # NOTE(review): the two parts are joined without a separator; kept as-is
    # because this text is part of the validation data.
    user_message=sidewalk+road
    img_name=val_index[i]
    # NOTE(review): only the positive/negative folders are used here; the
    # 'fp_path'/'fn_path' entries of `paths` are never read — confirm intended.
    if 'positive' in img_name:img_path=paths['p_path']+img_name+'.png'
    else:img_path=paths['n_path']+img_name+'.png'
    base64_image = encode_image(img_path)
    # NOTE(review): the target is "Lable: <label>" while the system prompt asks
    # for a bare 0 or 1; string left unchanged — confirm against inference code.
    label="Lable: "+str(val_labels[i])
    val_formatted.append(format_for_finetuning(system_message, user_message, base64_image, label))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 305/305 [00:07<00:00, 42.39it/s]


In [15]:
# Turn each list of JSON records into JSONL text: one record per line,
# newline-separated, with no trailing newline.
train_formatted_join, val_formatted_join = (
    '\n'.join(records) for records in (train_formatted, val_formatted)
)

In [16]:
# ----------------------------------
# Write the formatted data to a file
# ----------------------------------
print('Writing formatted data to file...')
# One JSONL file per split, both inside the configured output directory.
for jsonl_name, jsonl_text in (
    ('gpt4o_vision_train.jsonl', train_formatted_join),
    ('gpt4o_vision_val.jsonl', val_formatted_join),
):
    with open(path.join(args.formatted_data_dir, jsonl_name), 'w') as f:
        f.write(jsonl_text)

Writing formatted data to file...


In [22]:
# ----------------------------------
# Set the OpenAI API key and create a client
# ----------------------------------
# The key comes from the notebook arguments, falling back to the
# OPENAI_API_KEY environment variable; it is never hard-coded in the notebook.
# It is passed by keyword (`api_key=`), which is how the OpenAI client expects
# its settings.
client = OpenAI(api_key=args.OPENAI_API_KEY or getenv('OPENAI_API_KEY'))

In [23]:
# Upload the training and validation JSONL files for fine-tuning.
# Each file is opened in a `with` block so the handle is closed as soon as the
# upload call returns, instead of being left open for the life of the kernel.
with open(path.join(args.formatted_data_dir, 'gpt4o_vision_train.jsonl'), "rb") as train_file:
    train_response = client.files.create(
        file=train_file,
        purpose="fine-tune"
    )
with open(path.join(args.formatted_data_dir, 'gpt4o_vision_val.jsonl'), "rb") as val_file:
    val_response = client.files.create(
        file=val_file,
        purpose="fine-tune"
    )

In [None]:
# Start the fine-tuning job on the uploaded files, using the configured base
# model and a fixed number of training epochs.
N_EPOCHS = 5
job_response = client.fine_tuning.jobs.create(
    model=args.model_id,
    training_file=train_response.id,
    validation_file=val_response.id,
    hyperparameters=dict(n_epochs=N_EPOCHS),
)