In [None]:
# Report the version of the interpreter this kernel is actually running.
# A shell call (`!python --version`) reports whichever `python` is first on
# PATH, which is not necessarily the kernel's interpreter.
import platform
print("Python", platform.python_version())

In [None]:
import os

# ---------------------------------------------------------------------------
# Important paths
# ---------------------------------------------------------------------------

# Root of the PASCAL3D+ release; keep the trailing slash, the templates below
# are built by plain string concatenation.
path_to_original_pascal3dp = '/srv/PASCAL3D+_release1.1/'

# Occluder libraries, named by occluder size.
small_occluder_path, medium_occluder_path, large_occluder_path = (
    '/srv/occluder_libs_test_%s.npz' % size_name
    for size_name in ('small', 'medium', 'large')
)

# just for now...
occluder_path = '/srv/occluder_libs_test_low.npz'

# Per-category templates: fill the remaining %s with a category name.
bg_list_path = f'{path_to_original_pascal3dp}Image_sets/%s_imagenet_val.txt'
bg_img_path = f'{path_to_original_pascal3dp}Images/%s_imagenet'
bg_anno_path = f'{path_to_original_pascal3dp}Annotations/%s_imagenet'
bg_mask_path = f'{path_to_original_pascal3dp}obj_mask/%s'

# Output locations, relative to the notebook's working directory.
path_save = './results'
save_img_path = f'{path_save}/images'
save_anno_path = f'{path_save}/annotations'

In [None]:
# Third-party dependencies for this notebook.
# NOTE(review): none of these are version-pinned, so a fresh run may pull
# different releases than the ones this notebook was developed against.

# google-generativeai is imported further down as `genai`.
%pip install -q -U google-generativeai
# NOTE(review): google-cloud-aiplatform is not imported in the cells visible
# here - confirm it is still required.
%pip install google-cloud-aiplatform
# google-auth / google-auth-oauthlib back the OAuth flow in load_creds().
%pip install --upgrade -q google-api-python-client google-auth-httplib2 google-auth-oauthlib
# NOTE(review): mediapipe is not imported in the cells visible here - confirm
# it is still required.
%pip install mediapipe

In [None]:
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

# OAuth scopes passed both when loading the cached token and when running the
# consent flow in load_creds().
SCOPES = ['https://www.googleapis.com/auth/generative-language.retriever']

def load_creds():
    """Build a credential object from `client_secret.json`, caching tokens.

    A previously written `token.json` is reused when it is still valid. If it
    has expired but carries a refresh token it is refreshed; otherwise the
    browser-based consent flow is started. Whenever new or refreshed
    credentials are obtained they are written back to `token.json` so the
    consent screen is needed as rarely as possible.

    Returns:
        The credential object produced by the Google auth libraries.
    """
    token_file = 'token.json'

    # token.json holds the user's access and refresh tokens; it only exists
    # after the authorization flow has completed at least once.
    cached = None
    if os.path.exists(token_file):
        cached = Credentials.from_authorized_user_file(token_file, SCOPES)

    # Fast path: the cached credentials can be used as they are.
    if cached and cached.valid:
        return cached

    creds = cached
    refreshable = creds and creds.expired and creds.refresh_token
    if refreshable:
        creds.refresh(Request())
    else:
        # Nothing usable on disk: let the user log in.
        consent_flow = InstalledAppFlow.from_client_secrets_file(
            'client_secret.json', SCOPES)
        creds = consent_flow.run_local_server(port=0)

    # Persist the credentials for the next run.
    with open(token_file, 'w') as token:
        token.write(creds.to_json())
    return creds

In [None]:
import pprint
import google.generativeai as genai

# Obtain OAuth credentials (may open the consent screen when no usable
# token.json is cached) and hand them to the generative AI client.
creds = load_creds()

genai.configure(credentials=creds)

In [None]:
#import BboxTools as bbt
import os
import csv
from collections import defaultdict
from PIL import Image
import scipy
import random

def get_labels():
    """Load the foreground label tables for every occluder size.

    Reads `foreground_labels_small.csv`, `foreground_labels_medium.csv` and
    `foreground_labels_large.csv` from the current working directory. The
    first row of each file is treated as a header and skipped. In every
    following row, column 0 is parsed as an integer index and column 1 is the
    label title that index is filed under. Blank lines are ignored.

    Returns:
        dict: maps each size name ("small", "medium", "large") to a
        `defaultdict(list)` of label title -> list of integer indexes, in
        file order.

    Raises:
        OSError: if one of the CSV files cannot be opened.
        ValueError: if column 0 of a data row is not an integer.
        IndexError: if a non-blank data row has fewer than two columns.
    """
    all_labels = {}

    for sizing in ("small", "medium", "large"):
        labels = defaultdict(list)

        # newline='' is what the csv module asks for, so that newlines inside
        # quoted fields are handled by the reader itself.
        with open('foreground_labels_' + sizing + '.csv', 'r', newline='') as file:
            csv_reader = csv.reader(file)
            # Skip the header row; the default keeps an empty file harmless.
            next(csv_reader, None)
            for row in csv_reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing
                    # newline at the end of the file).
                    continue
                labels[row[1]].append(int(row[0]))

        all_labels[sizing] = labels

    return all_labels

def load_one_annotation(anno_path):
    """Read the image size and the first object's box from a .mat annotation.

    The file is expected to contain a `record` struct whose second field
    (positional index 1) is the array of annotated objects and which has a
    `size` field.

    Args:
        anno_path: path of the MATLAB annotation file to load.

    Returns:
        tuple: `(w, h, bbox_, multi)` where
            * `w`, `h` are the first and second entries of `record.size`,
              exactly as loadmat returns them (presumably width and height,
              going by the names - confirm against the annotation format);
            * `bbox_` is the second field of the first object, cast to int
              (presumably its bounding box);
            * `multi` is True when the record does not contain exactly one
              object, so callers can skip such images.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.io` is available as an attribute on every SciPy
    # version.
    from scipy.io import loadmat

    record = loadmat(anno_path)['record']

    objects = record[0][0][1][0]
    # I added the astype int here....
    bbox_ = objects[0][1][0].astype(int)

    size = record['size'][0][0][0][0]
    w = size[0]
    h = size[1]

    return w, h, bbox_, len(objects) != 1

# Occluder sizes, in the column order of the output rows.
_SIZINGS = ("small", "medium", "large")

_PEOPLE_AND_CYCLE_LABELS = [
    "Toddler", "Cycling", "People", "Motorcycle", "Children", "Woman",
    "Athlete", "Person", "Man", "Girl", "Boy", "Baby/Infant", "Baby",
    "Couple", "Parking", "Motorcyclist",
]

# Foreground labels that are withheld from the model for a given background
# category.
_EXCLUDED_LABELS = {
    "bicycle": _PEOPLE_AND_CYCLE_LABELS,
    "bus": ["Parking"],
    "car": ["Parking"],
    "motorbike": _PEOPLE_AND_CYCLE_LABELS,
    "train": ["Parking"],
}


def _choose_foreground_index(model, bg_img, candidates, labels):
    """Ask the model for one label out of `candidates` and sample an index for it.

    Returns the sampled index as a string, or '' when the model gives no
    usable answer, so the caller can still emit a field for this size.
    """
    prompt = "You will be a given a list of foreground objects, and you are suppose to choose a category from that list to cover the subject in the image provided. Your answer should be one category from the list provided and should be a one word answer. You have to make a selection, and cannot say no. The list is: " + str(candidates)
    response = model.generate_content([prompt, bg_img])
    try:
        fg_category = response.text.strip()
    except ValueError:
        # `response.text` raises ValueError when the response carries no
        # text part (for example when the answer was blocked).
        return ""

    # Only accept answers that were actually offered: labels excluded for
    # this category must not sneak back in through the model's reply.
    if fg_category not in candidates:
        return ""

    print(fg_category)
    return str(random.choice(labels[fg_category]))


def generate_dataset(file_list, bg_img_dir, bg_anno_dir, file_obj, category,
                     max_images=100, model_name="gemini-1.5-pro"):
    """Pair background images with foreground indexes chosen by the model.

    For each of the first `max_images` entries of `file_list`, the annotation
    is loaded and images that do not contain exactly one object are skipped.
    For every remaining image the model is asked, once per occluder size, to
    pick one label from that size's label list (minus the labels excluded for
    `category`); a random index filed under the chosen label is then recorded.

    One line is written to `file_obj` per processed image:
    `<image path>,<small>,<medium>,<large>`. A size for which the model gave
    no usable answer gets an empty field, so every row has four columns.

    Args:
        file_list: image base names (no extension) for this category.
        bg_img_dir: directory holding `<name>.JPEG` background images.
        bg_anno_dir: directory holding `<name>.mat` annotations.
        file_obj: writable text file object receiving the CSV rows.
        category: background category name; categories without an exclusion
            list are offered every label.
        max_images: how many leading entries of `file_list` to consider
            (skipped images count towards this limit). None means all.
        model_name: name of the generative model to query.
    """
    print("CATEGORY", category)

    all_labels = get_labels()
    model = genai.GenerativeModel(model_name)

    # The candidate lists depend only on the category, so build them once
    # instead of once per image.
    excluded = set(_EXCLUDED_LABELS.get(category, ()))
    candidates_by_size = {
        sizing: [title for title in all_labels[sizing] if title not in excluded]
        for sizing in _SIZINGS
    }

    for file_name in file_list[:max_images]:
        anno_path = os.path.join(bg_anno_dir, file_name + '.mat')
        has_multiple_objects = load_one_annotation(anno_path)[3]
        if has_multiple_objects:
            continue

        bg_img_path = os.path.join(bg_img_dir, file_name + '.JPEG')
        fields = [bg_img_path]

        # Close the image file once all three queries have been made.
        with Image.open(bg_img_path) as bg_img:
            for sizing in _SIZINGS:
                fields.append(_choose_foreground_index(
                    model, bg_img, candidates_by_size[sizing], all_labels[sizing]))

        csv_output = ",".join(fields) + "\n"
        print(csv_output)
        file_obj.write(csv_output)

In [None]:
categories = ['bicycle', 'bus', 'car', 'motorbike', 'train']
path_to_original_pascal3dp = '/srv/PASCAL3D+_release1.1/'
bg_list_path = path_to_original_pascal3dp + 'Image_sets/%s_imagenet_val.txt'
bg_anno_path = path_to_original_pascal3dp + 'Annotations/%s_imagenet'
# Defined here as well as in the path configuration cell, so this cell does
# not depend on that cell for just one of its templates.
bg_img_path = path_to_original_pascal3dp + 'Images/%s_imagenet'

# The context manager guarantees the CSV is flushed and closed even if a
# category fails part-way through.
with open("bg_fg_pair.csv", "w") as pair_csv:
    pair_csv.write("image_name,fg_index_small,fg_index_medium,fg_index_large\n")

    for cate in categories:
        print('Start cate: ', cate)

        # One image base name per line; blank lines would otherwise turn into
        # empty file names.
        with open(bg_list_path % cate) as list_file:
            file_list_ = [line.strip() for line in list_file if line.strip()]

        bg_anno_path_ = bg_anno_path % cate
        bg_img_path_ = bg_img_path % cate

        generate_dataset(file_list_, bg_img_path_, bg_anno_path_, pair_csv, cate)