In [1]:
from snorkel.labeling import labeling_function
import json
import os
import numpy as np

In [2]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
import numpy as np

def calculate_metrics(y_true, y_pred, abstain_class=-1):
    # Filter out samples where prediction is -1
    valid_indices = y_pred != abstain_class
    y_true_filtered = y_true[valid_indices]
    y_pred_filtered = y_pred[valid_indices]

    # Compute metrics
    precision = precision_score(y_true_filtered, y_pred_filtered, average='macro')
    recall = recall_score(y_true_filtered, y_pred_filtered, average='macro')
    f1 = f1_score(y_true_filtered, y_pred_filtered, average='macro')
    accuracy = accuracy_score(y_true_filtered, y_pred_filtered)

    return {
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Accuracy': accuracy
    }

In [11]:
POSITIVE = 1
NEGATIVE = 0
ABSTAIN = -1

@labeling_function()
def llava_7b(image_name):
    root_path = '../prompting_framework/prompting_results/cars/'
    llava_7b_results = 'cars-llava-7b.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    image_name = image_name.replace("'","")
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    return data[image_name]["label"] if data[image_name]["label"] is not None else -1

# @labeling_function()
# def llava_34b(image_name):
#     root_path = '../prompting_framework/prompting_results/aircraft/'
#     llava_7b_results = 'oxford-llava_34b.json'
#     path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
#     with open(path_to_llava_7b_results, 'r') as file:
#         data = json.load(file)

#     return data.get(image_name, -1)

# @labeling_function()
# def llava_13b(image_name):
#     root_path = '../prompting_framework/prompting_results/cars/'
#     llava_7b_results = 'aircraft-llava-13b.json'
#     path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
#     with open(path_to_llava_7b_results, 'r') as file:
#         data = json.load(file)

#     return data[image_name]["label"] if data[image_name]["label"] is not None else -1

@labeling_function()
def bakllava(image_name):
    root_path = '../prompting_framework/prompting_results/cars/'
    llava_7b_results = 'cars-bakllava.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    image_name = image_name.replace("'","")
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    return data[image_name]["label"] if data[image_name]["label"] is not None else -1

@labeling_function()
def llava_llama3(image_name):
    root_path = '../prompting_framework/prompting_results/cars/'
    llava_7b_results = 'cars-llava-llama3.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    image_name = image_name.replace("'","")
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    return data[image_name]["label"] if data[image_name]["label"] is not None else -1


@labeling_function()
def moondream(image_name):
    root_path = '../prompting_framework/prompting_results/cars/'
    llava_7b_results = 'cars-moondream.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    image_name = image_name.replace("'","")
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    return data[image_name]["label"] if data[image_name]["label"] is not None else -1


@labeling_function()
def llava_phi3(image_name):
    root_path = '../prompting_framework/prompting_results/cars/'
    llava_7b_results = 'cars-llava-phi3.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    image_name = image_name.replace("'","")
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    return data[image_name]["label"] if data[image_name]["label"] is not None else -1


# @labeling_function()
# def llama3_2_vision(image_name):
#     root_path = '../prompting_framework/prompting_results/cars/'
#     llava_7b_results = 'aircraft-llama3.2-vision-11b.json'
#     path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
#     with open(path_to_llava_7b_results, 'r') as file:
#         data = json.load(file)

#     return data[image_name]["label"] if data[image_name]["label"] is not None else -1

In [12]:
train_data_json_path = '../prompting_framework/prompting_results/cars/cars-moondream-train-raw_info.json'
test_data_json_path = '../prompting_framework/prompting_results/cars/cars-moondream-test-raw_info.json'

with open(train_data_json_path, 'r') as file:
    train_data = json.load(file)

# with open(val_data_json_path, 'r') as file:
#     val_data = json.load(file)

with open(test_data_json_path, 'r') as file:
    test_data = json.load(file)

# Extract and pad image names, ensuring they are 5 digits long before the '.png'
train_image_names = []
for item in train_data:
    train_image_names.append(item)

# val_image_names = []
# Y_val = []
# for item in val_data:
#     val_image_names.append(item)
#     Y_val.append(val_data[item]["label"])

test_image_names = []
Y_test = []
for item in test_data:
    test_image_names.append(item)
    Y_test.append(test_data[item]["label"])

# with open(dev_data_json_path, 'r') as file:
#     dev_data = json.load(file)
    
# dev_image_names = []
# Y_dev = []
# for item in dev_data:
#     Y_dev.append(dev_data[item])
#     dev_image_names.append(item)

print(f"There are {len(train_image_names)} images in the Train set.")
# print(f"There are {len(val_image_names)} images in the val set.")
print(f"There are {len(test_image_names)} images in the test set.")


There are 8144 images in the Train set.
There are 8041 images in the test set.


In [13]:
llava_7b(train_image_names[0])

14

In [14]:
from snorkel.labeling import LFApplier

list_of_all_the_models = ['bakllava',
       'llava_7b',
       'llava_llama3',
       'moondream',
       'llava_phi3'
       ]

lfs = [bakllava,
       llava_7b,
       llava_llama3,
       moondream,
       llava_phi3
       ]

applier = LFApplier(lfs)

In [15]:
from snorkel.labeling import LFAnalysis

L_test = applier.apply(test_image_names)
L_train = applier.apply(train_image_names)

8041it [07:30, 17.85it/s]
8144it [07:34, 17.91it/s]


In [16]:
np.save("L_train_cars_1.npy", L_train)
np.save("L_test_cars_1.npy", L_test)

In [21]:
Y_test = np.array(Y_test)
LFAnalysis(L_test, lfs).lf_summary(Y_test-1)

Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.
bakllava,0,"[0, 1, 2, 5, 6, 10, 11, 12, 14, 16, 17, 18, 20...",1.0,1.0,0.998881,301,7740,0.037433
llava_7b,1,"[0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 16, 17...",1.0,1.0,0.998881,926,7115,0.11516
llava_llama3,2,"[0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 13, 14, 16, 1...",1.0,1.0,0.998881,648,7393,0.080587
moondream,3,"[0, 1, 2, 5, 6, 11, 14, 17, 23, 24, 27, 31, 32...",0.996145,0.996145,0.99515,86,7924,0.010737
llava_phi3,4,"[0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 1...",0.879741,0.879741,0.878746,449,6625,0.063472


In [23]:
np.max(Y_test-1)

195

In [19]:
np.max(L_test)

195