In [1]:
from snorkel.labeling import labeling_function
import json
import os
import numpy as np

In [60]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, auc
from sklearn.metrics import precision_recall_curve
import numpy as np

def calculate_metrics(y_true, y_pred, y_scores=None, abstain_class=-1):
    # Filter out samples where prediction is -1
    valid_indices = y_pred != abstain_class
    y_true_filtered = y_true[valid_indices]
    y_pred_filtered = y_pred[valid_indices]
    # y_scores_fileterd = y_scores[valid_indices]

    # Compute metrics
    if y_scores is not None:
        precision_list, recall_list, threshold = precision_recall_curve(y_true, y_scores)
        auc_score = auc(recall_list, precision_list)
    else:
        precision_list, recall_list, threshold = precision_recall_curve(y_true, y_pred)
        auc_score = auc(recall_list, precision_list)
    conf_matrix = confusion_matrix(y_true_filtered, y_pred_filtered)
    precision = precision_score(y_true_filtered, y_pred_filtered)
    recall = recall_score(y_true_filtered, y_pred_filtered)
    f1 = f1_score(y_true_filtered, y_pred_filtered)
    accuracy = accuracy_score(y_true_filtered, y_pred_filtered)

    return {
        'Confusion Matrix': conf_matrix,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Accuracy': accuracy,
        'auc': auc_score
    }

In [56]:
POSITIVE = 1
NEGATIVE = 0
ABSTAIN = -1

dataset_name = "stop-sign"

@labeling_function()
def llava_7b_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava 7b-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_7b_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava 7b-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_13b_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava 13b-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_13b_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava 13b-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1
        
@labeling_function()
def bakllava_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-bakllava-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def bakllava_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-bakllava-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1
        
@labeling_function()
def llava_llama3_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava-llama3-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_llama3_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava-llama3-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1
        
@labeling_function()
def llava_phi3_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava-phi3-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_phi3_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava-phi3-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1
        

@labeling_function()
def moondream_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-moondream-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def moondream_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-moondream-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llama_3_2_vision_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llama3.2-vision 11b-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llama_3_2_vision_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llama3.2-vision 11b-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_34b_test(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava 34b-test.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

@labeling_function()
def llava_34b_train(image_name):
    root_path = f'../../prompting_framework/prompting_results/agile_datasets/{dataset_name}/'
    llava_7b_results = f'{dataset_name}-llava 34b-train.json'
    path_to_llava_7b_results = os.path.join(root_path,llava_7b_results)
    with open(path_to_llava_7b_results, 'r') as file:
        data = json.load(file)

    try:
        return 1-data[image_name]['label'] if data[image_name]['label'] is not None else -1
    except(KeyError):
        return -1

In [57]:
print(llava_7b_test('3'))
print(llava_llama3_test('4'))

1
0


In [8]:
train_data_json_path = '../../prompting_framework/prompting_results/agile_datasets/stop-sign/stop-sign-llava 13b-train-raw_info.json'
test_data_json_path = '../../prompting_framework/prompting_results/agile_datasets/stop-sign/stop-sign-llava 7b-test-raw_info.json'

with open(train_data_json_path, 'r') as file:
    train_data = json.load(file)


with open(test_data_json_path, 'r') as file:
    test_data = json.load(file)

# Extract and pad image names, ensuring they are 5 digits long before the '.png'
train_image_names = []
Y_train = []
for item in train_data:
    train_image_names.append(item)
    Y_train.append(train_data[item]["label"])


test_image_names = []
Y_test = []
for item in test_data:
    test_image_names.append(item)
    Y_test.append(test_data[item]["label"])

# with open(dev_data_json_path, 'r') as file:
#     dev_data = json.load(file)
    
# dev_image_names = []
# Y_dev = []
# for item in dev_data:
#     Y_dev.append(dev_data[item])
#     dev_image_names.append(item)

print(f"There are {len(train_image_names)} images in the Train set.")

print(f"There are {len(test_image_names)} images in the test set.")


There are 3034 images in the Train set.
There are 542 images in the test set.


In [6]:
from snorkel.labeling import LFApplier

list_of_all_the_models = [
    'llava_13b_test',
       'llava_7b_test',
       # 'llava_34b_test',
       'llava_llama3_test',
       'bakllava_test',
       'llama_3_2_vision_test',
       'llava_phi3_test',
       'moondream_test'
       ]

test_lfs = [llava_13b_test,
       llava_7b_test,
       # llava_34b_test,
       llava_llama3_test,
       bakllava_test,
       llama_3_2_vision_test,
       llava_phi3_test,
       moondream_test
       ]

train_lfs = [llava_13b_train,
       llava_7b_train,
       # llava_34b_train,
       llava_llama3_train,
       bakllava_train,
       llama_3_2_vision_train,
       llava_phi3_train,
       moondream_train
       ]

test_applier = LFApplier(test_lfs)
train_applier = LFApplier(train_lfs)


In [7]:
from snorkel.labeling import LFAnalysis

L_test = test_applier.apply(test_image_names)
L_train = train_applier.apply(train_image_names)

542it [00:02, 188.06it/s]
3034it [01:17, 39.06it/s]


In [8]:
L_test

array([[1, 1, 1, ..., 0, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 0, 0, 1],
       ...,
       [1, 1, 1, ..., 0, 1, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 1]])

In [9]:
L_test

array([[1, 1, 1, ..., 0, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 0, 0, 1],
       ...,
       [1, 1, 1, ..., 0, 1, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 1]])

In [32]:
Y_test_numerical = []
for i in range(len(Y_test)):
    if Y_test[i] == 'Yes':
        Y_test_numerical.append(1)
    elif Y_test[i] == 'No':
        Y_test_numerical.append(0)

Y_test_numerical = np.array(Y_test_numerical)

Y_train_numerical = []
for i in range(len(Y_train)):
    if Y_train[i] == 'Yes':
        Y_train_numerical.append(1)
    elif Y_train[i] == 'No':
        Y_train_numerical.append(0)

Y_train_numerical = np.array(Y_train_numerical)

LFAnalysis(L_test, test_lfs).lf_summary(Y_test_numerical)

Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.
llava_13b_test,0,"[0, 1]",0.964945,0.964945,0.854244,364,159,0.695985
llava_7b_test,1,"[0, 1]",0.968635,0.968635,0.857934,401,124,0.76381
llava_llama3_test,2,"[0, 1]",0.9631,0.9631,0.852399,392,130,0.750958
bakllava_test,3,"[0, 1]",0.96679,0.96679,0.856089,386,138,0.736641
llama_3_2_vision_test,4,"[0, 1]",0.96679,0.96679,0.856089,304,220,0.580153
llava_phi3_test,5,"[0, 1]",0.964945,0.964945,0.854244,363,160,0.694073
moondream_test,6,[1],0.96679,0.96679,0.856089,231,293,0.44084


In [61]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score

# Example ground truth and predictions for six models
# Replace these arrays with actual predictions from each model
y_true = Y_test_numerical
predictions = {}

for i in range(L_test.shape[1]):
    predictions[list_of_all_the_models[i]] = L_test[:,i]
    
# Create a DataFrame to store confusion matrix results and metrics
confusion_data = []

for model_name, y_pred in predictions.items():
    
    y_pred = np.array(y_pred)
    # Confusion Matrix
    metrics = calculate_metrics(Y_test_numerical, y_pred)
    
    tn, fp, fn, tp = metrics['Confusion Matrix'].ravel()
    precision = metrics['Precision']
    recall = metrics['Recall']
    f1 = metrics['F1 Score']
    accuracy = metrics['Accuracy']
    # Append data
    confusion_data.append([
        model_name, tn, fp, fn, tp, 
        recall, precision, accuracy, f1
    ])

# Convert to a DataFrame for display
confusion_df = pd.DataFrame(confusion_data, columns=[
    'Model', 'True Negative', 'False Positive', 'False Negative', 'True Positive', 
    'Recall', 'Precision', 'Accuracy', 'F1 Score'
])

# Display the table with confusion matrix and metrics
confusion_df

    

Unnamed: 0,Model,True Negative,False Positive,False Negative,True Positive,Recall,Precision,Accuracy,F1 Score
0,llava_13b_test,137,155,4,227,0.982684,0.594241,0.695985,0.74062
1,llava_7b_test,243,50,74,158,0.681034,0.759615,0.76381,0.718182
2,llava_llama3_test,176,115,15,216,0.935065,0.652568,0.750958,0.768683
3,bakllava_test,179,113,25,207,0.892241,0.646875,0.736641,0.75
4,llama_3_2_vision_test,256,37,183,48,0.207792,0.564706,0.580153,0.303797
5,llava_phi3_test,219,73,87,144,0.623377,0.663594,0.694073,0.642857
6,moondream_test,0,293,0,231,1.0,0.44084,0.44084,0.611921


# Majority Vote

In [62]:
def majority_vote(labels):
    # Assuming the labels are categorical and using mode to find the most frequent label
    from scipy.stats import mode
    # Using mode along axis=1 to find the most common element across columns
    modes = mode(labels, axis=1)
    # modes.mode contains the most common values, reshaping to (500,) for a clean 1D array output
    return modes.mode.reshape(-1)

# Applying the majority vote function
majority_labels_test = majority_vote(L_test)
majority_labels_train = majority_vote(L_train)

In [63]:
metrics = calculate_metrics(Y_test_numerical, majority_labels_test, majority_labels_test)
for metric, value in metrics.items():
    print(f"{metric}: {value}")

metrics = calculate_metrics(Y_train_numerical, majority_labels_train, majority_labels_train)
for metric, value in metrics.items():
    print(f"{metric}: {value}") 

Confusion Matrix: [[196  96]
 [ 24 207]]
Precision: 0.6831683168316832
Recall: 0.8961038961038961
F1 Score: 0.7752808988764045
Accuracy: 0.7705544933078394
auc: 0.8016341229789515
Confusion Matrix: [[2395  231]
 [  35  295]]
Precision: 0.5608365019011406
Recall: 0.8939393939393939
F1 Score: 0.6892523364485982
Accuracy: 0.9100135317997293
auc: 0.7063240320281985


# Snorkel

In [36]:
from snorkel.labeling.model import LabelModel

label_model = LabelModel(cardinality=2, verbose=False)
label_model.fit(L_train, n_epochs=5000, log_freq=500, seed=12345)

100%|████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:01<00:00, 2762.91epoch/s]


In [64]:
from snorkel.analysis import metric_score
from snorkel.utils import probs_to_preds

probs_dev = label_model.predict_proba(L_test)
preds_dev = probs_to_preds(probs_dev)

probs_train = label_model.predict_proba(L_train)
preds_train = probs_to_preds(probs_train)

metrics = calculate_metrics(Y_test_numerical, preds_dev, np.max(probs_dev, axis=1))
for metric, value in metrics.items():
    print(f"{metric}: {value}")

metrics = calculate_metrics(Y_train_numerical, preds_train, np.max(probs_train, axis=1))
for metric, value in metrics.items():
    print(f"{metric}: {value}")

Confusion Matrix: [[128 176]
 [  5 233]]
Precision: 0.5696821515892421
Recall: 0.9789915966386554
F1 Score: 0.7202472952086554
Accuracy: 0.6660516605166051
auc: 0.7637363816704039
Confusion Matrix: [[2107  582]
 [  10  335]]
Precision: 0.3653217011995638
Recall: 0.9710144927536232
F1 Score: 0.5309033280507132
Accuracy: 0.8048780487804879
auc: 0.7181036476600229


In [49]:
np.max(probs_dev, axis=1)

array([1.        , 1.        , 0.98154076, 1.        , 0.99690139,
       0.98154076, 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.98154076, 0.99690139, 0.98154076, 1.        ,
       0.98810192, 1.        , 0.98154076, 1.        , 0.98154076,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.98154076, 0.98154076, 1.        , 0.98154076,
       1.        , 1.        , 1.        , 0.9958436 , 1.        ,
       0.98154076, 1.        , 0.99999978, 0.98154076, 0.98154076,
       0.5       , 1.        , 1.        , 0.98154076, 0.99215528,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.5       , 1.        , 0.99999978, 1.        ,
       0.5       , 0.98154076, 1.        , 1.        , 1.        ,
       0.98154076, 0.99999954, 1.        , 0.98154076, 1.        ,
       0.98154076, 0.99999976, 1.        , 1.        , 0.99999976,
       1.        , 1.        , 0.98154076, 1.        , 1.     

# Hyper Label Model

In [40]:
from hyperlm import HyperLabelModel
hlm = HyperLabelModel()

In [41]:
hyper_pred_dev = hlm.infer(L_test[:,:])
hyper_pred_train = hlm.infer(L_train)

metrics = calculate_metrics(Y_test_numerical, hyper_pred_dev)
for metric, value in metrics.items():
    print(f"{metric}: {value}")

metrics = calculate_metrics(Y_train_numerical, hyper_pred_train)
for metric, value in metrics.items():
    print(f"{metric}: {value}")

Confusion Matrix: [[197 107]
 [ 25 213]]
Precision: 0.665625
Recall: 0.8949579831932774
F1 Score: 0.7634408602150538
Accuracy: 0.7564575645756457
auc: 0.8033542222239448
Confusion Matrix: [[2302  387]
 [  10  335]]
Precision: 0.46398891966759004
Recall: 0.9710144927536232
F1 Score: 0.6279287722586692
Accuracy: 0.8691496374423203
auc: 0.7191496956634742
