## Load Truth and Predictions

In [1]:
import pandas as pd
from tqdm import tqdm

**Ground Truth**

In [2]:
df_truth = pd.read_csv('labeled_sameMovie_truth.txt', sep='\t', header=None)
print(f'df_truth.shape: {df_truth.shape}')
df_truth.head(3)

df_truth.shape: (598, 3)


Unnamed: 0,0,1,2
0,7012,1050,0
1,46,20,0
2,26,72,0


In [3]:
# Convert to a dict with double index
dict_truth = {}
for idx, (idx_a, idx_b, truth) in tqdm(df_truth.iterrows(), total=df_truth.shape[0]):
    dict_truth[(int(idx_a), int(idx_b))] = int(truth)
len(dict_truth)

100%|█████████████████████████████████████████████████████████████████████████████| 598/598 [00:00<00:00, 11184.66it/s]


598

In [4]:
dict_truth_inverse = {0:[], 1:[]}
for idx, (idx_a, idx_b, truth) in tqdm(df_truth.iterrows(), total=df_truth.shape[0]):
    if truth >= 0.5:
        dict_truth_inverse[1].append((int(idx_a), int(idx_b)))
    else:
        dict_truth_inverse[0].append((int(idx_a), int(idx_b)))
len(dict_truth_inverse[0]) + len(dict_truth_inverse[1])

100%|██████████████████████████████████████████████████████████████████████████████| 598/598 [00:00<00:00, 6520.24it/s]


598

**Predictions**

In [5]:
df_pred = pd.read_csv('./inferred-predicates_hw8/SAMEMOVIE.txt', sep='\t', header=None)
print(f'df_pred.shape: {df_pred.shape}')
df_pred.head(3)

df_pred.shape: (33148, 3)


Unnamed: 0,0,1,2
0,3377,2859,0.000665
1,10262,10262,1.0
2,3247,7671,0.000242


In [6]:
# Keep only the predictions which are in the ground truth
labeled_candidates = set(zip(df_truth[0], df_truth[1]))
df_pred_filtered = pd.DataFrame(columns = [0, 1, 2])

for idx, row in tqdm(df_pred.iterrows(), total=df_pred.shape[0]):
    if (int(row[0]), int(row[1])) in labeled_candidates or (int(row[1]), int(row[0])) in labeled_candidates:
        df_pred_filtered = df_pred_filtered.append({0: int(row[0]),
                                                    1: int(row[1]),
                                                    2: int(row[2])},
                                                   ignore_index=True)

print(f'df_pred_filtered.shape: {df_pred_filtered.shape}')
df_pred_filtered.head(3)

100%|█████████████████████████████████████████████████████████████████████████| 33148/33148 [00:02<00:00, 11286.28it/s]

df_pred_filtered.shape: (267, 3)





Unnamed: 0,0,1,2
0,3410,3410,1
1,3317,3435,0
2,3353,3353,1


The size of the my DataFrame with predictions is smaller than that of the DataFrame with the truth because the truth DataFrame containts many true non-matches, which weren't even generated by my code as nearly all of those non-matches didn't even fall within the same block, making it impossible for me to even predict their non-matchess.

In [32]:
# Convert to a dict with double index
dict_pred = {}
for idx, (idx_a, idx_b, call) in tqdm(df_pred.iterrows(), total=df_pred.shape[0]):
    dict_pred[(int(idx_a), int(idx_b))] = 1 if call >= 0.5 else 0
len(dict_pred)

100%|██████████████████████████████████████████████████████████████████████████████| 267/267 [00:00<00:00, 9889.26it/s]


267

In [33]:
# Then for all keys in the truth which were not predicted by my model, add them with a prediction for non-match
for key in dict_truth.keys():
    if key not in dict_pred.keys():
        dict_pred[key] = 0
len(dict_pred)

644

In [41]:
dict_pred_inverse = {0:[], 1:[]}
for idx, (idx_a, idx_b, call) in tqdm(df_pred_filtered.iterrows(), total=df_pred_filtered.shape[0]):
    if call >= 0.5:
        dict_pred_inverse[1].append((int(idx_a), int(idx_b)))
    else:
        dict_pred_inverse[0].append((int(idx_a), int(idx_b)))
len(dict_pred_inverse[0]) + len(dict_pred_inverse[1])

100%|██████████████████████████████████████████████████████████████████████████████| 267/267 [00:00<00:00, 9206.58it/s]


267

In [42]:
# Then for all keys in the truth which were not predicted by my model, add them with a prediction for non-match
for (idx_a, idx_b) in dict_truth_inverse[0]:
    if (idx_a, idx_b) not in dict_pred_inverse[0]:
        dict_pred_inverse[0].append((idx_a, idx_b))
len(dict_pred_inverse[0]) + len(dict_pred_inverse[1])  

640

In [43]:
for key, value in dict_pred.items():
    if key not in dict_pred_inverse.values():
        dict_pred_inverse[value].append(key)
len(dict_pred_inverse[0]) + len(dict_pred_inverse[1])  

1284

In [37]:
for key, value in dict_pred_inverse.items():
    if value not in dict_pred.values():
        dict_pred[value] = key
len(dict_pred)

644

## Binary Classification Metrics For Class 0

### Precision

"Of all calls I made, how many were correctly made?"

In [11]:
TP = 0
FP = 0

# dict_pred_inverse[1] = all the calls I made
for match in dict_pred_inverse[0]: 
    if dict_truth.get(match) == 0:
        TP += 1
    else:
        FP += 1
        
precision_0 = TP / (TP + FP)

print(f'TP: {TP:>4}')
print(f'FP: {FP:>4}')
print(f'Precision: {precision_0:.5f}')

TP:  409
FP:   58
Precision: 0.87580


### Recall

"Of all calls I should have made, how many did I make?"

In [12]:
TP = 0
FN = 0

# dict_truth_inverse[1] = all the calls I should have made
for match in dict_truth_inverse[0]: 
    if dict_pred.get(match) == 0:
        TP += 1
    else:
        FN += 1
        
recall_0 = TP / (TP + FN)

print(f'TP: {TP:>4}')
print(f'FN: {FN:>4}')
print(f'Recall: {recall_0:.5f}')

TP:  407
FN:    2
Recall: 0.99511


### F1-Score

In [13]:
F1_0 = (2 * precision_0 * recall_0) / (precision_0 + recall_0)
print(f'F1 Score: {F1_0:.5f}')

F1 Score: 0.93165


## Binary Classification Metrics For Class 1

### Precision

"Of all calls I made, how many were correctly made"

In [14]:
TP = 0
FP = 0

# dict_pred_inverse[1] = all the calls I made
for match in dict_pred_inverse[1]: 
    if dict_truth.get(match) == 1:
        TP += 1
    else:
        FP += 1
        
precision_1 = TP / (TP + FP)

print(f'TP: {TP:>4}')
print(f'FP: {FP:>4}')
print(f'Precision: {precision_1:.5f}')

TP:  171
FP:    2
Precision: 0.98844


### Recall

"Of all calls I should have made, how many did I make?"

In [15]:
TP = 0
FN = 0

# dict_truth_inverse[1] = all the calls I should have made
for match in dict_truth_inverse[1]: 
    if dict_pred.get(match) == 1:
        TP += 1
    else:
        FN += 1
        
recall_1 = TP / (TP + FN)

print(f'TP: {TP:>4}')
print(f'FN: {FN:>4}')
print(f'Recall: {recall_1:.5f}')

TP:  171
FN:   18
Recall: 0.90476


### F1-Score

In [16]:
F1_1 = (2 * precision_1 * recall_1) / (precision_1 + recall_1)
print(f'F1 Score: {F1_1:.5f}')

F1 Score: 0.94475


Matheus Schmitz

USC ID: 5039286453