In [2]:
import numpy as np
import pandas as pd
import random
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# loading data
preds = np.load("../results/preds_linear.np")
label = np.load("../results/lab_linear.np")
print(preds.shape)

# create prd dataframe
preds = pd.Series(preds.tolist())
res = pd.DataFrame({'pred':preds, 'label': label})
res["count"] = res.apply(lambda row: len(res[res["label"] == row["label"]]), axis=1)
res = res[res["count"] > 100]


print(len(res))

(4975, 128)
0


In [5]:
def create_positif(df, nbr):
    duets = []
    list_label = list(set(df["label"].tolist()))
    
    for elem in range(nbr):
        chosen_label = random.choice(list_label)
        
        # select duet
        select = df[df["label"] == chosen_label]
        samp = select.sample(2)
        samp = samp["pred"].tolist()
        d = (samp[0], samp[1])
        
        duets.append(d)
        
    return duets
        
        

def create_negatif(df, nbr):
    duets = []
    list_label = list(set(df["label"].tolist()))
    
    for elem in range(nbr):
        chosen_label = random.choice(list_label)
        
        # select duet
        pos = df[df["label"] == chosen_label]
        neg = df[~(df["label"] == chosen_label)]
        
        p_samp = pos.sample(1)
        n_samp = neg.sample(1)
        
        p_samp = p_samp["pred"].tolist()[0]
        n_samp = n_samp["pred"].tolist()[0]
        
        
        d = (p_samp, n_samp)
        
        duets.append(d)
        
    return duets
        



In [39]:
positives = create_positif(res, 5000)
negatives = create_negatif(res, 5000)

print(len(positives))
print(len(negatives))

5000
5000


In [43]:
# Evaluation of the model

limit_dist = 1

tp = 0
tn = 0
fp = 0
fn = 0

# Evaluate positif
cosine_pos = []
eucli = []
for pos in positives:
    t1 = np.array([pos[0]])
    t2 = np.array([pos[1]])
    cosine_pos.append(cosine_similarity(t1, t2)[0][0])
    eucli.append(np.linalg.norm(t1[0]-t2[0]))
    if cosine_pos[-1] <= limit_dist:
        tp += 1
    else:
        fn += 1

c = np.array(cosine_pos)
e = np.array(eucli)
print("Moyenne consine {} for {}".format(c.mean(), len(c)))
print("Moyenne eucli {} for {}".format(e.mean(), len(e)))


cosine_neg = []
eucli = []
for neg in negatives:
    t1 = np.array([neg[0]])
    t2 = np.array([neg[1]])
    cosine_neg.append(cosine_similarity(t1, t2)[0][0])
    eucli.append(np.linalg.norm(t1[0]-t2[0]))
    
    if cosine_pos[-1] > limit_dist:
        tn += 1
    else:
        fp += 1
    
c = np.array(cosine_neg)
e = np.array(eucli)
print("Moyenne consine {}".format(c.mean()))
print("Moyenne eucli {} for {}".format(e.mean(), len(e)))

print("\n\n")

accuracy = (tp + tn) / 10000
precision = tp /(fp + tp)
recall = tp / (fn+tp)
f1 = 2*((precision*recall)/(precision+recall))

print("accuracy: {}".format(accuracy))
print("precision: {}".format(precision))
print("recall: {}".format(recall))
print("f1: {}".format(f1))

Moyenne consine 0.9999999999999889 for 5000
Moyenne eucli 8.261776593016223e-09 for 5000
Moyenne consine 0.9999999999999891
Moyenne eucli 8.166754353848952e-09 for 5000



accuracy: 0.3246
precision: 0.39364540383216107
recall: 0.6492
f1: 0.49011022195379744


In [16]:
t1 = res["pred"].tolist()[0]
t2 = res["pred"].tolist()[1000]

t2[0] += 3

print((t1 == t2))

False
