In [None]:
import torch
import torchvision
from torchvision import transforms
from torch import nn, optim

import os
import pandas as pd
from torchvision.io import read_image

from torch.utils.data import Dataset
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
import json

from IPython.display import clear_output

In [None]:
def _video_of(img_name):
    """Return the video folder of an image path shaped like '<dataset>/<video>/<frame>'."""
    return img_name.split('/')[1]


def _count_per_video(frames, column):
    """Count the rows of `frames` per video, most frequent first, in one column named `column`."""
    return (
        frames.assign(video=frames['img_name'].apply(_video_of))
        .groupby('video')
        .count()
        .sort_values('img_name', ascending=False)[['img_name']]
        .rename(columns={'img_name': column})
    )


def _show_errors(frames, limit, kind, img_root):
    """Display the first `limit` images of `frames`, titled with `kind`, score and image name."""
    # max(..., 0) keeps "show nothing" for negative limits (head(-k) would drop the last k rows instead).
    for _, row in frames.head(max(limit, 0)).iterrows():
        # The context manager closes the image file once it has been drawn.
        with Image.open(os.path.join(img_root, row['img_name'])) as img:
            plt.imshow(img)
            plt.title(f"{kind} - Score: {row['score']:.2f} - {row['img_name']}")
            plt.show()


def results_analysis(results, n_FP=20, n_FN=100, img_root='/home/msouda/Datasets'):
    """Show the most confident classification errors and summarise the errors per video.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per image with the columns 'img_name', 'label', 'predicted' and
        'score'. The video name is taken as the second '/'-separated component
        of 'img_name'.
    n_FP, n_FN : int
        Maximum number of false-positive / false-negative images to display.
    img_root : str
        Directory that 'img_name' is relative to. Defaults to the location
        that was previously hard-coded.

    Returns
    -------
    fp_videos : pandas.DataFrame
        Indexed by video, for videos with at least one false positive:
        'count_fp' (false positives) and 'count_p' (images labelled 1),
        sorted by 'count_p' ascending.
    fn_videos : pandas.DataFrame
        Indexed by video, for videos with at least one false negative:
        'count_fn', 'count_p' and 'count_tp' (true positives), sorted by
        'count_p' ascending.
    false_positive : pandas.DataFrame
        Rows with predicted == 1 and label == 0, highest score first.
    false_negative : pandas.DataFrame
        Rows with predicted == 0 and label == 1, lowest score first.
    """
    false_positive = (
        results.query('predicted == 1').query("label == 0")
        .sort_values('score', ascending=False)
        .reset_index(drop=True)
    )
    false_negative = (
        results.query('predicted == 0').query("label == 1")
        .sort_values('score', ascending=True)
        .reset_index(drop=True)
    )

    _show_errors(false_positive, n_FP, 'False positive', img_root)
    _show_errors(false_negative, n_FN, 'False negative', img_root)

    # Per-video totals over the whole result set. The left merges below keep only
    # the videos present in each error table, so no pre-filtering is required.
    positives = results.query("label == 1")
    positives_per_video = _count_per_video(positives, 'count_p')
    true_positives_per_video = _count_per_video(positives.query('predicted == 1'), 'count_tp')

    merge_on_video = dict(how='left', left_index=True, right_index=True)

    fn_videos = (
        _count_per_video(false_negative, 'count_fn')
        .merge(positives_per_video, **merge_on_video)
        .fillna(0)
        .sort_values('count_p', ascending=True)
        .merge(true_positives_per_video, **merge_on_video)
        .fillna(0)  # videos without any true positive get 0
    )

    fp_videos = (
        _count_per_video(false_positive, 'count_fp')
        .merge(positives_per_video, **merge_on_video)
        .fillna(0)  # videos without any positive label get 0
        .sort_values('count_p', ascending=True)
    )
    return fp_videos, fn_videos, false_positive, false_negative



In [None]:
def count_video(df):
    """Count the rows of `df` per video.

    The video name is the second '/'-separated component of 'img_name'.
    Returns a frame indexed by 'video' with a single 'count' column,
    ordered from the most to the least frequent video.
    """
    video_names = df['img_name'].apply(lambda path: path.split('/')[1])
    per_video = df.assign(video=video_names).groupby('video').count()
    ranked = per_video.sort_values('img_name', ascending=False)
    return ranked[['img_name']].rename(columns={'img_name': 'count'})

In [None]:
def results_df_video(results, n_FP=20, n_FN=100):
    """Build a per-video table of total, FP, FN, TP and TN image counts.

    `results` needs the columns 'img_name', 'label', 'predicted' and 'score'.
    The returned frame is indexed by video (second '/'-separated component of
    'img_name'), ordered by 'count_tot' descending, with the columns
    'count_tot', 'count_fp', 'count_fn', 'count_tp' and 'count_tn'; videos
    missing from an outcome get 0.

    `n_FP` and `n_FN` are accepted but not read by this function.
    """
    video_names = results['img_name'].apply(lambda path: path.split('/')[1])
    videos = (
        results.assign(video=video_names)
        .groupby('video')
        .count()
        .sort_values('img_name', ascending=False)[['img_name']]
        .rename(columns={'img_name': 'count_tot'})
    )

    # (output column, filter on prediction, filter on label, ascending score order)
    outcomes = (
        ('count_fp', 'predicted == 1', "label == 0", False),
        ('count_fn', 'predicted == 0', "label == 1", True),
        ('count_tp', 'predicted == 1', "label == 1", False),
        ('count_tn', 'predicted == 0', "label == 0", True),
    )
    for column, predicted_filter, label_filter, ascending in outcomes:
        subset = (
            results.query(predicted_filter).query(label_filter)
            .sort_values('score', ascending=ascending)
            .reset_index(drop=True)
        )
        per_video = count_video(subset).rename(columns={'count': column})
        videos = videos.merge(
            per_video,
            how='left',
            left_index=True,
            right_index=True,
        ).fillna(0)
    return videos
    


In [None]:
# Load the per-image predictions for the training split. The CSV column
# 'keyframe_id' is renamed to 'img_name' and the 'Unnamed: 0' column is dropped.
# NOTE(review): absolute, user-specific path — consider a configurable results directory.
train_results = pd.read_csv('/home/msouda/Workspace/results/dino_anonym_50_final.pth_train_results.csv').rename(columns={'keyframe_id': 'img_name'}).drop(columns=['Unnamed: 0'])
train_results

In [None]:
# Same loading steps for the test split.
test_results = pd.read_csv('/home/msouda/Workspace/results/dino_anonym_50_final.pth_test_results.csv').rename(columns={'keyframe_id': 'img_name'}).drop(columns=['Unnamed: 0'])
test_results

In [None]:
# The annotations file has no header row; its two columns are named 'img_name' and 'class'.
annotations = pd.read_csv('/home/msouda/Datasets/true_anonymized/annotations.csv', header=None, names=['img_name', 'class'])
annotations

In [None]:
# Left-join the annotations onto the training results. Annotation names are
# prefixed with 'true_anonymized/' before joining on 'img_name'; rows without
# a matching annotation keep a missing 'class'.
train_results = train_results.merge(
    annotations.assign(img_name = annotations['img_name'].apply(lambda x: 'true_anonymized/'+x)),
    how='left',
    on='img_name',

)
train_results

In [None]:
# Same join for the test results.
test_results = test_results.merge(
    annotations.assign(img_name = annotations['img_name'].apply(lambda x: 'true_anonymized/'+x)),
    how='left',
    on='img_name',

)
test_results

In [None]:
# Replace the 'label' column of the results with the annotation 'class' column.
# NOTE(review): not idempotent — running this cell a second time drops the
# freshly renamed 'label' column and leaves the frame without labels.
train_results = train_results.drop('label', axis=1).rename(columns={'class': 'label'})

In [None]:
# Same relabelling for the test results (same re-run caveat).
test_results = test_results.drop('label', axis=1).rename(columns={'class': 'label'})

In [None]:
# Split the test results into the four confusion-matrix groups. Positive
# predictions are ordered by descending score, negative predictions by
# ascending score, so the most confident rows come first in each frame.
false_positive = test_results.query('predicted == 1').query("label == 0").sort_values('score', ascending=False).reset_index(drop=True)
false_negative = test_results.query('predicted == 0').query("label == 1").sort_values('score', ascending=True).reset_index(drop=True)
true_positive = test_results.query('predicted == 1').query("label == 1").sort_values('score', ascending=False).reset_index(drop=True)
true_negative = test_results.query('predicted == 0').query("label == 0").sort_values('score', ascending=True).reset_index(drop=True)

In [None]:
# Accuracy, precision, recall and F1 computed from the group sizes.
# NOTE(review): rows whose label is neither 0 nor 1 (e.g. missing) belong to no
# group but still count in the accuracy denominator; precision/recall/F1 raise
# ZeroDivisionError when their denominator is 0.
print(f'Accuracy = {(true_positive.shape[0]+true_negative.shape[0])/test_results.shape[0]:.2f}')
print(f'Precision = {true_positive.shape[0]/(true_positive.shape[0]+false_positive.shape[0]):.2f}')
print(f'Recall = {true_positive.shape[0]/(true_positive.shape[0]+false_negative.shape[0]):.2f}')
print(f'F1 = {2*true_positive.shape[0]/(2*true_positive.shape[0]+false_positive.shape[0]+false_negative.shape[0]):.2f}')


In [None]:
# Run the error analysis on the test split: display up to 40 false-positive
# images and no false-negative image, then show the per-video FN summary.
fp_videos, fn_videos,fp,fn = results_analysis(test_results, n_FP=40, n_FN=0)
fn_videos

In [None]:
# Browse the false positives whose score lies strictly between 0.98 and 0.999,
# grouped by video and image name.
# NOTE(review): scores >= 0.999 are excluded although the printed label says
# "Absolute false positives" — confirm the upper bound is intended.
tmp = fp.query('score<0.999').query('score>0.98').assign(video = fp['img_name'].apply(lambda x: x.split('/')[1])).sort_values(['video', 'img_name'])
print(f'Absolute false positives: {tmp.shape[0]}')
for i, row in tmp.iterrows():
    img = Image.open(os.path.join('/home/msouda/Datasets',row['img_name']))
    plt.imshow(img)
    plt.title(f"False positive - Score: {row['score']:.2f} - {row['img_name']}")
    plt.show()
        

In [None]:
# Display the full false-negative table returned by results_analysis.
fn

In [None]:
# Browse the false negatives scored below 0.001, grouped by video and image
# name. `tmp` is overwritten here (it previously held the false-positive subset).
tmp = fn.query('score<0.001').assign(video = fn['img_name'].apply(lambda x: x.split('/')[1])).sort_values(['video', 'img_name'])
print(f'Absolute false negatives: {tmp.shape[0]}')
for i, row in tmp.iterrows():
    img = Image.open(os.path.join('/home/msouda/Datasets',row['img_name']))
    plt.imshow(img)
    plt.title(f"False negatives - Score: {row['score']:.2f} - {row['img_name']}")
    plt.show()
        

In [None]:
# The ten videos with the most false positives.
fp_videos.sort_values("count_fp", ascending=False).head(10)

In [None]:
# Look up one frame by its 'sec' token.
# NOTE(review): the 'sec' column is only added to `tmp` by the next cell, so in
# top-to-bottom order this cell runs against a `tmp` without that column.
tmp.query("sec == 's2539'")

In [None]:
# False positives of one specific video with score > 0.99. When any exist, a
# 'sec' token is parsed out of the frame file name (third '_'-separated part of
# the third path component, extension removed), the rows are ordered by it and
# the index is reset to 0..N-1.
tmp = fp.assign(video = fp['img_name'].apply(lambda x: x.split('/')[1])).query("video == 'fr2_20100506T203819'").sort_values('score', ascending=False).query('score>0.99')
if len(tmp)>0:
    tmp = tmp.assign(sec = fp['img_name'].apply(lambda x: x.split('/')[2].split('_')[2].split('.')[0])).sort_values('sec', ascending=True).reset_index(drop=True)
# State for the manual stepping cell below: number of rows, names already
# visited, and the cursor position.
N = len(tmp)
treated = []
i = 0
print(f"{N} images")

In [None]:
# Manual stepping: each execution prints row `i`, records its image name in
# `treated` and advances the cursor. Fails once `i` reaches the end of `tmp`.
print(tmp.loc[i])
treated.append(tmp['img_name'][i])
i+=1

In [None]:
# Training rows whose image name contains this video name. regex=False is
# required because the name contains '+': as a regular expression 'c+' means
# "one or more c", which never matches the literal text 'c+__...'.
train_results[train_results["img_name"].str.contains('c+__20100205T222818', regex=False)]

In [None]:
# Training rows whose image name, with its first 16 characters removed, starts
# with this video name. NOTE(review): 16 is the length of 'true_anonymized/';
# presumably every image name carries that prefix — confirm.
train_results[train_results.assign(img_name = train_results["img_name"].apply(lambda x: x[16:]))["img_name"].str.startswith('c+__20100205T222818')]#.query("label == 1")

In [None]:
# Summary statistics and 20-bin histogram of the false-negative scores held in
# the notebook-level `false_negative` frame.
print(false_negative['score'].describe())
false_negative['score'].hist(bins=20)

In [None]:
# Same for the notebook-level `false_positive` frame.
print(false_positive['score'].describe())
false_positive['score'].hist(bins=20)

In [None]:
# Replace `test_results` with another results file (path relative to the
# notebook's working directory). From here on `test_results` no longer refers
# to the frame loaded and relabelled at the top of the notebook.
test_results = pd.read_csv('results_test_4.csv').rename(columns={'keyframe_id': 'img_name'})

In [None]:
# Re-run the error analysis on the new results, displaying up to 1000
# false-positive images and no false-negative image.
fp_videos, fn_videos,fp,fn = results_analysis(test_results, n_FP=1000, n_FN=0)
fn_videos

In [None]:
# Videos that have false positives, plus a cursor for the stepping cell below.
list_fp_videos = fp_videos.index.to_list()
i=0

In [None]:
# Manual stepping through the false positives of one video per execution.
# Image names are compared after removing their first 16 characters, and a
# 'second' token is parsed from the frame file name.
# NOTE(review): the cursor `i` is shared with the other stepping cells of this
# notebook; re-initialise it with the cell above before stepping.
video = list_fp_videos[i]
print(video)
i+=1
tmp = fp.assign(img_name = fp['img_name'].apply(lambda x: x[16:])).query("img_name.str.startswith(@video)").sort_values('score', ascending=False)
tmp = tmp.assign(second = tmp['img_name'].apply(lambda x: x.split('/')[1][4:].split('_')[1]))
for j, row in tmp.iterrows():
    img = Image.open(os.path.join('/home/msouda/Datasets/true_anonymized',row['img_name']))
    plt.imshow(img)
    plt.title(f"False positive - Score: {row['score']:.2f} - {row['img_name']}")
    plt.show()
tmp

In [None]:
# Videos that have false negatives; resets the shared cursor `i`.
list_fn_videos = fn_videos.index.to_list()
i=0

In [None]:
# Same manual stepping for false negatives, lowest score first.
video = list_fn_videos[i]
print(video)
i+=1
tmp = fn.assign(img_name = fn['img_name'].apply(lambda x: x[16:])).query("img_name.str.startswith(@video)").sort_values('score', ascending=True)
tmp = tmp.assign(second = tmp['img_name'].apply(lambda x: x.split('/')[1][4:].split('_')[1]))
for j, row in tmp.iterrows():
    img = Image.open(os.path.join('/home/msouda/Datasets/true_anonymized',row['img_name']))
    plt.imshow(img)
    plt.title(f"False negative - Score: {row['score']:.2f} - {row['img_name']}")
    plt.show()
tmp

In [None]:
# Re-display the most recently built `tmp` frame.
tmp

In [None]:
# Score distribution of the false negatives from the latest results_analysis call.
print(fn['score'].describe())
fn['score'].hist(bins=20)

In [None]:
# Score distribution of the false positives from the latest results_analysis call.
print(fp['score'].describe())
fp['score'].hist(bins=20)

In [None]:
# Score distribution over all test rows.
print(test_results['score'].describe())
test_results['score'].hist(bins=50)

In [None]:
# Rows scored above 0.99.
test_results[test_results['score'] >0.99]

In [None]:
def trans(x,epsilon,n):
    """Return the n-th root of (x + epsilon), leaving exact 0 and 1 unchanged.

    The root is computed as exp(log(x + epsilon) / n). The fixed points are
    returned as floats on purpose: np.vectorize infers its output dtype from
    the first value returned, so an integer 0 or 1 for the first element would
    truncate every other result to an integer.
    """
    if x == 0:
        return 0.0
    if x == 1:
        return 1.0
    return np.exp(np.log(x+epsilon)/n)

In [None]:
# Distribution of the scores after applying `trans` element-wise with n = 20.
# epsilon is the smallest strictly positive score multiplied by 1e-50.
a = pd.Series(np.vectorize(trans)(test_results['score'].to_numpy(),min(test_results['score'][test_results['score']>0])*1e-50, 20))
print(a.describe())
a.hist(bins=50)

In [None]:
# Distribution of log(score + epsilon), with the same epsilon definition; the
# offset keeps the logarithm finite for scores equal to 0 as long as epsilon
# itself does not underflow to 0.
epsilon = min(test_results['score'][test_results['score']>0])*1e-50
print(test_results['score'].apply(lambda x: np.log(x+epsilon)).describe())
test_results['score'].apply(lambda x: np.log(x+epsilon)).hist(bins=50)

In [None]:
# Distribution of the scores after 1000 iterations of log(x + 1), divided by
# the value obtained for a score of 1.
# NOTE(review): `logn` is defined in the following cell, so this cell fails
# with NameError when the notebook is run top to bottom on a fresh kernel.
print(test_results['score'].apply(lambda x: logn(x,1000)/logn(1,1000)).describe())
test_results['score'].apply(lambda x: logn(x,1000)/logn(1,1000)).hist(bins=50)

In [None]:
def logn(x,n):
    """Apply the map v -> log(v + 1) to `x` `n` times and return the result."""
    value = x
    for _ in range(n):
        value = np.log(value + 1)
    return value

In [None]:
# Per-year false-positive statistics. The year is taken from characters 4-7 of
# the video folder name (e.g. 'fr2_20100506T203819' -> '2010').
# NOTE(review): the high-score subset uses score > 0.999 while its column is
# named 'count_fp>0.99' — confirm which threshold is intended.
count = fp.assign(year = fp['img_name'].apply(lambda x: x.split('/')[1][4:8])).groupby('year').count()[['img_name']].rename(columns={'img_name': 'count_fp'})
mean = fp.assign(year = fp['img_name'].apply(lambda x: x.split('/')[1][4:8])).groupby('year').mean(numeric_only=True)
_count = fp.query('score>0.999').assign(year = fp['img_name'].apply(lambda x: x.split('/')[1][4:8])).groupby('year').count()[['img_name']].rename(columns={'img_name': 'count_fp>0.99'})
# Columns are aligned on the 'year' index; years without any high-score false
# positive end up with missing count_fp99 / ratio99.
count = count.assign(mean_fp_score = mean['score'], count_fp99 = _count['count_fp>0.99'], ratio99 = _count['count_fp>0.99']/count['count_fp'])
count

In [None]:
# Total number of test images per year, joined with the false-positive statistics.
count2 = test_results.assign(year = test_results['img_name'].apply(lambda x: x.split('/')[1][4:8])).groupby('year').count()[['img_name']].rename(columns={'img_name': 'count_tot'})
count2.merge(count, how='left', left_index=True, right_index=True)

In [None]:
# Per-video confusion counts for the current test results.
results_video_test = results_df_video(test_results)

In [None]:
# Replace `train_results` with another results file (relative path) and
# summarise the scores of its false positives. `false_positive` is overwritten.
train_results = pd.read_csv('results_train_3.csv').rename(columns={'keyframe_id': 'img_name'})
false_positive = train_results.query('predicted == 1').query("label == 0").sort_values('score', ascending=False).reset_index(drop=True)
false_positive['score'].describe()

In [None]:
# Display the full training results frame.
train_results

In [None]:
# Score summary of the training false negatives. `false_negative` is overwritten.
false_negative = train_results.query('predicted == 0').query("label == 1").sort_values('score', ascending=True).reset_index(drop=True)
false_negative['score'].describe()

In [None]:
# Switch `test_results` to the 'results_test_3bis.csv' file.
test_results = pd.read_csv('results_test_3bis.csv').rename(columns={'keyframe_id': 'img_name'})

In [None]:
# Score summary of the false negatives for that file.
false_negative = test_results.query('predicted == 0').query("label == 1").sort_values('score', ascending=True).reset_index(drop=True)
false_negative['score'].describe()

In [None]:
# Switch `test_results` to the 'results_test_3.csv' file.
test_results = pd.read_csv('results_test_3.csv').rename(columns={'keyframe_id': 'img_name'})

In [None]:
# Score summary of the false negatives for that file.
false_negative = test_results.query('predicted == 0').query("label == 1").sort_values('score', ascending=True).reset_index(drop=True)
false_negative['score'].describe()

In [None]:
# Load the training metadata (JSON) consumed by plot_metadata; the path is
# relative to the notebook's working directory.
with open('../../train_metadata/dino_anonym_20_metadata.json') as f:
    dino_metadata = json.load(f)

In [None]:
def plot_metadata(metadata):
    """Print a training summary and plot the loss and metric curves.

    `metadata` is a mapping providing 'model_name', 'num_epochs',
    'batch_size', 'learning_rate', 'pretrained', 'train_duration' (divided by
    3600 to print hours) and the per-epoch sequences 'train_loss',
    'test_loss', 'test_accuracy', 'test_precision', 'test_recall' and
    'test_f1'. Two figures are shown: the losses, then the test metrics.
    """
    print(f"_________Model : {metadata['model_name']}_________\n")
    print(f"Info : {metadata['num_epochs']} epochs - {metadata['batch_size']} batch size - {metadata['learning_rate']} learning rate - {metadata['pretrained']} pretrained - Trained for {metadata['train_duration']/3600:.2f} hours \n")

    # Loss figure: the 'test_loss' series is drawn under the legend 'val_loss'.
    for key, legend in (('train_loss', 'train_loss'), ('test_loss', 'val_loss')):
        plt.plot(metadata[key], label=legend)
    plt.legend()
    plt.title('Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()

    # Metrics figure: one curve per test metric, labelled with its key.
    for key in ('test_accuracy', 'test_precision', 'test_recall', 'test_f1'):
        plt.plot(metadata[key], label=key)
    plt.legend()
    plt.title('Metrics')
    plt.xlabel('Epoch')
    plt.ylabel('Metrics')
    plt.show()


In [None]:
# Print the summary and draw the training curves for the metadata loaded from the JSON file.
plot_metadata(dino_metadata)