#https://betterprogramming.pub/how-to-measure-image-similarities-in-python-12f1cb2b7281

In [1]:
import cv2
import os
import image_similarity_measures
from sys import argv
from image_similarity_measures.quality_metrics import rmse, ssim, sre
import numpy as np
import pandas as pd
import csv
import datetime
%matplotlib inline
#The line above is necesary to show Matplotlib's plots inside a Jupyter Notebook
from matplotlib import pyplot as plt

import imagehash
from PIL import Image


import pickle


In [2]:
def get_sorted_dic_files(location):
    my_vid_frame_files=os.listdir(location)
    sort_dic_my_vid_frame_files={}
    for my_file in my_vid_frame_files:
        file_num=my_file.split(".")[0]
        sort_dic_my_vid_frame_files[int(file_num)]=my_file

    from collections import OrderedDict


    sort_dic_my_vid_frame_files = OrderedDict(sorted(sort_dic_my_vid_frame_files.items()))
    return sort_dic_my_vid_frame_files


def compare_frames(f1,f2,strategies):
    differences=[]
    for strategy in strategies:
        if strategy=="phash":
            diff=imagehash.phash(Image.open(f1))-imagehash.phash(Image.open(f2))
            differences.append(diff)
        elif strategy=="dhash":
            diff=imagehash.dhash(Image.open(f1))-imagehash.phash(Image.open(f2))
            differences.append(diff)
        elif strategy=="colorhash":
            diff=imagehash.colorhash(Image.open(f1))-imagehash.colorhash(Image.open(f2))
            differences.append(diff)            
        elif strategy=="ssim":
            f1_img = cv2.imread(f1)
            f2_img = cv2.imread(f2)
            diff= ssim(f1_img, f2_img)
            differences.append(diff)
        elif strategy=="sre":
            f1_img = cv2.imread(f1)
            f2_img = cv2.imread(f2)
            diff= sre(f1_img, f2_img)
            differences.append(diff)
        elif strategy=="rmse":
            f1_img = cv2.imread(f1)
            f2_img = cv2.imread(f2)
            diff= rmse(f1_img, f2_img)
            differences.append(diff)
    return differences

def store_in_diff_dic(diff_dic,strategies,diff_scores,arch_kframe_name):
    for i in range(len(strategies)):
        strategy=strategies[i]
        diff_score=diff_scores[i]
        if strategy not in diff_dic:
            diff_dic[strategy]={}
        if arch_kframe_name not in diff_dic[strategy]:
            diff_dic[strategy][arch_kframe_name]=diff_score
    return diff_dic


def get_best_diff(diff_dic,strategies,mappings):
    best_diff_dic={}
    
    for strategy in strategies:   
        best_diff_dic[strategy]={}
        dictn=diff_dic[strategy]
        if strategy in mappings["min"]:
            best_arch_kframe=min(dictn, key=dictn.get)
            best_val=min(list(diff_dic[strategy].values()))
        elif strategy in mappings["max"]:
            best_arch_kframe=max(dictn, key=dictn.get)
            best_val=max(list(diff_dic[strategy].values()))
        best_diff_dic[strategy][best_arch_kframe]=best_val
#     print("best",best_diff_dic)
    return best_diff_dic
        
def store_in_result_dic(all_results,least_diff_dic,strategies,ref_kf_path_name,archive_kf_location):
    for strategy in strategies:
        score=list(least_diff_dic[strategy].values())[0]
        best_match_archive_kf_fname=list(least_diff_dic[strategy].keys())[0]
        all_results["ref_kf_path_name"].append(ref_kf_path_name)
        all_results["arch_kf_path"].append(archive_kf_location)
        all_results["arch_kf_name"].append(best_match_archive_kf_fname)        
        all_results["strategy"].append(strategy)
        all_results["score"].append(score)    
    return all_results
        
        


In [3]:
strategies=["phash"]
mappings={}
mappings["min"]=["phash","dhash","rmse"]
mappings["max"]=["ssim","sre"]



In [4]:
hash_locations="../intermediate/hash_values/phash/dic_vals.p"
dic_hash_vals = pickle.load( open( hash_locations, "rb" ) )

In [5]:
reference_locations=[]
path_to_ref="/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/data/references/"
path_to_ref_kf="/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/"
for f in os.listdir(path_to_ref):
    f=f.split(".")[0]
    reference_locations.append(path_to_ref_kf+f)
    
print(reference_locations,len(reference_locations))

['/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c05_202102080735', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c03_202102071755', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c05_202101300845', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_202102140630', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c05_202102012240', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_202102260630', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_202101301300', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c05_202102010750', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_202101301200', '/ddn/gfxhome/asislam25/projects/other_misc_p

In [6]:
archive_locations=[]
path_to_archive="/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/data/setC/positive-c.zip/"
path_to_archive_kf="/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/"
for f in os.listdir(path_to_archive):
    f=f.split(".")[0]

    archive_locations.append(path_to_archive_kf+f)
    
print(archive_locations,len(archive_locations))    

['/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c09_20210210135939_C2', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_20210203062229_C4', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_20210205062047_C4', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c09_20210222174848_C1', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c09_20210227162920_C1', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_20210210081914_C3', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c01_20210131062134_C1', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c09_20210224174314_C2', '/ddn/gfxhome/asislam25/projects/other_misc_projects/video_copy/intermediate/keyframes/c05_20210203202303_C1', 

In [7]:
dic_hash_vals.keys()

dict_keys(['c03_202102071755', 'c01_202101301300', 'c01_202101301200', 'c02_202101301300', 'c08_202102021331', 'c01_202101300810', 'c05_202101301900', 'c01_202101300630', 'c06_202102010850', 'c01_202101301250', 'c01_202101301255', 'c09_20210210135939_C2', 'c01_20210203062229_C4', 'c01_20210205062047_C4', 'c01_20210210081914_C3', 'c01_20210131062134_C1', 'c05_20210203202303_C1', 'c01_20210130210232_C4', 'c01_20210130124435_C1', 'c01_20210131205546_C2', 'c01_20210130063213_C2', 'c01_20210202125721_C4', 'c01_20210130115216_C3', 'c05_20210205081528_C1', 'c01_20210202195259_C1', 'c01_20210131125108_C2', 'c01_20210202083021_C1', 'c01_20210130124857_C2', 'c01_20210131125922_C2', 'c01_20210130195400_C4', 'c01_20210130081629_C3', 'c01_20210204062142_C4', 'c09_20210204140043_C4', 'c01_20210131194822_C3', 'c01_20210201082747_C1', 'c01_20210202062315_C1', 'c01_20210130194220_C2', 'c05_202102080735', 'c05_202101300845', 'c01_202102140630', 'c05_202102012240', 'c01_202102260630', 'c05_202102010750',

In [8]:



def compare_kframes(list_references_kf_locations,list_archives_kf_locations,strategies,mappings,res_location,dic_hash_vals):
    all_results={}
    all_results["ref_kf_path_name"]=[]
    all_results["arch_kf_path"]=[]
    all_results["arch_kf_name"]=[]    
    all_results["strategy"]=[]
    all_results["score"]=[]    
    ref_count=0
    total_ref_count=len(list_references_kf_locations)
    for references_kf_location in list_references_kf_locations:        
        ref_count+=1        
        ref_file_name=references_kf_location.split("/")[-1]
        e = datetime.datetime.now()
        print(e,ref_file_name,ref_count,"/",total_ref_count)
        sorted_dict_reference_kf=get_sorted_dic_files(references_kf_location)
        ref_hash=dic_hash_vals[ref_file_name]
        
        count=0
        for ref_kf_num, ref_kf_file_name in sorted_dict_reference_kf.items():  
            ref_kf_hash_val=ref_hash[ref_kf_file_name]
#             print("start",ref_kf_num,ref_kf_file_name,ref_kf_hash_val)
            count+=1
            percent_complete=int(100*count/len(list(sorted_dict_reference_kf.keys())))
            if percent_complete%10==0:
                print(percent_complete,"% complete at ",datetime.datetime.now())
#             print("\t",ref_kf_num, ref_kf_file_name)
            f1=os.path.join(references_kf_location,ref_kf_file_name)
            for archive_kf_location in list_archives_kf_locations:
                arch_file_name=archive_kf_location.split("/")[-1]
                arch_hash=dic_hash_vals[arch_file_name]
                sorted_dict_arch_kf=get_sorted_dic_files(archive_kf_location)
                diff_dic={}
                # above will contain scores for each frame of reference key frames
                for arch_kf_num,arch_kf_file_name in sorted_dict_arch_kf.items():   
                    arch_kf_hash_val=arch_hash[arch_kf_file_name]
                    f2=os.path.join(archive_kf_location,arch_kf_file_name)
#                     print(f1,f2)
#                     diff_scores=compare_frames(f1,f2,strategies)
                    diff_scores=[ref_kf_hash_val-arch_kf_hash_val]
                    diff_dic=store_in_diff_dic(diff_dic,strategies,diff_scores,arch_kf_file_name)
#                 print(diff_dic)
                # finished with one set of key frames
#                 print("\t ",diff_dic)
                best_diff_dic=get_best_diff(diff_dic,strategies,mappings)
#                 print("\t\t",best_diff_dic)
                all_results=store_in_result_dic(all_results,best_diff_dic,strategies,f1,archive_kf_location)
                df=pd.DataFrame(all_results)
                df.to_csv(res_location,index=False)
               
    
    return all_results
                
    

In [None]:

# reference_locations=[
#     "/Users/ashhadulislam/projects/other_misc/video_copy/intermediate/keyframes/c01_202101300630",
#     "/Users/ashhadulislam/projects/other_misc/video_copy/intermediate/keyframes/c01_202101300810"    
# ]

# archive_locations=[
#     "/Users/ashhadulislam/projects/other_misc/video_copy/intermediate/keyframes/c01_20210130063213_C2",
#     "/Users/ashhadulislam/projects/other_misc/video_copy/intermediate/keyframes/c01_20210130081629_C3",    
#     "/Users/ashhadulislam/projects/other_misc/video_copy/intermediate/keyframes/c01_20210130115216_C3"
# ]
    
    
try:
    if not os.path.isdir("../results"):
        os.mkdir("../results")
except OSError:
        print("Error cant make directories")    

location="../results/res.csv"
all_results=compare_kframes(reference_locations,archive_locations,strategies,mappings,location,dic_hash_vals)

2022-10-06 23:08:35.075337 c05_202102080735 1 / 30
0 % complete at  2022-10-06 23:08:35.076692
10 % complete at  2022-10-06 23:08:51.236497
10 % complete at  2022-10-06 23:08:52.128836
20 % complete at  2022-10-06 23:09:10.229693
20 % complete at  2022-10-06 23:09:11.349080
30 % complete at  2022-10-06 23:09:33.660953
30 % complete at  2022-10-06 23:09:35.009017
40 % complete at  2022-10-06 23:10:01.292314
40 % complete at  2022-10-06 23:10:02.864126
50 % complete at  2022-10-06 23:10:33.609925
50 % complete at  2022-10-06 23:10:35.426393
60 % complete at  2022-10-06 23:11:09.771548
60 % complete at  2022-10-06 23:11:11.790840
70 % complete at  2022-10-06 23:11:51.082899
70 % complete at  2022-10-06 23:11:53.443326
80 % complete at  2022-10-06 23:12:38.810841
80 % complete at  2022-10-06 23:12:41.280684
90 % complete at  2022-10-06 23:13:27.956064
90 % complete at  2022-10-06 23:13:30.719898
100 % complete at  2022-10-06 23:14:22.770773
2022-10-06 23:14:25.679650 c03_202102071755 2 / 3

In [None]:
df=pd.DataFrame(all_results)


In [None]:
df.shape

In [None]:
df.head()

In [None]:
try:
    if not os.path.isdir("../results"):
        os.mkdir("../results")
    df.to_csv("../results/res_all.csv",index=False)
except OSError:
        print("Error cant make directories")