In [1]:
import cv2
import os
import image_similarity_measures
from sys import argv
from image_similarity_measures.quality_metrics import rmse, ssim, sre
import numpy as np
import pandas as pd
import csv
import datetime
from matplotlib import pyplot as plt

import imagehash
from PIL import Image


import pickle


import time
import multiprocessing

In [2]:
def get_sorted_dic_files(location):
    my_vid_frame_files=os.listdir(location)
    sort_dic_my_vid_frame_files={}
    for my_file in my_vid_frame_files:
        file_num=my_file.split(".")[0]
        sort_dic_my_vid_frame_files[int(file_num)]=my_file

    from collections import OrderedDict


    sort_dic_my_vid_frame_files = OrderedDict(sorted(sort_dic_my_vid_frame_files.items()))
    return sort_dic_my_vid_frame_files


def compare_frames(f1,f2,strategies):
    differences=[]
    for strategy in strategies:
        if strategy=="phash":
            diff=imagehash.phash(Image.open(f1))-imagehash.phash(Image.open(f2))
            differences.append(diff)
        elif strategy=="dhash":
            diff=imagehash.dhash(Image.open(f1))-imagehash.phash(Image.open(f2))
            differences.append(diff)
        elif strategy=="colorhash":
            diff=imagehash.colorhash(Image.open(f1))-imagehash.colorhash(Image.open(f2))
            differences.append(diff)            
        elif strategy=="ssim":
            f1_img = cv2.imread(f1)
            f2_img = cv2.imread(f2)
            diff= ssim(f1_img, f2_img)
            differences.append(diff)
        elif strategy=="sre":
            f1_img = cv2.imread(f1)
            f2_img = cv2.imread(f2)
            diff= sre(f1_img, f2_img)
            differences.append(diff)
        elif strategy=="rmse":
            f1_img = cv2.imread(f1)
            f2_img = cv2.imread(f2)
            diff= rmse(f1_img, f2_img)
            differences.append(diff)
    return differences

def store_in_diff_dic(diff_dic,strategies,diff_scores,arch_kframe_name):
    for i in range(len(strategies)):
        strategy=strategies[i]
        diff_score=diff_scores[i]
        if strategy not in diff_dic:
            diff_dic[strategy]={}
        if arch_kframe_name not in diff_dic[strategy]:
            diff_dic[strategy][arch_kframe_name]=diff_score
    return diff_dic


def get_best_diff(diff_dic,strategies,mappings):
    best_diff_dic={}
    
    for strategy in strategies:   
        best_diff_dic[strategy]={}
        dictn=diff_dic[strategy]
        if strategy in mappings["min"]:
            best_arch_kframe=min(dictn, key=dictn.get)
            best_val=min(list(diff_dic[strategy].values()))
        elif strategy in mappings["max"]:
            best_arch_kframe=max(dictn, key=dictn.get)
            best_val=max(list(diff_dic[strategy].values()))
        best_diff_dic[strategy][best_arch_kframe]=best_val
#     print("best",best_diff_dic)
    return best_diff_dic
        
def store_in_result_dic(all_results,least_diff_dic,strategies,ref_kf_path_name,archive_kf_location):
    for strategy in strategies:
        score=list(least_diff_dic[strategy].values())[0]
        best_match_archive_kf_fname=list(least_diff_dic[strategy].keys())[0]
        all_results["ref_kf_path_name"].append(ref_kf_path_name)
        all_results["arch_kf_path"].append(archive_kf_location)
        all_results["arch_kf_name"].append(best_match_archive_kf_fname)        
        all_results["strategy"].append(strategy)
        all_results["score"].append(score)    
    return all_results

In [3]:
strategies=["phash"]
mappings={}
mappings["min"]=["phash","dhash","rmse"]
mappings["max"]=["ssim","sre"]

In [4]:
hash_locations="../intermediate/hash_values/phash/dic_vals.p"
dic_hash_vals = pickle.load( open( hash_locations, "rb" ) )

In [5]:
reference_locations=[]
path_to_ref= '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/data/references/'
path_to_ref_kf='/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/'
for f in os.listdir(path_to_ref):
    if ".DS_Store" in f:
        continue
    f=f.split(".")[0]
    reference_locations.append(path_to_ref_kf+f)
    
print(reference_locations,len(reference_locations))

['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c02_202102021400', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c03_202101302035', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c08_202102011415', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c09_202102062300', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c02_202102011845', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_202102011730', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c09_202101311000', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_202102060650', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c03_202102011827', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_2021

In [6]:
archive_locations=[]
path_to_archive='/Users/althanin/Desktop/Keyframes Project/conf/video_copy/data/setC/positive-c.zip/'
path_to_archive_kf='/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/'
for f in os.listdir(path_to_archive):
    if ".DS_Store" in f:
            continue
    f=f.split(".")[0]

    archive_locations.append(path_to_archive_kf+f)
    
print(archive_locations,len(archive_locations))    

['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130190029_C4', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130210232_C4', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130081629_C3', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130203943_C3', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130145505_C4', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130124857_C2', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130204907_C4', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130133957_C4', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c01_20210130195400_C4', '/Users/althanin/Desktop/Keyframes Project/co

In [7]:
dic_hash_vals.keys()

dict_keys(['c02_202102021400', 'c03_202101302035', 'c08_202102011415', 'c09_202102062300', 'c02_202102011845', 'c05_202102011730', 'c09_202101311000', 'c05_202102060650', 'c03_202102011827', 'c05_202102021630', 'c07_202102021300', 'c03_202102021700', 'c05_202102010639', 'c02_202102022040', 'c01_202102140630', 'c03_202101301930', 'c03_202101301515', 'c02_202101302056', 'c02_202102120045', 'c05_202101300920', 'c01_202102032105', 'c03_202101301715', 'c05_202102011900', 'c02_202101310915', 'c05_202101301745', 'c01_202102120105', 'c06_202101301015', 'c03_202101300740', 'c08_202102051027', 'c03_202101300620', 'c01_202101301750', 'c03_202102011955', 'c03_202101301710', 'c06_202102010740', 'c09_202102040010', 'c01_202102010955', 'c01_202101301340', 'c07_202101311705', 'c02_202102032240', 'c07_202102011945', 'c07_202101301700', 'c08_202102011943', 'c05_202102012000', 'c03_202102011405', 'c05_202101311340', 'c01_202102160135', 'c02_202102021349', 'c01_202102032315', 'c08_202102042114', 'c03_2021

In [8]:
def compare_kframes(list_references_kf_locations,list_archives_kf_locations,strategies,mappings,res_location,dic_hash_vals):
    all_results={}
    all_results["ref_kf_path_name"]=[]
    all_results["arch_kf_path"]=[]
    all_results["arch_kf_name"]=[]    
    all_results["strategy"]=[]
    all_results["score"]=[]    
    ref_count=0
    total_ref_count=len(list_references_kf_locations)
    for references_kf_location in list_references_kf_locations:        
        ref_count+=1        
        ref_file_name=references_kf_location.split("/")[-1]
        e = datetime.datetime.now()
        print(e,ref_file_name,ref_count,"/",total_ref_count)
        sorted_dict_reference_kf=get_sorted_dic_files(references_kf_location)
        ref_hash=dic_hash_vals[ref_file_name]
        
        count=0
        for ref_kf_num, ref_kf_file_name in sorted_dict_reference_kf.items():  
            ref_kf_hash_val=ref_hash[ref_kf_file_name]
#             print("start",ref_kf_num,ref_kf_file_name,ref_kf_hash_val)
            count+=1
            percent_complete=int(100*count/len(list(sorted_dict_reference_kf.keys())))
            if percent_complete%10==0:
                print(percent_complete,"% complete at ",datetime.datetime.now())
#             print("\t",ref_kf_num, ref_kf_file_name)
            f1=os.path.join(references_kf_location,ref_kf_file_name)
            for archive_kf_location in list_archives_kf_locations:
                arch_file_name=archive_kf_location.split("/")[-1]
                arch_hash=dic_hash_vals[arch_file_name]
                sorted_dict_arch_kf=get_sorted_dic_files(archive_kf_location)
                diff_dic={}
                # above will contain scores for each frame of reference key frames
                for arch_kf_num,arch_kf_file_name in sorted_dict_arch_kf.items():   
                    arch_kf_hash_val=arch_hash[arch_kf_file_name]
                    f2=os.path.join(archive_kf_location,arch_kf_file_name)
#                     print(f1,f2)
#                     diff_scores=compare_frames(f1,f2,strategies)
                    diff_scores=[ref_kf_hash_val-arch_kf_hash_val]
                    diff_dic=store_in_diff_dic(diff_dic,strategies,diff_scores,arch_kf_file_name)
#                 print(diff_dic)
                # finished with one set of key frames
#                 print("\t ",diff_dic)
                best_diff_dic=get_best_diff(diff_dic,strategies,mappings)
#                 print("\t\t",best_diff_dic)
                all_results=store_in_result_dic(all_results,best_diff_dic,strategies,f1,archive_kf_location)
                df=pd.DataFrame(all_results)
                df.to_csv(res_location,index=False)
               
    
    return all_results

### Apply multi processing

In [10]:
reference_locations=reference_locations[:20]
archive_locations=archive_locations[:10]

In [11]:
len(reference_locations),len(archive_locations)

(20, 10)

In [19]:
def compare_with_multi_processing(proc_num,archive_locations,reference_locations):
    result_location="../results/res(average_hash_temp)_process_"+str(proc_num)+".csv"
    all_results=compare_kframes(reference_locations,archive_locations,strategies,mappings,result_location,dic_hash_vals)
        

In [33]:
starttime = time.time()
proc_num=0
compare_with_multi_processing(proc_num,archive_locations,reference_locations)
print('That took {} seconds'.format(time.time() - starttime))

2022-10-22 18:20:49.768293 c02_202102021400 1 / 20
10 % complete at  2022-10-22 18:20:50.216139
20 % complete at  2022-10-22 18:20:50.755322
30 % complete at  2022-10-22 18:20:51.294871
40 % complete at  2022-10-22 18:20:51.841537
50 % complete at  2022-10-22 18:20:52.404919
60 % complete at  2022-10-22 18:20:52.939652
70 % complete at  2022-10-22 18:20:53.511929
80 % complete at  2022-10-22 18:20:54.112490
90 % complete at  2022-10-22 18:20:54.670581
100 % complete at  2022-10-22 18:20:55.252235
2022-10-22 18:20:55.406448 c03_202101302035 2 / 20
10 % complete at  2022-10-22 18:20:56.300721
20 % complete at  2022-10-22 18:20:57.361773
30 % complete at  2022-10-22 18:20:58.454649
40 % complete at  2022-10-22 18:20:59.533483
50 % complete at  2022-10-22 18:21:00.747120
60 % complete at  2022-10-22 18:21:01.909963
100 % complete at  2022-10-22 18:21:06.927963
2022-10-22 18:21:07.108134 c08_202102011415 3 / 20
50 % complete at  2022-10-22 18:21:08.033694
100 % complete at  2022-10-22 18:21

100 % complete at  2022-10-22 18:33:33.889620
2022-10-22 18:33:34.747029 c03_202101301515 17 / 20
50 % complete at  2022-10-22 18:33:37.506007
100 % complete at  2022-10-22 18:33:41.199376
2022-10-22 18:33:42.152244 c02_202101302056 18 / 20
30 % complete at  2022-10-22 18:33:48.471779
50 % complete at  2022-10-22 18:33:52.949825
80 % complete at  2022-10-22 18:34:00.508228
100 % complete at  2022-10-22 18:34:05.383995
2022-10-22 18:34:06.252315 c02_202102120045 19 / 20
10 % complete at  2022-10-22 18:34:07.243158
20 % complete at  2022-10-22 18:34:09.246833
30 % complete at  2022-10-22 18:34:11.143433
40 % complete at  2022-10-22 18:34:12.940732
50 % complete at  2022-10-22 18:34:14.800851
60 % complete at  2022-10-22 18:34:16.651804
70 % complete at  2022-10-22 18:34:18.557649
80 % complete at  2022-10-22 18:34:20.470842
90 % complete at  2022-10-22 18:34:22.391686
100 % complete at  2022-10-22 18:34:24.342604
2022-10-22 18:34:25.300643 c05_202101300920 20 / 20
20 % complete at  2022-

In [34]:
number_of_processes=10
# divide the archive list into multiple lsits of number of processes

# Yield successive n-sized
# chunks from l.
def divide_chunks(l, n):
    # looping till length l
    for i in range(0, len(l), n):
        yield l[i:i + n]
 
# How many elements each
# list should have
n = len(reference_locations)//number_of_processes
 
chunks_div = list(divide_chunks(reference_locations, n))
print (chunks_div)

[['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c02_202102021400', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c03_202101302035'], ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c08_202102011415', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c09_202102062300'], ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c02_202102011845', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_202102011730'], ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c09_202101311000', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_202102060650'], ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c03_202102011827', '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes

In [35]:
chunks_div

[['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c02_202102021400',
  '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c03_202101302035'],
 ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c08_202102011415',
  '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c09_202102062300'],
 ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c02_202102011845',
  '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_202102011730'],
 ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c09_202101311000',
  '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c05_202102060650'],
 ['/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermediate/keyframes/c03_202102011827',
  '/Users/althanin/Desktop/Keyframes Project/conf/video_copy/intermed

In [36]:
starttime = time.time()
processes = []
for i in range(0,number_of_processes):
#     p = multiprocessing.Process(target=my_main) #, args=(i,))
    ref_vid_list=chunks_div[i]
    p = multiprocessing.Process(target=compare_with_multi_processing, args=(i,archive_locations,ref_vid_list,))

    processes.append(p)
    p.start()

for process in processes:
    process.join()

print('That took {} seconds'.format(time.time() - starttime))

2022-10-22 18:36:30.593887 c02_2021020214002022-10-22 18:36:30.599472 1 c08_202102011415 2022-10-22 18:36:30.604276 / 1 c02_2021020118452022-10-22 18:36:30.6109142  / 1
  2022-10-22 18:36:30.618606c09_2021013110002/  
 c03_202102011827122022-10-22 18:36:30.627420  
1/   c07_2021020213000/2  2022-10-22 18:36:30.639015 
% complete at 12   
c05_20210201063902022-10-22 18:36:30.6386992022-10-22 18:36:30.650327/  
  1% complete at 2c01_202102140630  
2022-10-22 18:36:30.650402/ 2022-10-22 18:36:30.661707
 1  2c03_202101301515 
/1 2022-10-22 18:36:30.677989 2/
  c02_2021021200452 0
1  % complete at / 2022-10-22 18:36:30.694773 
2
10 % complete at 0  2022-10-22 18:36:31.022175% complete at 
 2022-10-22 18:36:31.025804
10 % complete at  2022-10-22 18:36:31.411199
50 % complete at  2022-10-22 18:36:31.486567
20 % complete at  2022-10-22 18:36:31.503939
50 % complete at  2022-10-22 18:36:31.897143
30 % complete at  2022-10-22 18:36:31.976867
20 % complete at  2022-10-22 18:36:32.369139
10 % comp

2022-10-22 18:37:23.671687 c05_202102011730 2 / 2
100 % complete at  2022-10-22 18:37:24.594973
100 % complete at  2022-10-22 18:37:25.492586
20 % complete at  2022-10-22 18:37:26.065991
30 % complete at  2022-10-22 18:37:27.142611
90 % complete at  2022-10-22 18:37:28.548383
90 % complete at  2022-10-22 18:37:28.818492
90 % complete at  2022-10-22 18:37:29.141717
60 % complete at  2022-10-22 18:37:30.767837
90 % complete at  2022-10-22 18:37:34.456526
100 % complete at  2022-10-22 18:37:35.012209
2022-10-22 18:37:35.323053 c03_202101301930 2 / 2
0 % complete at  2022-10-22 18:37:35.336314
100 % complete at  2022-10-22 18:37:35.601865
10 % complete at  2022-10-22 18:37:38.848563
10 % complete at  2022-10-22 18:37:39.148540
20 % complete at  2022-10-22 18:37:42.526079
20 % complete at  2022-10-22 18:37:42.850281
30 % complete at  2022-10-22 18:37:46.500068
40 % complete at  2022-10-22 18:37:50.372078
50 % complete at  2022-10-22 18:37:54.551336
60 % complete at  2022-10-22 18:37:58.8587

In [None]:
###################### MULTI - PROCESSING ######################


#def basic_func(x):
#    if x == 0:
#        return 'zero'
#    elif x%2 == 0:
#        return 'even'
#    else:
#        return 'odd'

#def multiprocessing_func(x):
#    y = x*x
#    time.sleep(2)
#    print('{} squared results in a/an {} number'.format(x, basic_func(y)))
    
    
def my_main():
    try:
        if not os.path.isdir("../results"):
            os.mkdir("../results")
    except OSError:
            print("Error cant make directories")    

    location="../results/res(average_hash_temp).csv"
    all_results=compare_kframes(reference_locations,archive_locations,strategies,mappings,location,dic_hash_vals)

    
if __name__ == '__main__':
    starttime = time.time()
    processes = []
    for i in range(0,14):
        p = multiprocessing.Process(target=my_main) #, args=(i,))
        processes.append(p)
        p.start()
        
    for process in processes:
        process.join()
        
    print('That took {} seconds'.format(time.time() - starttime))