In [6]:
#libraries
import cv2
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from sklearn.neighbors import KDTree
import time
import argparse
import json
import xml.etree.ElementTree as ET
from tqdm import tqdm
import zipfile
from skimage.color import rgb2hsv,rgb2lab, hsv2rgb, lab2rgb
import os

In [7]:
#read video file frame by frame, beginning and ending with a timestamp
def read_video_segments(video,start_frame,end_frame,resolution_width,target_colorspace):
    """Read a range of frames from a video, resize them and convert their colourspace.

    The video is decoded sequentially from its first frame. Every frame whose
    index lies in ``start_frame`` .. ``end_frame`` is resized to
    ``(resolution_width, round(resolution_width * 9/16))`` and collected; the
    collected frames are then passed through ``change_colorspace``.

    Parameters
    ----------
    video : path (or anything ``cv2.VideoCapture`` accepts) of the video.
    start_frame : index of the first frame to collect (0-based).
    end_frame : index of the last frame that is decoded.
    resolution_width : target width in pixels; the height assumes 16:9.
    target_colorspace : forwarded unchanged to ``change_colorspace``.

    Returns
    -------
    list
        The converted frames with the final collected frame removed.
        NOTE(review): dropping the last frame effectively yields the half-open
        range [start_frame, end_frame) -- presumably so neighbouring shots do
        not share a boundary frame; confirm this is intended.
    """
    resolution_height=int(round(resolution_width * 9/16))
    resolution=(resolution_width,resolution_height)
    frames=[]
    vid = cv2.VideoCapture(video)
    try:
        # The bar is sized for the inclusive range and advanced once per kept frame.
        with tqdm(total=end_frame-start_frame+1) as pbar:
            frame_index=0
            while vid.isOpened():
                ret, frame = vid.read() # ret is False once no further frame can be decoded
                if not ret:
                    break
                if frame_index>=start_frame:
                    resized=cv2.resize(frame,resolution)
                    frames.append(np.asarray(resized,dtype='uint8'))
                    pbar.update(1)
                if frame_index==end_frame:
                    break
                frame_index+=1
    finally:
        # Always give the capture back, even if decoding/resizing failed.
        vid.release()
    frames=change_colorspace(frames,target_colorspace)
    return frames[:-1]

In [8]:
def extract_dominant_colors(frame_list,target_colorspace,path,what_to_process):
    """Build a normalised histogram of palette colours over a set of frames.

    Every pixel is assigned to its nearest palette entry (euclidean distance,
    via a KD-tree) and the per-colour counts are divided by the total number
    of pixels that were processed.

    Parameters
    ----------
    frame_list : for ``what_to_process == 'scene'`` a list of frame lists
        (one inner list per segment); otherwise a flat list of frames. Each
        frame is reshaped to ``(height * width, 3)``.
    target_colorspace, path : forwarded to ``fn_rgb_to_color`` to obtain the
        mapping from palette value to colour name.
    what_to_process : ``'scene'`` selects the nested layout, any other value
        the flat layout.

    Returns
    -------
    dict
        Colour name -> fraction of all processed pixels (all 0.0 when no
        pixel was processed).
    """
    print(str(len(frame_list))+' frames to process.')
    rgb_to_color=fn_rgb_to_color(target_colorspace,path) #palette value -> colour name
    rgb_list=list(rgb_to_color) #indexable list of palette values, same order as the dict
    bins={rgb_to_color[rgb]:0 for rgb in rgb_list} #one histogram bin per colour name

    kdt = KDTree(rgb_list, leaf_size=30, metric='euclidean')
    if what_to_process=='scene':
        # nested layout: progress advances once per segment
        images=(image for frames in tqdm(frame_list) for image in frames)
    else:
        images=tqdm(frame_list)

    total_pixels=0
    for image in images:
        img = image.reshape((image.shape[0] * image.shape[1], 3)) #flatten the image to a pixel list
        nns = kdt.query(img, k=1, return_distance=False)
        # count how often each palette index was the nearest neighbour
        hits = np.bincount(nns[:,0], minlength=len(rgb_list)).tolist()
        for palette_index,count in enumerate(hits):
            if count:
                bins[rgb_to_color[rgb_list[palette_index]]]+=count
        total_pixels+=img.shape[0]

    # Normalise by the pixels actually seen; this is correct for both layouts,
    # whereas len(frame_list)*height*width only matches the flat layout.
    if total_pixels==0:
        return {k:0.0 for k in bins}
    return {k:v/total_pixels for k,v in bins.items()}

In [9]:
def bins_to_df(bins,bin_threshold=5,colors_to_return=5):
    """Turn a colour histogram into a DataFrame of its largest bins.

    bins             -- mapping colour name -> share (0-1 scale)
    bin_threshold    -- minimum share in percent; bins at or below it are dropped
    colors_to_return -- maximum number of rows returned (fewer if fewer survive)

    Returns a DataFrame indexed by 'color' with a single 'count' column,
    ordered from the largest share to the smallest.
    """
    # (share, name) pairs, largest first; ties fall back to the name, descending
    ranked=sorted(((share,name) for name,share in bins.items()),reverse=True)
    table=pd.DataFrame(ranked,columns=['count','color']).set_index('color')
    cutoff=bin_threshold/100 #percent -> fraction
    # masking leaves NaN where the share is too small; dropna removes those rows
    surviving=table[table>cutoff].dropna()
    return surviving.head(colors_to_return)

In [10]:
# def fn_rgb_to_color(target_colorspace,path):
#     if (path != 'full'):
#                 colors = {}
#                 with open(path) as f:
#                     for line in f:
#                         #split lines at "::
#                         color, rgb = line.strip().split(':')
#                         #strip the rgb-string of the parenthesis, split it up a the commas,
#                         #cast them to int and put them into a tuples
#                         rgb_value=tuple(map(int,(rgb.strip('(').strip(')').split(','))))
#                         colors[color]=rgb_value
#             else:
#                 colors={'darkred':(139,0,0),
#                 'firebrick':(178,34,34),
#                 'crimson':(220,20,60),
#                 'red':(255,0,0),
#                 'tomato':(255,99,71),
#                 'salmon':(250,128,114),
#                 'darkorange':(255,140,0),
#                 'gold':(255,215,0),
#                 'darkkhaki':(189,183,107),
#                 'yellow':(255,255,0),
#                 'darkolivegreen':(85,107,47),
#                 'olivedrab':(107,142,35),
#                 'greenyellow':(173,255,47),
#                 'darkgreen':(0,100,0),
#                 'aquamarine':(127,255,212),
#                 'steelblue':(70,130,180),
#                 'skyblue':(135,206,235),
#                 'darkblue':(0,0,139),
#                 'blue':(0,0,255),
#                 'royalblue':(65,105,225),
#                 'purple':(128,0,128),
#                 'violet':(238,130,238),
#                 'deeppink':(255,20,147),
#                 'pink':(255,192,203),
#                 'antiquewhite':(250,235,215),
#                 'saddlebrown':(139,69,19),
#                 'sandybrown':(244,164,96),
#                 'ivory':(255,255,240),
#                 'dimgrey':(105,105,105),
#                 'grey':(28,128,128),
#                 'silver':(192,192,192),
#                 'lightgrey':(211,211,211),
#                 'black':(0,0,0),
#                 'white':(255,255,255),
#                 'darkcyan':(0,139,139),
#                 'cyan':(0,255,255),
#                 'green':(0,128,0),
#                 'khaki':(240,230,140),
#                 'goldenrod':(218,165,32),
#                 'orange':(255,165,0),
#                 'coral':(255,127,80),
#                 'magenta':(255,0,255),
#                 'wheat':(245,222,179),
#                 'skin':(255,224,189),
#                 'purple4':(147,112,219)}

#             colors_aux={}
#             if target_colorspace=='HSV':
#                 print('HSV')
#                 for color in colors:
#                     a = np.array((colors[color]),dtype='uint8')
#                     b = a.reshape(1,1,3)
#                     c = cv2.cvtColor(b,cv2.COLOR_RGB2HSV)
#                     colors_aux[color]=tuple(c.reshape(3))
#                 colors=colors_aux
#             if target_colorspace=='cie-lab':
#                 print('cie-lab')
#                 for color in colors:
#                     a = np.array((colors[color]),dtype='uint8')
#                     b = a.reshape(1,1,3)
#                     c = cv2.cvtColor(b,cv2.COLOR_RGB2LAB)
#                     colors_aux[color]=tuple(c.reshape(3))
#                 colors=colors_aux

#             rgb_to_color={}
#             for color in colors:
#                 rgb_to_color[colors[color]]=color
#             #purple4 is median purple
#             #skin is caucasian        
#             return rgb_to_color

In [11]:
def _format_dominant_colors(colors_df):
    """Render each row of a bins_to_df result as '<color> <values>'."""
    return [str(color)+str(' ')+str(perc)
            for color,perc in zip(colors_df.index.values,colors_df.values.tolist())]


def read_azp(azp_path,output_path=None,colors_path='full'):
    """Extract dominant colours for the '#Shot' annotations of an .azp package.

    The package's ``content.xml`` is read straight from the archive, every
    annotation of type ``#Shot`` is converted to a frame range and the video
    given by the global ``args`` is analysed.

    Reads from the module-level ``args``: ``output_path`` (only when the
    parameter is omitted), ``what_to_process``, ``video_path``,
    ``resolution_width``, ``target_colorspace``, ``bin_threshold`` and
    ``colors_to_return``.

    Parameters
    ----------
    azp_path : path of the .azp (zip) package.
    output_path : file the results are written to; defaults to
        ``args.output_path``.
    colors_path : forwarded as the ``path`` argument of
        ``extract_dominant_colors``.
        NOTE(review): 'full' is presumably the built-in palette of
        ``fn_rgb_to_color`` -- confirm against that function.

    Behaviour
    ---------
    * ``args.what_to_process == 'segment'``: one line
      ``(begin, end, colors_list)`` is printed and written per shot.
    * ``args.what_to_process == 'scene'``: all shots are collected and one
      line with the colours of the whole scene is printed and written.

    Raises
    ------
    KeyError if the archive has no ``content.xml``; IndexError if the XML has
    no annotations element.
    """
    if output_path is None:
        output_path=args.output_path
    #read the .xml-file directly from the archive (no shared temp file, handle is closed)
    with zipfile.ZipFile(azp_path) as zip_ref:
        with zip_ref.open('content.xml') as xml_file:
            tree = ET.parse(xml_file)
    root = tree.getroot().findall('./{http://experience.univ-lyon1.fr/advene/ns}annotations')
    fragment_tag='{http://experience.univ-lyon1.fr/advene/ns}millisecond-fragment'
    process_scene = args.what_to_process=='scene'
    segment_list=[]
    with open(output_path,'w') as file:
        #traverse the .xml-file
        for child in root[0].iter():
            if child.get('type')!='#Shot': #only shot annotations carry the timestamps of interest
                continue
            for child2 in child:
                if child2.tag!=fragment_tag:
                    continue
                #milliseconds -> frame index at 25 frames per second, rounded because there are no half frames
                end=round(int(child2.get('end'))/1000*25)
                begin=round(int(child2.get('begin'))/1000*25)
                if process_scene: #collect the frames of every segment, evaluate them together afterwards
                    segment_list.append(read_video_segments(args.video_path,begin,end,args.resolution_width,args.target_colorspace))
                elif args.what_to_process=='segment': #evaluate each segment on its own
                    segment = read_video_segments(args.video_path,begin,end,args.resolution_width,args.target_colorspace)
                    bins = extract_dominant_colors(segment,args.target_colorspace,colors_path,args.what_to_process)
                    colors_list = _format_dominant_colors(bins_to_df(bins,args.bin_threshold,args.colors_to_return))
                    print(begin,end,colors_list)
                    file.write(str((begin,end,colors_list))+'\n') #write the timestamp and the extracted colors to file
        if process_scene:
            bins = extract_dominant_colors(segment_list,args.target_colorspace,colors_path,args.what_to_process)
            colors_list = _format_dominant_colors(bins_to_df(bins,args.bin_threshold,args.colors_to_return))
            print(colors_list)
            file.write(str(colors_list)+'\n') #write the extracted colors to file

In [12]:
def azp_path(path,output_path):
    """Dispatch on whether `path` is a single .azp file or a directory.

    * A path ending in '.azp' is handed to read_azp together with output_path.
      NOTE(review): this passes two arguments -- confirm read_azp accepts an
      output path.
    * Anything else is treated as a directory: for every '.azp' entry the
      planned output name and the file path are printed, followed by
      'planned'. The actual processing of directory entries is currently
      disabled, so this branch only reports what would be done.
    """
    if path[-4:] == '.azp': #single file
        read_azp(path,output_path)
        return

    entries = os.listdir(path)
    joiner = '' if path[-1]=='/' else '/' #avoid a doubled slash when the path already ends with one
    azp_files = [path+joiner+entry for entry in entries if entry[-4:]=='.azp']
    for azp_file in azp_files:
        print(azp_file[:29]+'__'+output_path)
        print(azp_file)
    print('planned')

In [13]:
def change_colorspace(frame_list,target_colorspace):
    """Convert BGR frames to the requested colourspace.

    'HSV' and 'cie-lab' select the matching OpenCV conversion; every other
    value converts to RGB. The chosen mode is printed once. A new list is
    returned; the input list is not modified.
    """
    if target_colorspace=='HSV':
        print('HSV')
        return [cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) for frame in frame_list]
    elif target_colorspace=='cie-lab':
        print('cie-lab')
        return [cv2.cvtColor(frame, cv2.COLOR_BGR2LAB) for frame in frame_list]
    # fallback for any other value
    print('rgb')
    return [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frame_list]

In [14]:
def color_accuracy(colors_target,colors_predictions,*acc_single):
    """Compare predicted colour lists against target colour lists.

    Parameters
    ----------
    colors_target : list of lists of colour names (ground truth).
    colors_predictions : list of lists of colour names, paired with the
        targets by position.
    *acc_single : if ANY extra positional argument is supplied (regardless of
        its value, since a non-empty tuple is truthy) a per-pair accuracy list
        is returned instead of one overall score.

    Returns
    -------
    list of float
        Per-pair mode: for each pair, the share of the target's colours that
        also occur in the prediction (0.0 for an empty target).
    float
        Overall mode: both lists of a pair are cut to the shorter length, the
        matches are summed over all pairs and divided by the summed lengths
        (0.0 when there is nothing to compare).
    """
    if acc_single:
        return [len(set(target) & set(pred))/len(target) if len(target) else 0.0
                for target,pred in zip(colors_target,colors_predictions)]

    compared=0 #number of colours taken into account
    matched=0 #number of colours present in both lists
    for target,pred in zip(colors_target,colors_predictions):
        # only compare as many colours as both lists provide
        common_length=min(len(target),len(pred))
        target=target[:common_length]
        pred=pred[:common_length]
        compared+=common_length
        matched+=len(set(target) & set(pred))
    if compared==0:
        return 0.0
    return matched/compared

In [15]:
def read_target_colors_azp(azp_path):
    """Collect the annotated target colours of an .azp package.

    ``content.xml`` is parsed directly from the archive, so nothing is written
    to disk and a leftover file from another package can never be read by
    mistake.

    Parameters
    ----------
    azp_path : path of the .azp (zip) package.

    Returns
    -------
    list of list of str
        One entry per content element found directly below an element whose
        ``type`` attribute is ``#ColourRange``: its text split at commas. An
        empty content element yields ``['']``. The list is empty when the
        package has no annotations element.

    Raises
    ------
    KeyError if the archive does not contain ``content.xml``.
    """
    with zipfile.ZipFile(azp_path) as zip_ref:
        with zip_ref.open('content.xml') as xml_file:
            tree = ET.parse(xml_file)
    root = tree.getroot().findall('./{http://experience.univ-lyon1.fr/advene/ns}annotations')
    colors_target=[]
    if not root:
        return colors_target
    content_tag='{http://experience.univ-lyon1.fr/advene/ns}content'
    for child in root[0].iter():
        if child.get('type')!='#ColourRange':
            continue
        for child2 in child:
            if child2.tag==content_tag:
                # .text is None for an empty element; treat it like an empty string
                colors_target.append((child2.text or '').split(','))
    return colors_target

In [16]:
def read_prediction_txt_file(txt_file):
    """Read predicted colour names from a text file, one list per line.

    Each line is split at whitespace. The first two tokens are skipped and
    from the remainder every second token (starting with the first) is kept,
    with surrounding '[', ']' and quote characters stripped in that order.
    Lines with fewer than three tokens produce an empty list.
    """
    def clean(token):
        return token.strip('[').strip(']').strip("'")

    with open(txt_file) as handle:
        return [[clean(token) for token in row.split()[2::2]] for row in handle]

In [17]:
def create_nns_picture(frame_list,target_colorspace,path):
    """Quantise the first frame of `frame_list` to the colour palette.

    Every pixel of ``frame_list[0]`` is replaced by its nearest palette entry
    (euclidean distance, looked up with a KD-tree).

    Parameters
    ----------
    frame_list : sequence whose first element is an image array that can be
        reshaped to ``(height * width, 3)``.
    target_colorspace, path : forwarded to ``fn_rgb_to_color`` to obtain the
        palette.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape ``(height, width, 3)`` holding palette values.
    """
    rgb_to_color=fn_rgb_to_color(target_colorspace,path) #get the color dict
    rgb_list=list(rgb_to_color) #palette values in dict order
    kdt = KDTree(rgb_list, leaf_size=30, metric='euclidean')

    frame=frame_list[0]
    height,width=frame.shape[0],frame.shape[1]
    img = frame.reshape((height * width, 3)) #flatten the image to a pixel list

    nns = kdt.query(img, k=1, return_distance=False)
    # one vectorised lookup: nns[:,0] holds the palette index of every pixel
    palette=np.asarray(rgb_list,dtype='uint8')
    return palette[nns[:,0]].reshape(height,width,3)

In [18]:
def create_nns_picture_old_method(frame_list,target_colorspace,path):
    """Brute-force variant: quantise the first frame to the colour palette.

    For every pixel of ``frame_list[0]`` the euclidean distance to each
    palette entry is computed and the closest entry (first one on ties) is
    taken. Returns a uint8 array of shape ``(height, width, 3)``.
    """
    #palette values, in the order of the dict returned by fn_rgb_to_color
    palette=list(fn_rgb_to_color(target_colorspace,path))

    first_frame=frame_list[0]
    height,width=first_frame.shape[0],first_frame.shape[1]
    #flatten the image to a list of pixels
    pixels=first_frame.reshape((height * width, 3))

    quantised=[]
    #exhaustive nearest neighbour search
    for pixel in tqdm(pixels):
        distances=[euclidean(pixel,candidate) for candidate in palette]
        quantised.append(palette[np.argmin(distances)])

    return np.asarray(quantised,dtype='uint8').reshape(height,width,3)

In [19]:
# Read the frame boundaries of the first four '#Shot' annotations of one package.
# The archive is unpacked to /tmp and its content.xml is parsed from there; the
# context manager makes sure the archive handle is closed again.
with zipfile.ZipFile('CompanyMen_v1.0-split-012-Bobby_being_angry.azp') as zip_ref:
    zip_ref.extractall('/tmp')
tree = ET.parse('/tmp/content.xml')
root = tree.getroot().findall('./{http://experience.univ-lyon1.fr/advene/ns}annotations')
i=0 # number of shot annotations seen so far
end=[] # last frame index of each shot
begin=[] # first frame index of each shot
for child in root[0].iter():
    if child.get('type')=='#Shot':
        i+=1
        for child2 in child:
            if child2.tag=='{http://experience.univ-lyon1.fr/advene/ns}millisecond-fragment':
                # milliseconds -> frame index at 25 frames per second
                end.append(round(int(child2.get('end'))/1000*25))
                begin.append(round(int(child2.get('begin'))/1000*25))
        if i==4: # stop after the fourth shot
            break

In [33]:
def stuff(colors_used):
    """Select a sub-palette from the reference colours by name.

    Parameters
    ----------
    colors_used : str
        Colour names separated by ',' and/or ';' (both may be mixed).
        Whitespace around a name is ignored and empty entries are skipped.
        An empty string selects nothing.

    Returns
    -------
    dict
        Colour name -> RGB tuple for every requested name that exists in the
        reference palette. Unknown names are collected and printed, they are
        not part of the result.

    Prints the raw request (if non-empty), each name as it is processed and
    finally the list of unknown names.
    """
    # NOTE(review): 'grey' is (28,128,128) here, while the usual grey is
    # (128,128,128) -- looks like a typo; confirm before relying on it.
    colors_reference={'darkred':(139,0,0),'firebrick':(178,34,34),'crimson':(220,20,60),'red':(255,0,0),
                    'tomato':(255,99,71),'salmon':(250,128,114),'darkorange':(255,140,0),'gold':(255,215,0),
                    'darkkhaki':(189,183,107),'yellow':(255,255,0),'darkolivegreen':(85,107,47),'olivedrab':(107,142,35),
                    'greenyellow':(173,255,47),'darkgreen':(0,100,0),'aquamarine':(127,255,212),'steelblue':(70,130,180),
                    'skyblue':(135,206,235),'darkblue':(0,0,139),'blue':(0,0,255),'royalblue':(65,105,225),'purple':(128,0,128),
                    'violet':(238,130,238),'deeppink':(255,20,147),'pink':(255,192,203),'antiquewhite':(250,235,215),
                    'saddlebrown':(139,69,19),'sandybrown':(244,164,96),'ivory':(255,255,240),'dimgrey':(105,105,105),
                    'grey':(28,128,128),'silver':(192,192,192),'lightgrey':(211,211,211),'black':(0,0,0),'white':(255,255,255),
                    'darkcyan':(0,139,139),'cyan':(0,255,255),'green':(0,128,0),'khaki':(240,230,140),'goldenrod':(218,165,32),
                    'orange':(255,165,0),'coral':(255,127,80),'magenta':(255,0,255),'wheat':(245,222,179),'skin':(255,224,189),'purple4':(147,112,219)}

    # Initialised up front so the empty request returns cleanly as well.
    colors={}
    wrong_colors=[]
    if (colors_used!=''):
        print(colors_used)
        # treat ';' like ',' so that either separator (or a mix of both) works
        requested=[name.strip() for name in colors_used.replace(';',',').split(',')]
        for color in requested:
            if not color:
                continue
            print('color: ',color)
            if color in colors_reference:
                colors[color]=colors_reference[color]
            else:
                wrong_colors.append(color)
    print(wrong_colors)
    return colors

In [35]:
# Manual check of stuff() with differently separated inputs. Exactly one
# assignment is meant to be active per run; the trailing comments appear to
# record the outcome seen for that input (returned dict, then rejected names).
# colors_used='red,blue,green' #good
# colors_used='red;blue;green' #good
colors_used='red' #good  (overwritten by the next assignment before stuff() runs)
colors_used='red:blue' # {} [string]  -- ':' is not a separator, the whole string is rejected
# colors_used='red:green:blue' # {} [string]
# colors_used='red,blue,green,df,dfg' #good
# colors_used='red,blue;green' # {} ['r', 'e', 'd', ',', 'b', 'l', 'u', 'e', ';', 'g', 'r', 'e', 'e', 'n']
stuff(colors_used)

red:blue
color:  red:blue
['red:blue']


{}