In [1]:
#libraries
import argparse
import json
import math
import time
import xml.etree.ElementTree as ET

import cv2
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from sklearn.neighbors import KDTree
from tqdm import tqdm

  return f(*args, **kwds)


In [2]:
#read a video file frame by frame, from a start frame index to an end frame index (inclusive)
def read_video_segments(video, start_frame, end_frame, resolution_width=200):
    """Read the frames ``start_frame..end_frame`` (inclusive) of a video, resized.

    The video is decoded sequentially from its first frame; frames before
    ``start_frame`` are read and discarded. A tqdm progress bar tracks the
    frames that are kept.

    Parameters
    ----------
    video : str
        Path (or anything ``cv2.VideoCapture`` accepts) of the video to read.
    start_frame, end_frame : int or float
        Zero-based frame indices delimiting the segment, both inclusive.
        Fractional values are accepted: only the whole frame indices lying
        inside ``[start_frame, end_frame]`` are returned.
    resolution_width : int, optional
        Width of the returned frames. The height is derived from it as
        ``round(resolution_width * 9 / 16)``.

    Returns
    -------
    list
        The resized frames in playback order. The list is empty when the
        video cannot be opened or the requested range contains no frame, and
        shorter than requested when the video ends before ``end_frame``.

    Notes
    -----
    Frames are returned in the channel order produced by OpenCV (BGR
    according to the OpenCV documentation).
    NOTE(review): the colour table used elsewhere in this notebook is keyed
    by RGB tuples - confirm whether a BGR->RGB conversion is needed before
    the frames are matched against it.
    """
    resolution_height = int(round(resolution_width * 9/16))
    resolution = (resolution_width, resolution_height)

    # Frame indices are integers, so reduce the (possibly fractional) bounds
    # to the whole frames inside them. Comparing the counter with a
    # fractional end by equality would never match and the rest of the video
    # would be decoded into memory.
    first_frame = max(0, math.ceil(start_frame))
    last_frame = math.floor(end_frame)
    expected = max(0, last_frame - first_frame + 1)

    frames = []
    vid = cv2.VideoCapture(video)
    try:
        # The bar counts exactly the frames that are kept.
        with tqdm(total=expected) as pbar:
            frame_index = 0
            while vid.isOpened() and frame_index <= last_frame:
                ret, frame = vid.read()  # ret is False once no frame is left
                if not ret:
                    break
                if frame_index >= first_frame:
                    frames.append(cv2.resize(frame, resolution))
                    pbar.update()
                frame_index += 1
    finally:
        # Release the capture even if decoding or resizing raised.
        vid.release()
    return frames

In [13]:
#read a video file frame by frame, from a start frame index to an end frame index (inclusive), without a progress bar
def read_video_segments2(video, start_frame, end_frame, resolution_width=200):
    """Read the frames ``start_frame..end_frame`` (inclusive) of a video, resized.

    Same job as ``read_video_segments`` but without a progress bar. The video
    is decoded sequentially from its first frame; frames before
    ``start_frame`` are read and discarded.

    Parameters
    ----------
    video : str
        Path (or anything ``cv2.VideoCapture`` accepts) of the video to read.
    start_frame, end_frame : int or float
        Zero-based frame indices delimiting the segment, both inclusive.
        Fractional values are accepted: only the whole frame indices lying
        inside ``[start_frame, end_frame]`` are returned.
    resolution_width : int, optional
        Width of the returned frames. The height is derived from it as
        ``round(resolution_width * 9 / 16)``.

    Returns
    -------
    list
        The resized frames in playback order. The list is empty when the
        video cannot be opened or the requested range contains no frame, and
        shorter than requested when the video ends before ``end_frame``.

    Notes
    -----
    Frames are returned in the channel order produced by OpenCV (BGR
    according to the OpenCV documentation).
    NOTE(review): the colour table used elsewhere in this notebook is keyed
    by RGB tuples - confirm whether a BGR->RGB conversion is needed before
    the frames are matched against it.
    """
    resolution_height = int(round(resolution_width * 9/16))
    resolution = (resolution_width, resolution_height)
    frames = []
    vid = cv2.VideoCapture(video)
    try:
        frame_index = 0
        # Stop with an ordering comparison rather than equality: the bounds
        # may be fractional (they are derived from millisecond timestamps),
        # and an equality test would then never fire, decoding the whole
        # remainder of the video into memory.
        while vid.isOpened() and frame_index <= end_frame:
            ret, frame = vid.read()  # ret is False once no frame is left
            if not ret:
                break
            if frame_index >= start_frame:
                frames.append(cv2.resize(frame, resolution))
            frame_index += 1
    finally:
        # Release the capture even if decoding or resizing raised.
        vid.release()
    return frames

In [3]:
def extract_dominant_color(frame_list):
    """Compute the share of pixels closest to each colour of the palette.

    Every pixel of every frame is assigned to its nearest palette entry
    (Euclidean distance between the 3-component pixel value and the palette
    tuples returned by ``fn_rgb_to_color``), and the per-colour counts are
    divided by the total number of pixels processed.

    Parameters
    ----------
    frame_list : sequence of numpy.ndarray
        Frames whose first two axes are the pixel grid and whose last axis
        holds 3 channels (each frame is reshaped to ``(rows * cols, 3)``).
        NOTE(review): the palette is keyed by RGB tuples; frames decoded by
        OpenCV are BGR according to the OpenCV documentation - confirm the
        channel order of the frames passed in.

    Returns
    -------
    dict
        Maps every palette colour name to its fraction of all pixels, in
        palette order. For an empty ``frame_list`` every fraction is ``0.0``.

    Progress (frame number and elapsed seconds) is printed after each frame.
    """
    print(str(len(frame_list))+' frames to process.')
    start = time.time()
    rgb_to_color = fn_rgb_to_color()  # palette: (r, g, b) tuple -> colour name
    rgb_list = list(rgb_to_color)     # row i of the tree is rgb_list[i]

    kdt = KDTree(rgb_list, leaf_size=30, metric='euclidean')
    hits = np.zeros(len(rgb_list), dtype=np.int64)  # pixels assigned to each palette row
    total_pixels = 0
    for i, image in enumerate(frame_list, start=1):
        # Flatten the frame to one row per pixel.
        img = image.reshape((image.shape[0] * image.shape[1], 3))
        nns = kdt.query(img, k=1, return_distance=False)
        # Count all nearest-neighbour indices of the frame in one vectorised step.
        hits += np.bincount(np.asarray(nns).reshape(-1), minlength=len(rgb_list))
        total_pixels += len(img)
        end = time.time()
        print('Finished '+str(i)+',time: '+str(end-start))

    # Fold the per-row counts into per-name counts (several palette rows may
    # share one name when the palette comes from a file).
    bins = {}
    for rgb, count in zip(rgb_list, hits.tolist()):
        name = rgb_to_color[rgb]
        bins[name] = bins.get(name, 0) + count

    # Normalise once, by the number of pixels actually processed.
    if total_pixels == 0:
        return {name: 0.0 for name in bins}
    return {name: count/total_pixels for name, count in bins.items()}

In [4]:
def bins_to_df(bins, bin_threshold=5, colors_to_return=5):
    """Turn a colour histogram into a table of its largest bins.

    Parameters
    ----------
    bins : dict
        Maps colour name to its share (a fraction between 0 and 1).
    bin_threshold : number, optional
        Minimum share in percent; bins whose share is not strictly greater
        than ``bin_threshold / 100`` are dropped.
    colors_to_return : int, optional
        Maximum number of rows returned; fewer are returned when fewer bins
        pass the threshold.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``color`` with a single ``count`` column, sorted by share
        in descending order (ties broken by colour name, descending).
    """
    # Sorting (share, name) pairs orders by share first, then by name.
    ranked = sorted(((share, name) for name, share in bins.items()), reverse=True)
    table = pd.DataFrame(ranked, columns=['count', 'color']).set_index('color')

    min_share = bin_threshold / 100  # percent -> fraction
    # Blank out the shares at or below the threshold, then drop those rows.
    above_threshold = table.where(table > min_share).dropna()
    return above_threshold.head(colors_to_return)

In [5]:
def fn_rgb_to_color(*path):
    """Return the colour palette as a dict mapping ``(r, g, b)`` to a name.

    Parameters
    ----------
    *path : str
        Optional path(s) of palette files. Each non-empty line must have the
        form ``name:(r,g,b)``. When several files are given they are read in
        order and later entries overwrite earlier ones with the same RGB
        value. Without any path the built-in palette below is returned.

    Returns
    -------
    dict
        ``{(r, g, b): color_name}`` with integer channel values.

    Raises
    ------
    OSError
        If a given file cannot be opened.
    ValueError
        If a line does not match the ``name:(r,g,b)`` format.
    """
    if path:
        rgb_to_color = {}
        for file_path in path:
            with open(file_path) as f:
                for line in f:
                    line = line.strip()
                    if not line:  # tolerate blank lines
                        continue
                    # Split "name:(r,g,b)" at the colon.
                    color, rgb = line.split(':')
                    # Drop the parentheses, split at the commas and cast the
                    # three channels to int.
                    rgb_value = tuple(int(part) for part in rgb.strip().strip('()').split(','))
                    rgb_to_color[rgb_value] = color.strip()
        return rgb_to_color

    colors = {'darkred': (139, 0, 0),
              'firebrick': (178, 34, 34),
              'crimson': (220, 20, 60),
              'red': (255, 0, 0),
              'tomato': (255, 99, 71),
              'salmon': (250, 128, 114),
              'darkorange': (255, 140, 0),
              'gold': (255, 215, 0),
              'darkkhaki': (189, 183, 107),
              'yellow': (255, 255, 0),
              'darkolivegreen': (85, 107, 47),
              'olivedrab': (107, 142, 35),
              'greenyellow': (173, 255, 47),
              'darkgreen': (0, 100, 0),
              'aquamarine': (127, 255, 212),
              'steelblue': (70, 130, 180),
              'skyblue': (135, 206, 235),
              'darkblue': (0, 0, 139),
              'blue': (0, 0, 255),
              'royalblue': (65, 105, 225),
              'purple': (128, 0, 128),
              'violet': (238, 130, 238),
              'deeppink': (255, 20, 147),
              'pink': (255, 192, 203),
              'antiquewhite': (250, 235, 215),
              'saddlebrown': (139, 69, 19),
              'sandybrown': (244, 164, 96),
              'ivory': (255, 255, 240),
              'dimgrey': (105, 105, 105),
              'grey': (128, 128, 128),  # was (28,128,128), a typo for neutral grey
              'silver': (192, 192, 192),
              'lightgrey': (211, 211, 211),
              'black': (0, 0, 0),
              'white': (255, 255, 255),
              'darkcyan': (0, 139, 139),
              'cyan': (0, 255, 255),
              'green': (0, 128, 0),
              'khaki': (240, 230, 140),
              'goldenrod': (218, 165, 32),
              'orange': (255, 165, 0),
              'coral': (255, 127, 80),
              'magenta': (255, 0, 255),
              'wheat': (245, 222, 179),
              'skin': (255, 224, 189),       # a light (caucasian) skin tone
              'purple4': (147, 112, 219)}    # medium purple
    # Invert name -> rgb into rgb -> name for nearest-colour lookups.
    return {rgb: color for color, rgb in colors.items()}

In [6]:
# Parse the annotation package and collect every <annotations> element
# (Advene namespace) that sits directly below the document root.
tree = ET.parse('zip/content.xml')
document_root = tree.getroot()
root = document_root.findall('./{http://experience.univ-lyon1.fr/advene/ns}annotations')

In [7]:
# Collect the begin/end positions of the first two '#Shot' annotations.
# The millisecond values are divided by 1000 and multiplied by 25
# (presumably the frame rate - TODO confirm), so the results may be fractional.
i=0
end=[]
begin=[]
for child in root[0].iter():
    if child.get('type')!='#Shot':
        continue
    i+=1
    for child2 in child:
        if child2.tag!='{http://experience.univ-lyon1.fr/advene/ns}millisecond-fragment':
            continue
        end.append(int(child2.get('end'))/1000*25)
        begin.append(int(child2.get('begin'))/1000*25)
    if i==2: # only the first two shots are inspected
        break
print(begin)
print(end)

[33020.5, 33112.5]
[33112.5, 33242.5]


In [None]:
# Read the segment that starts at the first collected shot position and
# extends 10 frame indices past it. begin[0] may be fractional.
vid1 = read_video_segments2(
    '/home/jacob/Downloads/Wells_John_CompanyMen_full.mp4',
    begin[0],
    begin[0] + 10,
)

In [None]:
# Scratch check of the progress bar: it is updated for every even i up to
# 150 and once more right before breaking, i.e. more often than total=75.
with tqdm(total=75) as pbar:
    for i in range(199):
        if i % 2 == 0:
            pbar.update()
            time.sleep(0.1)
            print(i)
        if i == 150:
            pbar.update()
            break

In [12]:
# Load frames 10 through 300 of the test clip at the default resolution.
x = read_video_segments('videos/red.mp4', start_frame=10, end_frame=300)

292it [00:01, 173.05it/s]                         


In [None]:
# black=np.array([[[0,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]],
#                 [[0,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]],
#                 [[0,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]]])
# red=np.array([[[255,0,0],[255,0,0],[255,0,0],[255,0,0],[255,0,0]],
#              [[255,0,0],[255,0,0],[255,0,0],[255,0,0],[255,0,0]],
#               [[255,0,0],[255,0,0],[255,0,0],[255,0,0],[255,0,0]]])
# green=np.array([[[0,255,0],[0,255,0],[0,255,0],[0,255,0],[0,255,0]],
#                [[0,255,0],[0,255,0],[0,255,0],[0,255,0],[0,255,0]],
#                [[0,255,0],[0,255,0],[0,255,0],[0,255,0],[0,255,0]]])
# blue=np.array([[[0,0,255],[0,0,255],[0,0,255],[0,0,255],[0,0,255]],
#               [[0,0,255],[0,0,255],[0,0,255],[0,0,255],[0,0,255]],
#               [[0,0,255],[0,0,255],[0,0,255],[0,0,255],[0,0,255]]])
# white=np.array([[[255,255,255],[255,255,255],[255,255,255],[255,255,255],[255,255,255]],
#                [[255,255,255],[255,255,255],[255,255,255],[255,255,255],[255,255,255]],
#                [[255,255,255],[255,255,255],[255,255,255],[255,255,255],[255,255,255]]])

In [None]:
# a=[red,blue,red,blue]
# print(np.shape(a))
# assert x == extract_dominant_color(a)

In [None]:
# b = read_video_segments('videos/red.mp4',0,9,5)
# print(np.shape(b))
# assert np.shape(b)==(2,3,5,3)

In [None]:
# x ={'antiquewhite': 0.0,'aquamarine': 0.0,'black': 0.0,'blue': 0.5,'coral': 0.0, 'crimson': 0.0, 'cyan': 0.0, 'darkblue': 0.0,
#  'darkcyan': 0.0, 'darkgreen': 0.0, 'darkkhaki': 0.0, 'darkolivegreen': 0.0,'darkorange': 0.0, 'darkred': 0.0, 'deeppink': 0.0, 'dimgrey': 0.0,
#  'firebrick': 0.0, 'gold': 0.0, 'goldenrod': 0.0, 'green': 0.0,'greenyellow': 0.0, 'grey': 0.0, 'ivory': 0.0, 'khaki': 0.0,
#  'lightgrey': 0.0, 'magenta': 0.0, 'olivedrab': 0.0, 'orange': 0.0,'pink': 0.0, 'purple': 0.0, 'purple4': 0.0, 'red': 0.5,
#  'royalblue': 0.0, 'saddlebrown': 0.0, 'salmon': 0.0, 'sandybrown': 0.0,'silver': 0.0, 'skin': 0.0, 'skyblue': 0.0, 'steelblue': 0.0,
#  'tomato': 0.0, 'violet': 0.0, 'wheat': 0.0, 'white': 0.0, 'yellow': 0.0}

In [None]:
# x