In [1]:
#libraries
import cv2
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
import time

can I start from specific timestamps ?

what happens for skip_frames=0 ?

In [2]:
#read video file frame by frame
def read_video(video,skip_frames,resolution_width):
    resolution_height=int(resolution_width * 9/16)
    resolution=(resolution_width,resolution_height)
#     res_dict={'1':(120,90),'2':(240,135),'3':(480,270)}
    vid = cv2.VideoCapture(video)
    frames=[]
    vid_length=0
    while(vid.isOpened()):
        # Capture frame-by-frame
        ret, frame = vid.read() # if ret is false, frame has no content
        # resize the video to a different resolution
        if ret:
            frame=cv2.resize(frame,resolution)
        # skip every "skip_frame"
        if vid_length%skip_frames==0:
            frames.append(frame) #add the individual frames to a list
        vid_length+=1 #increase the vid_length counter
        if not ret:
            break
    vid.release()
    cv2.destroyAllWindows()
    return frames

Dominant Color Extractor

In [93]:
def extract_dominant_color(frame_list,bin_threshold=0.05,colors_to_return=5):
    print(str(len(frame_list))+' frames to process.')
    start=time.time()
    rgb_to_color=fn_rgb_to_color() #get the color dict 
    bins={} #bins dict for histograms 
    for rgb in rgb_to_color: #init the dict with zeros for every key
        bins[rgb_to_color[rgb]]=0
    rgb_list=[] #create a traverseable list of the rgb_values
    for rgb in rgb_to_color: #map the values of the dict to a list
        rgb_list.append(rgb)
    i = 0
    for image in frame_list: #traverse the video
        #flatten the image to 1d 
        img = image.reshape((image.shape[0] * image.shape[1], 3))     
        for pixel in img: # do nearest neighbour search on every pixel every color in the list
            bin_aux=[]
            #get the euclidean distance between the colors and the current pixel
            for rgb in rgb_list:
                bin_aux.append(euclidean(pixel,rgb))
            # get the index of the color,which has the smallest distance, in rgb_list
            min_pos = np.argmin(bin_aux)
            #increment the respective color 
            bins[rgb_to_color[rgb_list[min_pos]]]+=1
        i+=1
        end=time.time()
        print('Finished '+str(i)+',time: '+str(end-start))
    #create a dataframe, sorted descending by count
    bins_sorted=sorted(zip(list(bins.values()),list(bins.keys())),reverse=True)
    df=pd.DataFrame(bins_sorted,columns=['count','color'])
    df.set_index('color',inplace=True) #set the colors as the index of the dataframe
    norm_factor = len(frame_list)* np.shape(frame_list[0])[0] * np.shape(frame_list[0])[1]  #normalize the bins
    df=df/norm_factor 
    df = df[df>bin_threshold].dropna() #kick bins from the dataframe with precentage lower than bin_threshold 
    return df.head(colors_to_return)#return the color_return highest bins, default 5, if less bins then
                                #color_return are there return all

In [94]:
def fn_rgb_to_color():
    colors={'darkred':(139,0,0),
    'firebrick':(178,34,34),
    'crimson':(220,20,60),
    'red':(255,0,0),
    'tomato':(255,99,71),
    'salmon':(250,128,114),
    'dark_orange':(255,140,0),
    'gold':(255,215,0),
    'dark_khaki':(189,183,107),
    'yellow':(255,255,0),
    'dark_olive_green':(85,107,47),
    'olive_drab':(107,142,35),
    'green_yellow':(173,255,47),
    'dark_green':(0,100,0),
    'aqua_marine':(127,255,212),
    'steel_blue':(70,130,180),
    'sky_blue':(135,206,235),
    'dark_blue':(0,0,139),
    'blue':(0,0,255),
    'royal_blue':(65,105,225),
    'purple':(128,0,128),
    'violet':(238,130,238),
    'deep_pink':(255,20,147),
    'pink':(255,192,203),
    'antique_white':(250,235,215),
    'saddle_brown':(139,69,19),
    'sandy_brown':(244,164,96),
    'ivory':(255,255,240),
    'dim_grey':(105,105,105),
    'grey':(28,128,128),
    'silver':(192,192,192),
    'light_grey':(211,211,211),
    'black':(0,0,0),
    'white':(255,255,255),
    'dark_cyan':(0,139,139),
    'cyan':(0,255,255),
    'green':(0,128,0),
    'khaki':(240,230,140),
    'golden_rod':(218,165,32),
    'orange':(255,165,0),
    'coral':(255,127,80),
    'magenta':(255,0,255),
    'wheat':(245,222,179),
    'skin':(255,224,189),
    'purple4':(147,112,219)}
    rgb_to_color={}
    for color in colors:
        rgb_to_color[colors[color]]=color
    #purple4 is median purple
    #skin is caucasian
    return rgb_to_color

In [95]:
# # 240,135
# frames_2 = read_video('/home/jacob/Downloads/IMG_2525.MOV',2,(240,135))
# frames_8 = read_video('/home/jacob/Downloads/IMG_2525.MOV',8,(240,135))
# frames_32 = read_video('/home/jacob/Downloads/IMG_2525.MOV',32,(240,135))
# print(len(frames_2),len(frames_8),len(frames_32))

In [96]:
# # 120,90
# frames_2_2 = read_video('/home/jacob/Downloads/IMG_2525.MOV',2,(120,90))
# frames_8_2 = read_video('/home/jacob/Downloads/IMG_2525.MOV',8,(120,90))
# frames_32_2 = read_video('/home/jacob/Downloads/IMG_2525.MOV',32,(120,90))
# print(len(frames_2_2),len(frames_8_2),len(frames_32_2))

- with (640,360) one frame takes 127 seconds
- with (480,270) ....
- with (240,135) one frame takes 17 seconds
- with (120,90) one frame takes 5 seconds
- with (60,45) one frame takes 1.5 seconds

In [91]:
frames_8_2_red = read_video('videos/red.mp4',200,120)
df_8_2_red = extract_dominant_color(frames_8_2_red,bin_threshold=0.02)

# frames_32_2_red = read_video('red.mp4',32,(120,90))

# frames_8_2_green = read_video('green.mp4',8,(120,90))
# frames_32_2_green = read_video('green.mp4',32,(120,90))

# frames_8_2_yellow = read_video('yellow.mp4',8,(120,90))
# frames_32_2_yellow = read_video('yellow.mp4',32,(120,90))

# frames_8_2_blue = read_video('blue.mp4',8,(120,90))
# frames_32_2_blue = read_video('blue.mp4',32,(120,90))

6 frames to process.
Finished 1,time: 3.5499818325042725
Finished 2,time: 7.090299844741821
Finished 3,time: 10.579890251159668
Finished 4,time: 14.051212549209595
Finished 5,time: 17.52706503868103
Finished 6,time: 20.972081661224365


In [92]:
df_8_2_red

Unnamed: 0_level_0,count
color,Unnamed: 1_level_1
black,0.297595
dark_olive_green,0.20483
dim_grey,0.169486
purple4,0.0676
purple,0.066812


In [40]:
# df_8_2_red = extract_dominant_color(frames_8_2_red,rgb_to_color)
# df_32_2_red = extract_dominant_color(frames_32_2_red,rgb_to_color)

In [10]:
# df_8_2_green = extract_dominant_color(frames_8_2_green,rgb_to_color)
# df_32_2_green = extract_dominant_color(frames_32_2_green,rgb_to_color)

In [11]:
# df_8_2_yellow = extract_dominant_color(frames_8_2_yellow,rgb_to_color)
# df_32_2_yellow = extract_dominant_color(frames_32_2_yellow,rgb_to_color)

In [12]:
# df_8_2_blue = extract_dominant_color(frames_8_2_blue,rgb_to_color)
# df_32_2_blue = extract_dominant_color(frames_32_2_blue,rgb_to_color)

In [13]:
# print('red','\n',df_8_2_red.tail(),'\n',df_32_2_red.tail(),'\n')
# print('green','\n',df_8_2_green.tail(),'\n',df_32_2_green.tail(),'\n')
# print('yellow','\n',df_8_2_yellow.tail(),'\n',df_32_2_yellow.tail(),'\n')
# print('blue','\n',df_8_2_blue.tail(),'\n',df_32_2_blue.tail(),'\n')

In [14]:
# frames_8_2_red = read_video('red.mp4',8,(60,45))

# frames_8_2_green = read_video('green.mp4',8,(120,90))

# frames_8_2_yellow = read_video('yellow.mp4',8,(120,90))

# frames_8_2_blue = read_video('blue.mp4',8,(120,90))

In [15]:
# df_8_2_red = extract_dominant_color(frames_8_2_red,rgb_to_color)

In [16]:
# df_8_2_red.tail()

In [17]:
# these will take 10 and 2.6 hours respectively, so I won't touch them
# bins_2 = extract_dominant_color(frames_2,rgb_to_color)
# bins_8 = extract_dominant_color(frames_8,rgb_to_color)

In [18]:
# bins_32 = extract_dominant_color(frames_32,rgb_to_color)

In [19]:
# bins_2_2 = extract_dominant_color(frames_2_2,rgb_to_color)
# bins_8_2 = extract_dominant_color(frames_8_2,rgb_to_color)

In [20]:
# bins_32_2 = extract_dominant_color(frames_32_2,rgb_to_color)

In [21]:
# #get bins into dataframes for better comparison
# bins_32_sorted=sorted(zip(list(bins_32.values()),list(bins_32.keys())))
# bins_8_2_sorted=sorted(zip(list(bins_8_2.values()),list(bins_8_2.keys())))
# bins_32_2_sorted=sorted(zip(list(bins_32_2.values()),list(bins_32_2.keys())))

# df_32=pd.DataFrame(bins_32_sorted,columns=['count','color'])
# df_8_2=pd.DataFrame(bins_8_2_sorted,columns=['count','color'])
# df_32_2=pd.DataFrame(bins_32_2_sorted,columns=['count','color'])

# df_32.set_index('color',inplace=True)
# df_8_2.set_index('color',inplace=True)
# df_32_2.set_index('color',inplace=True)
# df_32.tail()
# # df_8_2.tail()

df_8_2

steel_blue	259912

white	284117

dim_grey	1108648

black	1311002

dark_olive_green	1778223

--------------------------------------------

df_32

steel_blue	194958

white	211503

dim_grey	833335

black	983867

dark_olive_green	1336795

In [22]:
# df_32_2.tail()

df_32_2

steel_blue	64574

white	70615

dim_grey	278010

black	328651

dark_olive_green	445648