# Music Video Analysis

In this notebook, different video features will be extracted from the music videos.

In [None]:
# Imports
# General data science purposes
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

# For scenedetection
from scenedetect import VideoManager
from scenedetect import SceneManager
from scenedetect.detectors import ContentDetector

# For image processing
import cv2

In [None]:
# Load the music video table; its `path` column supplies the video files analysed below.
df = pd.read_csv('music_videos_v3.csv')

Possible features:
- motion intensity (mean and standard deviation of the normalized difference between consecutive sampled frames)
- shots/frames (the number of shots normalized by the number of frames in the video)
- lighting 
- saturation
- color energy
- colorfulness

In [None]:
# Scene detection function
def find_scenes(video_path, threshold=30.0):
    """
    Detect the scenes of a video with PySceneDetect's ContentDetector.

    Parameters:
        video_path: path of the video file to analyse.
        threshold: threshold handed to ContentDetector.

    Returns:
        The scene list produced by SceneManager.get_scene_list; callers
        unpack every entry as a (start_time, end_time) pair.
    """
    video_manager = VideoManager([video_path])
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))
    base_timecode = video_manager.get_base_timecode()
    try:
        video_manager.set_downscale_factor()
        video_manager.start()
        scene_manager.detect_scenes(frame_source=video_manager, show_progress=False)
        return scene_manager.get_scene_list(base_timecode)
    finally:
        # Always free the underlying capture; this function runs once per
        # video and would otherwise keep every file open.
        video_manager.release()


In [None]:
# Amount of frames function
def amount_frames(video_path):
    """
    Return the number of frames OpenCV reports for the video at `video_path`.

    The capture is opened only to read CAP_PROP_FRAME_COUNT and is released
    again before returning.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

In [None]:
# 1 frame per second function
def frame_per_sec(cap):
    """
    Sample roughly one frame per second from an opened cv2.VideoCapture.

    The sampled positions are FRAME_RATE, 2 * FRAME_RATE, ... up to the
    reported frame count, where FRAME_RATE is the rounded CAP_PROP_FPS value.

    Parameters:
        cap: an opened cv2.VideoCapture. Its read position is moved by this
             function; the capture is not released here.

    Returns:
        A list of RGB frames. Positions that cannot be read (for instance the
        last position when it falls on the end of the video) are skipped, and
        an empty list is returned when the capture reports no usable frame
        rate or frame count.
    """
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = round(cap.get(cv2.CAP_PROP_FPS))
    if frame_rate <= 0 or total_frames <= 0:
        return []

    selected_frames = frame_rate * np.arange(1, total_frames // frame_rate + 1)
    frames = []
    for position in tqdm(selected_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(position))
        ret, image = cap.read()
        if not ret:
            # cap.read() hands back no image on failure; converting it would raise.
            continue
        frames.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    return frames

In [None]:
# motion intensity function
def motion_intensity(frames, blur_kernel=(21, 21)):
    """
    Adapted from https://www.codespeedy.com/motion-detection-using-opencv-in-python/

    Measure the motion between consecutive frames as the sum of absolute
    grayscale differences, normalized by the grayscale range (255) and the
    number of pixels.

    Parameters:
        frames: sequence of RGB frames, as returned by frame_per_sec.
        blur_kernel: Gaussian blur kernel size applied to every frame. Both
                     frames of a pair use the same kernel so that identical
                     frames yield a difference of zero.

    Returns:
        (mean, standard deviation) of the per-pair differences, or
        (nan, nan) when fewer than two frames are given.
    """
    if len(frames) < 2:
        return np.nan, np.nan

    diff = []
    previous = None
    for frame in frames:
        # The frames are RGB, so use the RGB conversion to weight channels correctly.
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        gray = cv2.GaussianBlur(gray, blur_kernel, 0)
        if previous is not None:
            diff.append(cv2.norm(previous, gray, cv2.NORM_L1) / (255 * gray.size))
        previous = gray
    return np.mean(diff), np.std(diff)

In [None]:
# lighting metric function
def lighting_bin(hls):
    """
    Score the lighting of an HLS image from a histogram of its lightness channel.

    Channel 1 of `hls` is digitized against 21 edges spread evenly over
    [1, 255], which gives bin indices 0..21. Every pixel in bin k contributes
    k - 6 when k >= 7 and k - 7 when k < 7, so dark pixels lower the score
    and bright pixels raise it.

    Parameters:
        hls: array of shape (height, width, channels) with lightness at index 1.

    Returns:
        The summed score as an int.
    """
    lightness = np.asarray(hls)[:, :, 1].ravel()
    bins = np.linspace(1, 255, 21)
    digitized = np.digitize(lightness, bins)
    # minlength keeps one count per possible bin even when bright bins are empty.
    count = np.bincount(digitized, minlength=len(bins) + 1)
    index = np.arange(len(count))
    weights = np.where(index >= 7, index - 6, index - 7)
    return int(np.dot(count, weights))

In [None]:
# mean lighting function
def lighting_mean(hls):
    """
    Return the mean of the lightness channel (index 1) of an HLS image.

    Parameters:
        hls: array of shape (height, width, channels).

    Returns:
        The mean lightness as a float; values are taken as integers first.
    """
    lightness = np.asarray(hls)[:, :, 1]
    # Slice the channel instead of looping over every pixel in Python.
    return np.mean(lightness.astype(np.int64))

In [None]:
# saturation metric function
def saturation_bin(hls):
    """
    Score the saturation of an HLS image from a histogram of its saturation channel.

    Channel 2 of `hls` is digitized against 21 edges spread evenly over
    [1, 255], which gives bin indices 0..21. Every pixel in bin k contributes
    k - 9 when k >= 10 and k - 10 when k < 10, so weakly saturated pixels
    lower the score and strongly saturated pixels raise it.

    Parameters:
        hls: array of shape (height, width, channels) with saturation at index 2.

    Returns:
        The summed score as an int.
    """
    saturation = np.asarray(hls)[:, :, 2].ravel()
    bins = np.linspace(1, 255, 21)
    digitized = np.digitize(saturation, bins)
    # minlength keeps one count per possible bin even when high bins are empty.
    count = np.bincount(digitized, minlength=len(bins) + 1)
    index = np.arange(len(count))
    weights = np.where(index >= 10, index - 9, index - 10)
    return int(np.dot(count, weights))

In [None]:
# mean saturation function
def saturation_mean(hls):
    """
    Return the mean of the saturation channel (index 2) of an HLS image.

    Parameters:
        hls: array of shape (height, width, channels).

    Returns:
        The mean saturation as a float; values are taken as integers first.
    """
    saturation = np.asarray(hls)[:, :, 2]
    # Slice the channel instead of looping over every pixel in Python.
    return np.mean(saturation.astype(np.int64))

In [None]:
# color energy function
def color_energy(hls):
    """
    Return the color energy of an HLS image.

    The value is the mean of lightness * saturation over all pixels divided
    by the standard deviation of the hue channel.

    Parameters:
        hls: array of shape (height, width, channels) ordered hue, lightness,
             saturation.

    Returns:
        The color energy as a float.
    """
    pixels = np.asarray(hls)
    hue = pixels[:, :, 0]
    # Widen before multiplying: a uint8 * uint8 product would wrap around.
    lightness = pixels[:, :, 1].astype(np.int64)
    saturation = pixels[:, :, 2].astype(np.int64)
    product = lightness * saturation
    h_std = np.std(hue)
    # NOTE(review): an image with a single hue has h_std == 0, so the result
    # is inf or nan here, exactly as before - confirm how callers should
    # treat such frames.
    return np.sum(product) / (h_std * product.size)

In [None]:
# colorfulness functions
def image_colorfulness(image):
    """
    Compute the colorfulness metric of a BGR image.

    Taken from https://www.pyimagesearch.com/2017/06/05/computing-image-colorfulness-with-opencv-and-python/,
    based on Hasler & Süsstrunk.

    Parameters:
        image: array of shape (height, width, channels) whose first three
               channels are read as blue, green and red.

    Returns:
        The colorfulness as a float; 0 for an image without any color.
    """
    # split the image into its respective color components
    channels = np.asarray(image).astype("float")
    B, G, R = channels[:, :, 0], channels[:, :, 1], channels[:, :, 2]
    # compute rg = |R - G| and yb = |0.5 * (R + G) - B|
    rg = np.absolute(R - G)
    yb = np.absolute(0.5 * (R + G) - B)
    # compute the mean and standard deviation of both `rg` and `yb`
    rg_mean, rg_std = np.mean(rg), np.std(rg)
    yb_mean, yb_std = np.mean(yb), np.std(yb)
    # combine the mean and standard deviations
    std_root = np.sqrt((rg_std ** 2) + (yb_std ** 2))
    mean_root = np.sqrt((rg_mean ** 2) + (yb_mean ** 2))
    # derive the "colorfulness" metric and return it
    return std_root + (0.3 * mean_root)

In [None]:
# Per-video feature columns: entry k of every list describes df.path.values[k].
video_length = []
scene_count = []
frame_count = []
mot_int_mean = []
mot_int_std = []
lighting_met = []
light_mean = []
sat_met = []
sat_mean = []
col_en = []
colness = []

for k, path in enumerate(tqdm(df.path.values)):
    cap = cv2.VideoCapture(path)
    try:
        TOTAL_FRAMES = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        scene_list = find_scenes(path)

        # Take the middle frame of every detected scene as its representative shot.
        shots = []
        shot_length = []
        for start_time, end_time in scene_list:
            duration = end_time - start_time
            middle = start_time.get_frames() + int(duration.get_frames() / 2)
            cap.set(cv2.CAP_PROP_POS_FRAMES, middle)
            ret, shot = cap.read()
            if not ret:
                # Unreadable frame: drop the shot together with its weight.
                continue
            # Keep OpenCV's BGR order: the HLS conversion and the colorfulness
            # metric below both read the frame as BGR.
            shots.append(shot)
            shot_length.append(duration.get_seconds())

        frames = frame_per_sec(cap)
    finally:
        # Release the file handle even when a video fails mid-way.
        cap.release()

    mot_int = motion_intensity(frames)

    lighting_met_i = []
    light_mean_i = []
    sat_met_i = []
    sat_mean_i = []
    col_en_i = []
    colness_i = []

    for frame in tqdm(shots):
        hls = cv2.cvtColor(frame, cv2.COLOR_BGR2HLS)

        lighting_met_i.append(lighting_bin(hls))
        light_mean_i.append(lighting_mean(hls))
        sat_met_i.append(saturation_bin(hls))
        sat_mean_i.append(saturation_mean(hls))
        col_en_i.append(color_energy(hls))
        colness_i.append(image_colorfulness(frame))

    # Weight every shot by its duration; np.average raises when there are no
    # shots or the weights sum to zero, so record NaN for such videos.
    per_shot = (lighting_met_i, light_mean_i, sat_met_i, sat_mean_i, col_en_i, colness_i)
    if shots and sum(shot_length) > 0:
        averaged = [np.average(values, weights=shot_length) for values in per_shot]
    else:
        averaged = [np.nan] * len(per_shot)

    # Append only once the whole video has been processed, so that a failure
    # or interrupt above does not leave the lists with different lengths.
    scene_count.append(len(scene_list))
    frame_count.append(TOTAL_FRAMES)  # same property amount_frames() reads
    video_length.append(len(frames))
    mot_int_mean.append(mot_int[0])
    mot_int_std.append(mot_int[1])
    lighting_met.append(averaged[0])
    light_mean.append(averaged[1])
    sat_met.append(averaged[2])
    sat_mean.append(averaged[3])
    col_en.append(averaged[4])
    colness.append(averaged[5])
        

  0%|          | 0/997 [00:00<?, ?it/s]

  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/98 [00:00<?, ?it/s]

  0%|          | 0/251 [00:00<?, ?it/s]

  0%|          | 0/93 [00:00<?, ?it/s]

  0%|          | 0/192 [00:00<?, ?it/s]

  0%|          | 0/115 [00:00<?, ?it/s]

  0%|          | 0/258 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/277 [00:00<?, ?it/s]

  0%|          | 0/120 [00:00<?, ?it/s]

  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/146 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/120 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# frame_per_sec reads from an opened capture, so open the video first
# and release it once the frames have been sampled.
sample_cap = cv2.VideoCapture(df.path[2])
frames = frame_per_sec(sample_cap)
sample_cap.release()

  0%|          | 0/251 [00:00<?, ?it/s]

In [None]:
# frame_per_sec returns RGB frames, so convert with the RGB variant.
hls = cv2.cvtColor(frames[5], cv2.COLOR_RGB2HLS)
lighting_bin(hls)

213808

In [None]:
# Show the scene list left over from the last video handled by the extraction loop.
scene_list

[(FrameTimecode(frame=0, fps=29.970030),
  FrameTimecode(frame=18, fps=29.970030)),
 (FrameTimecode(frame=18, fps=29.970030),
  FrameTimecode(frame=42, fps=29.970030)),
 (FrameTimecode(frame=42, fps=29.970030),
  FrameTimecode(frame=64, fps=29.970030)),
 (FrameTimecode(frame=64, fps=29.970030),
  FrameTimecode(frame=87, fps=29.970030)),
 (FrameTimecode(frame=87, fps=29.970030),
  FrameTimecode(frame=162, fps=29.970030)),
 (FrameTimecode(frame=162, fps=29.970030),
  FrameTimecode(frame=372, fps=29.970030)),
 (FrameTimecode(frame=372, fps=29.970030),
  FrameTimecode(frame=483, fps=29.970030)),
 (FrameTimecode(frame=483, fps=29.970030),
  FrameTimecode(frame=650, fps=29.970030)),
 (FrameTimecode(frame=650, fps=29.970030),
  FrameTimecode(frame=699, fps=29.970030)),
 (FrameTimecode(frame=699, fps=29.970030),
  FrameTimecode(frame=937, fps=29.970030)),
 (FrameTimecode(frame=937, fps=29.970030),
  FrameTimecode(frame=1008, fps=29.970030)),
 (FrameTimecode(frame=1008, fps=29.970030),
  FrameT