In [1]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import scipy.stats as st

## Exploratory Data Analysis

In [2]:
# Probe the frame rate of one sample clip, then release the capture so the
# underlying file handle is not kept open for the rest of the kernel session.
video = cv2.VideoCapture('./data/Symfuhny/Symfuhny-0.mp4')
fps = video.get(cv2.CAP_PROP_FPS)
video.release()
print('frames per second =', fps)

frames per second = 60.0


In [3]:
def calculate_movement(path):
    """Compute a movement score for every consecutive pair of frames in a video.

    Each frame is converted to grayscale and smoothed with a 5x5 Gaussian
    blur; the score is the L2 norm of the absolute difference between the
    current and the previous processed frame.

    Parameters
    ----------
    path : str
        Path to the video file. It is prefixed with './' before being opened.

    Returns
    -------
    list
        One score per frame after the first, in playback order. The list is
        empty when fewer than two frames could be read.
    """
    video = cv2.VideoCapture('./' + path)
    previous_frame = None
    movement = []

    try:
        while True:
            success, image = video.read()
            if not success:
                # No more frames (or the capture could not deliver any).
                break

            # NOTE(review): the frame is swapped to RGB and then converted with
            # COLOR_BGR2GRAY, so the red and blue channel weights end up
            # exchanged. Kept unchanged on purpose so scores stay comparable
            # with features that were already extracted; confirm before
            # simplifying to a single conversion.
            img_rgb = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
            current_frame = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
            current_frame = cv2.GaussianBlur(src=current_frame, ksize=(5, 5), sigmaX=0)

            # The very first frame has nothing to be compared against.
            if previous_frame is not None:
                diff_frame = cv2.absdiff(src1=previous_frame, src2=current_frame)
                movement.append(cv2.norm(diff_frame, normType=cv2.NORM_L2))

            previous_frame = current_frame
    finally:
        # Always free the capture, even if frame processing raises.
        video.release()

    return movement

In [None]:
# Movement profile of the first sample clip (Symfuhny-0).
# This cell only needs calculate_movement, so it runs on a fresh kernel
# without depending on variables defined further down the notebook.
movement = calculate_movement('./data/Symfuhny/Symfuhny-0.mp4')
x = list(range(0, len(movement)))
plt.plot(x, movement)
plt.title('Movement Score')
plt.xlabel('Frame')
plt.ylabel('Distance')
plt.show()
len(movement)

In [None]:
# Movement profile of the second sample clip (Symfuhny-1).
movement = calculate_movement('./data/Symfuhny/Symfuhny-1.mp4')
x = list(range(len(movement)))
plt.plot(x, movement)
plt.gca().set(title='Movement Score', xlabel='Frame', ylabel='Distance')
plt.show()
# Number of scored frames, shown as the cell output.
len(movement)

In [None]:
# Movement profile of the third sample clip (Symfuhny-2).
movement = calculate_movement('./data/Symfuhny/Symfuhny-2.mp4')
x = list(range(len(movement)))
plt.plot(x, movement)
plt.gca().set(title='Movement Score', xlabel='Frame', ylabel='Distance')
plt.show()
# Number of scored frames, shown as the cell output.
len(movement)

## Feature Extraction

In [4]:
def get_streamer_paths(folder_path):
    """Collect the video paths of every streamer stored under ``folder_path``.

    Every sub-directory ``<name>`` of ``folder_path`` is treated as one
    streamer and must contain ``<name>.csv`` with a ``video_path`` column.

    Parameters
    ----------
    folder_path : str
        Folder that holds one sub-directory per streamer.

    Returns
    -------
    list of pandas.Series
        One Series per streamer, in ``os.listdir`` order. Each value is
        ``folder_path + '/' + name + '/' + video_path``.
    """
    streamers_video_paths = []

    for name in os.listdir(path=folder_path):
        streamer_dir = folder_path + '/' + name

        # Only sub-directories describe streamers. Plain files sitting next to
        # them (the aggregated data.csv, OS metadata files, ...) are skipped
        # instead of failing on a CSV path that cannot exist.
        if name == 'data.csv' or not os.path.isdir(streamer_dir):
            continue

        # Read the per-streamer CSV that lists its video files.
        df = pd.read_csv(streamer_dir + '/' + name + '.csv')

        # str + Series prefixes every entry; '/' is used explicitly (not
        # os.path.join) because the paths are later split on '/'.
        streamers_video_paths.append(streamer_dir + '/' + df['video_path'])
    return streamers_video_paths

# One Series of video paths per streamer found under ./data.
video_paths = get_streamer_paths(folder_path='./data')

In [5]:
def get_movement_list(streamer_videos):
    """Compute the movement scores of every video of one streamer.

    Parameters
    ----------
    streamer_videos : iterable of str
        Paths to the streamer's video files (a list or a pandas Series).

    Returns
    -------
    list of list
        ``calculate_movement`` output for each path, in input order.
    """
    # Iterate over the values directly: ``streamer_videos[i]`` is a label
    # lookup on a pandas Series and fails (or picks the wrong row) whenever
    # the index is not exactly 0..n-1, e.g. after filtering.
    return [calculate_movement(video_path) for video_path in streamer_videos]

def flatten_list(list):
    """Flatten one level of nesting.

    Takes an iterable of iterables and returns a single new list holding the
    inner items in their original order.
    """
    flattened = []
    for sublist in list:
        flattened.extend(sublist)
    return flattened

In [6]:
def calculate_spike_num(movement_list, z_score):
    """Count the movement spikes in each video of one streamer.

    A spike is a movement score at or above ``mean + z_score * std``, where
    the mean and standard deviation (``np.mean`` / ``np.std``) are taken over
    the scores of all the streamer's videos pooled together.

    Parameters
    ----------
    movement_list : list of list
        Movement scores, one inner list per video.
    z_score : number
        How many standard deviations above the mean a score must reach.

    Returns
    -------
    list of int
        Spike count for each video, in the order of ``movement_list``.
    """
    # Pool every score so all videos share one threshold.
    all_scores = flatten_list(movement_list)
    threshold = z_score * np.std(all_scores) + np.mean(all_scores)

    # Count the scores reaching the threshold, video by video.
    return [
        sum(1 for score in clip_scores if score >= threshold)
        for clip_scores in movement_list
    ]

def calculate_maximum_movement(movement_list):
    """Return the highest movement score of each video clip.

    Parameters
    ----------
    movement_list : list of list
        Movement scores, one inner list per video.

    Returns
    -------
    list
        The maximum of every inner list, in input order.

    Raises
    ------
    ValueError
        If any inner list is empty (``max`` has nothing to pick from).
    """
    return [*map(max, movement_list)]

In [7]:
def extract_features_streamer(streamer_videos, z_score=3):
    """Build the per-video feature table for one streamer.

    Parameters
    ----------
    streamer_videos : iterable of str
        Paths to the streamer's video files.
    z_score : number, optional
        Spike threshold, in standard deviations above the streamer's mean
        movement score, passed to ``calculate_spike_num``. Defaults to 3.

    Returns
    -------
    pandas.DataFrame
        One row per video with the columns ``File`` (file name without
        directory and extension), ``Number of Movement Spikes`` and
        ``Maximum Movement Score``.
    """
    # Movement scores are the expensive part; compute them once and reuse.
    movement_list = get_movement_list(streamer_videos)

    # Strip directory and extension through os.path rather than assuming a
    # four-character suffix such as '.mp4'.
    file_names = [
        os.path.splitext(os.path.basename(path))[0] for path in streamer_videos
    ]

    return pd.DataFrame({
        'File': file_names,
        'Number of Movement Spikes': calculate_spike_num(movement_list, z_score),
        'Maximum Movement Score': calculate_maximum_movement(movement_list),
    })

In [8]:
def feature_extraction(data_path, debug=False):
    """Extract the video features of every streamer under ``data_path``.

    Parameters
    ----------
    data_path : str
        Folder containing one sub-directory per streamer.
    debug : bool, optional
        When true, print a message before and after each streamer is
        processed. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        The per-streamer feature tables stacked in processing order with a
        fresh 0..n-1 index. An empty DataFrame when no streamer was found.
    """
    frames = []
    for streamer_videos in get_streamer_paths(data_path):
        if debug:
            print("Starting on Streamer ", streamer_videos[0])
        frames.append(extract_features_streamer(streamer_videos))
        if debug:
            print("Streamer ", streamer_videos[0], ' done!')

    # Concatenate once at the end instead of re-copying the growing frame on
    # every iteration. Empty tables add no rows, so they are left out.
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return frames[-1] if frames else pd.DataFrame()
    return pd.concat(non_empty, ignore_index=True)

In [None]:
# Scratch check: extract the features of the first streamer only.
# `df` is read by the concat cell below and later reassigned by the full
# feature_extraction run.
streamer_videos = video_paths[0]
df = extract_features_streamer(streamer_videos)
df

In [None]:
# Scratch check: extract the features of the second streamer only.
# `df_1` is read by the concat cell below.
streamer_videos = video_paths[1]
df_1 = extract_features_streamer(streamer_videos)
df_1

In [None]:
# Stack the two single-streamer tables and renumber the rows from 0.
df_new = pd.concat([df, df_1], ignore_index=True)
df_new

In [9]:
# Full run over every streamer under ./data, with progress messages enabled.
df = feature_extraction(data_path='./data', debug=True)
df

Starting on Streamer  ./data/buckefps/buckefps-0.mp4
Streamer  ./data/buckefps/buckefps-0.mp4  done!
Starting on Streamer  ./data/cyr/cyr-0.mp4
Streamer  ./data/cyr/cyr-0.mp4  done!
Starting on Streamer  ./data/Foolish_Gamers/Foolish_Gamers-0.mp4
Streamer  ./data/Foolish_Gamers/Foolish_Gamers-0.mp4  done!
Starting on Streamer  ./data/itsRyanHiga/itsRyanHiga-0.mp4
Streamer  ./data/itsRyanHiga/itsRyanHiga-0.mp4  done!
Starting on Streamer  ./data/Kastaclysm/Kastaclysm-0.mp4
Streamer  ./data/Kastaclysm/Kastaclysm-0.mp4  done!
Starting on Streamer  ./data/NICKMERCS/NICKMERCS-0.mp4
Streamer  ./data/NICKMERCS/NICKMERCS-0.mp4  done!
Starting on Streamer  ./data/Philza/Philza-0.mp4
Streamer  ./data/Philza/Philza-0.mp4  done!
Starting on Streamer  ./data/Ray/Ray-1.mp4
Streamer  ./data/Ray/Ray-1.mp4  done!
Starting on Streamer  ./data/shroud/shroud-0.mp4
Streamer  ./data/shroud/shroud-0.mp4  done!
Starting on Streamer  ./data/Symfuhny/Symfuhny-0.mp4
Streamer  ./data/Symfuhny/Symfuhny-0.mp4  done

Unnamed: 0,File,Number of Movement Spikes,Maximum Movement Score
0,buckefps-0,6,11052.390511
1,buckefps-1,2,6226.356880
2,buckefps-2,1,5706.733479
3,buckefps-3,14,7739.309853
4,buckefps-4,37,12527.618409
...,...,...,...
651,Xaryu-55,0,1411.815498
652,Xaryu-56,143,6323.025383
653,Xaryu-57,1,4228.973753
654,Xaryu-58,5,3574.774398


In [10]:
# Persist the extracted feature table next to the notebook.
df.to_pickle(path='./video_feature.pkl')