In [2]:
!pip install -U scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (23.1 MB)
[K     |████████████████████████████████| 23.1 MB 139.5 MB/s 
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.0.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl, scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.22.2.post1
    Uninstalling scikit-learn-0.22.2.post1:
      Successfully uninstalled scikit-learn-0.22.2.post1
Successfully installed scikit-learn-1.0 threadpoolctl-3.0.0


In [3]:
import sklearn
sklearn.__version__

'1.0'

In [4]:
# Mount Google Drive so the video archive stored there can be copied into the
# Colab runtime by the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Copy the video archive from the mounted Drive into the working directory.
!cp /content/drive/MyDrive/videos.zip /content/

In [None]:
!rm test.csv

In [None]:
# Reset the workspace: drop the unpacked videos, the extracted frames and the
# checkpoint that the training cell saves under the name 'weight.hdf55'.
!rm -rf videos/
!rm -rf frames/
!rm -rf weight.hdf55

In [6]:
!unzip videos.zip -x *.txt

Archive:  videos.zip
   creating: videos/
   creating: videos/double_leg/
  inflating: videos/double_leg/9q0FDZiwkCek.mkv  
   creating: __MACOSX/
   creating: __MACOSX/videos/
   creating: __MACOSX/videos/double_leg/
  inflating: __MACOSX/videos/double_leg/._9q0FDZiwkCek.mkv  
  inflating: videos/double_leg/8fuwzt4d7FuY.mkv  
  inflating: __MACOSX/videos/double_leg/._8fuwzt4d7FuY.mkv  
  inflating: videos/double_leg/21fKcAu0nrmgo.mkv  
  inflating: __MACOSX/videos/double_leg/._21fKcAu0nrmgo.mkv  
  inflating: videos/double_leg/24rgsLlb8SrMc.mkv  
  inflating: __MACOSX/videos/double_leg/._24rgsLlb8SrMc.mkv  
  inflating: videos/double_leg/48uKop_df4gA0.mkv  
  inflating: __MACOSX/videos/double_leg/._48uKop_df4gA0.mkv  
  inflating: videos/double_leg/47uKop_df4gA0.mkv  
  inflating: __MACOSX/videos/double_leg/._47uKop_df4gA0.mkv  
  inflating: videos/double_leg/23rgsLlb8SrMc.mkv  
  inflating: __MACOSX/videos/double_leg/._23rgsLlb8SrMc.mkv  
  inflating: videos/double_leg/33SbAzXngP480.

In [7]:
# Drop the macOS resource-fork folder that the archive unpacks next to videos/.
!rm -rf __MACOSX/

In [8]:
# FRAME EXTRACTION

import os
from tqdm import tqdm
import cv2
import pandas as pd
import sys

def create_folder(folder_name):
    """
    Make sure a directory exists, creating it (and any missing parents) if needed.

    :param folder_name: path of the directory to create
    :raises FileExistsError: if ``folder_name`` exists but is not a directory
    """
    # exist_ok=True replaces the isdir()-then-mkdir() pair, which could race
    # with another process; makedirs also copes with nested paths.
    os.makedirs(folder_name, exist_ok=True)


def get_video_data(videos_path):
    """
    List every video under ``videos_path`` together with its class.

    The class (``tag``) of a video is the name of the sub-folder it lives in,
    e.g. ``<videos_path>/criss_cross/video_1.mkv`` gives
    ``name='video_1.mkv', tag='criss_cross'``.

    :param videos_path: folder that contains one sub-folder per class
    :return: dataframe with the columns ``name`` and ``tag``, ordered by tag and
        then by file name; it has those two columns even when no video is found
    """
    videos_tags_list = []
    # Sorted so the row order does not depend on the file system's listing order.
    for exercise_folder in sorted(os.listdir(videos_path)):
        folder_path = os.path.join(videos_path, exercise_folder)
        # Skip hidden entries and stray files sitting next to the class folders.
        if exercise_folder.startswith('.') or not os.path.isdir(folder_path):
            continue
        for video in sorted(os.listdir(folder_path)):
            # Hidden files (e.g. '.DS_Store', '._clip.mkv') are not videos.
            if video.startswith('.'):
                continue
            videos_tags_list.append({'name': video, 'tag': exercise_folder})

    # Explicit columns keep video_df['tag'] valid for an empty listing.
    return pd.DataFrame(data=videos_tags_list, columns=['name', 'tag'])


def create_folders_and_sub_folders(folder_name, sub_folder_list):
    """
    Create a parent folder and then each requested sub-folder inside it.

    :param folder_name: name of the parent folder
    :param sub_folder_list: names of the sub-folders to create under ``folder_name``
    :return: None
    """
    # Parent first, then every child as '<parent>/<child>'.
    targets = [f'{folder_name}']
    targets.extend(f'{folder_name}/{child}' for child in sub_folder_list)

    for target in targets:
        create_folder(target)


def extract_frames(videos_path, frames_path, df, max_frames=20, max_attempts=10):
    """
    Save the first frames of every video listed in ``df`` as JPEG files.

    Each frame is written to
    ``<frames_path>/<tag>/video<k>_frame<i>.jpg`` where ``k`` is the position of
    the video in ``df`` and ``i`` the frame number. The target folders must
    already exist.

    :param videos_path: folder that holds one sub-folder of videos per class
    :param frames_path: folder that holds one sub-folder of frames per class
    :param df: dataframe whose first column is the video file name and whose
        second column is its class (as produced by get_video_data)
    :param max_frames: number of frames to keep per video (not a frame rate)
    :param max_attempts: failed reads tolerated per video before giving up on it
    :return: list of dicts with the keys ``video``, ``image`` and ``class``,
        one per saved frame
    """

    # positions of the columns inside each row tuple
    NAME = 0
    TAG = 1

    frame_videos_list = []
    for counter_videos, video in enumerate(tqdm(df.itertuples(index=False), total=len(df))):
        counter_frames = 0
        counter_attempts = 0
        video_path = f'{videos_path}/{video[TAG]}/{video[NAME]}'
        cap = cv2.VideoCapture(video_path)  # capturing the video from the given path
        try:
            # Stop as soon as the quota is reached instead of decoding the
            # remainder of the video only to throw the frames away.
            while cap.isOpened() and counter_frames < max_frames:
                ret, frame = cap.read()
                if not ret:
                    counter_attempts += 1
                    if counter_attempts > max_attempts:
                        break
                    continue
                filename = f'{frames_path}/{video[TAG]}/video{counter_videos}_frame{counter_frames}.jpg'
                cv2.imwrite(filename, frame)
                frame_videos_list.append({'video': video[NAME], 'image': filename, 'class': video[TAG]})
                counter_frames += 1
        finally:
            # Release every capture, not only the last one; this also keeps an
            # empty ``df`` from hitting an undefined ``cap``.
            cap.release()
    print('Done reading the videos and writing the frames')
    return frame_videos_list

def get_frames_from_folders():
    """
    Build the frames dataframe for the videos under ``videos``, targeting ``frames``.

    NOTE(review): this delegates to extract_frames, so the videos are read and
    the frame images are written again rather than only listing files that are
    already on disk. It does not create the ``frames`` folders itself.

    :return: dataframe built from the records returned by extract_frames
    """
    video_df = get_video_data(videos_path='videos')
    frame_records = extract_frames(videos_path='videos', frames_path='frames', df=video_df)
    return pd.DataFrame(frame_records)

def get_frames_from_videos():
    """
    Extract frames from every video under ``./videos`` into ``frames``.

    One sub-folder of ``frames`` is created per class found among the videos,
    then the frames are written into it.

    :return: dataframe describing the extracted frames: video name, frame
        image path and class
    """
    videos_path = './videos'
    frame_path = 'frames'

    # Get the videos' names and their classes
    video_df = get_video_data(videos_path=videos_path)

    # Create the output folders from the classes actually present instead of a
    # hard-coded list, so frames of any other class still have a folder to
    # be written to.
    tags = list(video_df['tag'].unique()) if 'tag' in video_df.columns else []
    create_folders_and_sub_folders(folder_name=frame_path, sub_folder_list=tags)

    videos_list = extract_frames(videos_path=videos_path, frames_path=frame_path, df=video_df)
    frames_data = pd.DataFrame(videos_list)
    print('Finished frame extraction')
    return frames_data


In [9]:
# SPLIT PROCESS

from sklearn.model_selection import StratifiedGroupKFold
import numpy as np
from tqdm import tqdm
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import pandas as pd


def split_data(data, n_splits=5, random_state=28):
    """
    Split frame records into a train part and a held-out part.

    The split is stratified on the ``class`` column and grouped on the
    ``video`` column, so all frames of one video end up on the same side.
    Only the first fold of the splitter is used.

    :param data: dataframe with at least the columns ``class`` and ``video``
    :param n_splits: number of folds; the held-out part is one fold
    :param random_state: seed for the shuffling
    :return: ``(train_data, test_data)``, each re-indexed from 0
    """
    splitter = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    train_inds, test_inds = next(splitter.split(data, y=data['class'], groups=data['video']))

    # drop=True: do not keep the old index as an extra 'index' / 'level_0'
    # column, which piles up when a split result is split again.
    train_data = data.iloc[train_inds].reset_index(drop=True)
    test_data = data.iloc[test_inds].reset_index(drop=True)
    return train_data, test_data


def images_to_array_with_classes(data):
    """
    Load the frame images listed in ``data`` into one preprocessed array.

    :param data: dataframe with an ``image`` column (image file paths) and a
        ``class`` column
    :return: ``(X, y)`` where ``X`` has shape ``(len(data), 224, 224, 3)`` after
        VGG16 ``preprocess_input`` and ``y`` is the ``class`` column of ``data``
    """
    # Read the paths positionally so the function does not depend on ``data``
    # having a 0..n-1 index.
    image_paths = data['image'].to_numpy()
    X = np.empty((len(image_paths), 224, 224, 3))
    for i, path in enumerate(tqdm(image_paths)):
        # load_img takes (height, width); the channel count is not part of it
        img = image.load_img(path, target_size=(224, 224))
        X[i] = image.img_to_array(img)
    # X is a fresh local buffer, so it is handed over without a defensive copy.
    X = preprocess_input(X)
    y = data['class']
    return X, y


In [10]:
# TRAINING

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import  ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

def model_initiation(num_classes=3):
    """
    Build the classifier: a frozen ImageNet VGG16 base with a dense head.

    :param num_classes: size of the final softmax layer
    :return: an uncompiled Sequential model taking 224x224x3 inputs
    """
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # only the head added below is trained

    model = Sequential([base_model])
    model.add(Flatten())
    # Four Dense + Dropout stages of decreasing width; no input_shape is given
    # because each layer takes its input size from the layer before it.
    for units in (1024, 512, 256, 128):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model


# def train(X_train, y_train, best_parameters):
def train(X_train, y_train, X_val, y_val, batch_size=16, epochs=10000):
    """
    Train the VGG16-based classifier with augmentation and early stopping.

    The best model by validation loss is saved to ``weight.hdf55`` and
    TensorBoard logs are written to ``logs``.

    :param X_train: training images
    :param y_train: integer class labels for ``X_train``
    :param X_val: validation images
    :param y_val: integer class labels for ``X_val``
    :param batch_size: size of the augmented training batches
    :param epochs: upper bound on epochs; early stopping (patience 5 on
        ``val_loss``) normally ends training much sooner
    :return: the trained model
    """
    print('Start training')

    model = model_initiation()
    # checkpoint callback: keeps only the model with the lowest validation loss
    mcp_save = ModelCheckpoint('weight.hdf55', save_best_only=True, monitor='val_loss', mode='min')
    # integer labels -> sparse categorical cross-entropy
    model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', patience=5, restore_best_weights=True)
    tb = TensorBoard(log_dir='logs')
    # NOTE(review): the images reaching this function have already been passed
    # through preprocess_input, while the validation images are not augmented;
    # confirm brightness_range behaves as intended on such inputs.
    aug = ImageDataGenerator(
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        brightness_range=[0.2, 1.0],
        fill_mode="nearest")
    print('Finish setting up')
    print('Begin traning')
    # training the model; at least one step per epoch even when there are
    # fewer samples than one batch
    model.fit(x=aug.flow(X_train, y_train, batch_size=batch_size), epochs=epochs,
              validation_data=(X_val, y_val),
              steps_per_epoch=max(1, len(X_train) // batch_size),
              callbacks=[mcp_save, es, tb])
    model.evaluate(X_val, y_val)
    return model

In [11]:
# TESTING

from tqdm import tqdm
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import load_model
import numpy as np
from sklearn.metrics import accuracy_score
from scipy import stats
# from frame_extraction import get_video_data as vd
import pandas as pd

ONLY = 0


def test(test_data, y_test):
    """
    Predict one class per test video by majority vote over its frames.

    The model is loaded from ``weight.hdf55`` and the true class of each video
    is looked up from the folder layout under ``./videos``.

    :param test_data: dataframe with the columns ``video`` and ``image``
    :param y_test: class names of the test frames; their sorted distinct values
        are used to turn a predicted class index back into a name
    :return: ``(predict, actual)``, two lists with one class name per video
    """
    # NOTE(review): index -> name mapping assumes the test set contains every
    # class the model was trained on, in the same sorted order - confirm.
    class_names = pd.get_dummies(y_test).columns.values
    model = load_model('weight.hdf55')
    videos_path = './videos'

    video_data = get_video_data(videos_path)

    # creating two lists to store predicted and actual tags
    predict = []
    actual = []
    for video in tqdm(test_data['video'].unique()):
        frame_paths = test_data.loc[test_data['video'] == video, 'image']
        # load_img takes (height, width); the channel count is not part of it
        prediction_images = [
            image.img_to_array(image.load_img(frame, target_size=(224, 224)))
            for frame in frame_paths
        ]

        prediction_images = preprocess_input(np.array(prediction_images))
        prediction = np.argmax(model.predict(prediction_images), axis=-1)
        # Majority vote over the frames. bincount/argmax picks the smallest
        # index on ties, as stats.mode does, but does not depend on the shape
        # of the mode result, which differs between SciPy versions.
        majority_class = np.bincount(prediction).argmax()
        predict.append(class_names[majority_class])
        actual.append(video_data.loc[video_data['name'] == video, 'tag'].iloc[0])
    return predict, actual


In [1]:
# Informational only: shows how many CPU cores the runtime has. The value is
# not referenced by any other cell visible in this notebook.
import multiprocessing

cores = multiprocessing.cpu_count() # Count the number of cores in a computer
cores

4

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# import frame_extraction
#import split_preprocess
import pandas as pd
# import training
# import testing
# import hypertuning
import numpy as np
import sys

def test_func():
    """
    Evaluate the saved model on the test split stored in ``test.csv``.

    Prints the per-video accuracy in percent.

    :return: the accuracy in percent
    """
    test_data = pd.read_csv('test.csv')
    y_test = test_data['class']

    predict, actual = test(test_data, y_test)

    # accuracy_score expects (y_true, y_pred)
    accuracy = accuracy_score(actual, predict) * 100
    print(accuracy)
    return accuracy

def train_func():
    """
    Run the pipeline up to training: extract frames, split, load images, train.

    The frames are split into train and test, and the train part is split
    again into train and validation. The test split is written to ``test.csv``
    before training starts.

    :return: ``(X_test, y_test)``, the test images and their encoded labels
    """
    # Extract the frames from the videos (writes the frame images to disk).
    frame_data = get_frames_from_videos()

    # First hold out the test videos, then a validation part of what is left.
    train_val_data, test_data = split_data(frame_data)
    train_data, val_data = split_data(train_val_data)

    X_train, train_labels = images_to_array_with_classes(train_data)
    X_test, test_labels = images_to_array_with_classes(test_data)
    X_val, val_labels = images_to_array_with_classes(val_data)

    # Class names -> integers, fitted on the training labels only.
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(train_labels)
    y_test = encoder.transform(test_labels)
    y_val = encoder.transform(val_labels)

    # Saved so that test_func can read the split back later.
    test_data.to_csv('test.csv')
    train(X_train, y_train, X_val, y_val)
    return X_test, y_test


In [13]:

# Entry point of the notebook: train first, then evaluate on the held-out
# videos. test_func reads the split back from 'test.csv', so the X_test and
# y_test returned here are not passed on to it.
if __name__ == '__main__':

    X_test, y_test = train_func()
    test_func()


158it [01:22,  1.91it/s]


Done reading the videos and writing the frames
Finished frame extraction


100%|██████████| 1914/1914 [00:16<00:00, 112.97it/s]
100%|██████████| 605/605 [00:05<00:00, 113.28it/s]
100%|██████████| 517/517 [00:04<00:00, 106.05it/s]


Start training
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Finish setting up
Begin traning
Epoch 1/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 2/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 3/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 7/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 8/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 9/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 10/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 11/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 12/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 13/10000
INFO:tensorflow:Assets written to: weight.hdf55/assets
Epoch 14/10000
Epoch 15

100%|██████████| 32/32 [00:14<00:00,  2.20it/s]

78.125





In [None]:
# Candidate values for a batch-size / epoch search.
# NOTE(review): no visible cell consumes param_grid (the hypertuning import
# above is commented out) - confirm whether this cell is still needed.
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict( batch_size=batch_size, epochs=epochs)

In [None]:
param_grid

{'batch_size': [10, 20, 40, 60, 80, 100], 'epochs': [10, 50, 100]}