# Preprocessing

In [1]:
# Importing Essential Libraries
import glob
import os
import random
import shutil
import time

import cv2
from tqdm import tqdm

In [2]:
# Required Parameters
dataset = "UCF-101/"                # Dataset Path (one sub-folder of .avi clips per class)
dataset2 = "dataset/"               # Dataset2 Path (one combined video per class)
train_path = "training_set/"        # Training Path
test_path = "testing_set/"          # Testing Path
no_of_frames = 1650                 # Number of Frames extracted per class
ch = 4                              # Model Selection Choice
epochs = 20                         # Number of epochs
batch_size = 32                     # Batch Size
n_classes = 101                     # Number of Classes
patience = 2                        # Patience for EarlyStopping
stime = int(time.time())            # Defining Starting Time

# Name of each Class/Category.
# Only sub-directories of the dataset are classes; stray files that the OS
# may drop into the folder (Thumbs.db, .DS_Store, ...) are ignored so they do
# not become bogus categories. The order returned by os.listdir is kept as is.
categories = [name for name in os.listdir(dataset)
              if os.path.isdir(os.path.join(dataset, name))]

In [3]:
# Creating training_set directory
try:
    os.mkdir(train_path)
    print("Folder {} created...".format(train_path))
except FileExistsError:
    # Only "already there" is expected; any other OSError (permissions,
    # missing parent, ...) is a real problem and is allowed to surface.
    print("A folder {} already exists...".format(train_path))

A folder training_set/ already exists...


In [4]:
# Creating training_set directory
# NOTE(review): this cell is identical to the previous one - possibly it was
# meant to create `dataset2` instead; confirm with the author.
try:
    os.mkdir(train_path)
    print("Folder {} created...".format(train_path))
except FileExistsError:
    # Only "already there" is expected; any other OSError (permissions,
    # missing parent, ...) is a real problem and is allowed to surface.
    print("A folder {} already exists...".format(train_path))

A folder training_set/ already exists...


In [5]:
# Creating testing_set directory
try:
    os.mkdir(test_path)
    print("Folder {} created...".format(test_path))
except FileExistsError:
    # Only "already there" is expected; any other OSError (permissions,
    # missing parent, ...) is a real problem and is allowed to surface.
    print("A folder {} already exists...".format(test_path))

A folder testing_set/ already exists...


In [6]:
# Creating, inside training_set/, one directory per class found in the dataset directory
for category in categories:
    try:
        os.mkdir(train_path + category)
        print("Folder {} created...".format(category))
    except FileExistsError:
        # Anything other than "already exists" (e.g. a permission error)
        # is not swallowed any more, so it cannot be misreported.
        print("A folder already exists, named {}...".format(category))

A folder already exists, named ApplyEyeMakeup...
A folder already exists, named ApplyLipstick...
A folder already exists, named Archery...
A folder already exists, named BabyCrawling...
A folder already exists, named BalanceBeam...
A folder already exists, named BandMarching...
A folder already exists, named BaseballPitch...
A folder already exists, named Basketball...
A folder already exists, named BasketballDunk...
A folder already exists, named BenchPress...
A folder already exists, named Biking...
A folder already exists, named Billiards...
A folder already exists, named BlowDryHair...
A folder already exists, named BlowingCandles...
A folder already exists, named BodyWeightSquats...
A folder already exists, named Bowling...
A folder already exists, named BoxingPunchingBag...
A folder already exists, named BoxingSpeedBag...
A folder already exists, named BreastStroke...
A folder already exists, named BrushingTeeth...
A folder already exists, named CleanAndJerk...
A folder already e

In [7]:
# Creating, inside testing_set/, one directory per class found in the dataset directory
for category in categories:
    try:
        os.mkdir(test_path + category)
        print("Folder {} created...".format(category))
    except FileExistsError:
        # Anything other than "already exists" (e.g. a permission error)
        # is not swallowed any more, so it cannot be misreported.
        print("A folder already exists, named {}...".format(category))

A folder already exists, named ApplyEyeMakeup...
A folder already exists, named ApplyLipstick...
A folder already exists, named Archery...
A folder already exists, named BabyCrawling...
A folder already exists, named BalanceBeam...
A folder already exists, named BandMarching...
A folder already exists, named BaseballPitch...
A folder already exists, named Basketball...
A folder already exists, named BasketballDunk...
A folder already exists, named BenchPress...
A folder already exists, named Biking...
A folder already exists, named Billiards...
A folder already exists, named BlowDryHair...
A folder already exists, named BlowingCandles...
A folder already exists, named BodyWeightSquats...
A folder already exists, named Bowling...
A folder already exists, named BoxingPunchingBag...
A folder already exists, named BoxingSpeedBag...
A folder already exists, named BreastStroke...
A folder already exists, named BrushingTeeth...
A folder already exists, named CleanAndJerk...
A folder already e

In [None]:
# Combining multiple videos into single video file
# For every class, all .avi clips in dataset/<class>/ are appended, frame by
# frame, into dataset2/<class>/<class>.avi.
fourcc = cv2.VideoWriter_fourcc(*"XVID")
# fourcc = cv2.VideoWriter_fourcc(*'MP4V')
for category in tqdm(categories):
    source_dir = os.path.join(dataset, category)
    # Sorted so the combined video has the same frame order on every run.
    videofiles = sorted(os.path.join(source_dir, n)
                        for n in os.listdir(source_dir) if n.endswith(".avi"))
    if not videofiles:
        print("No .avi files found for {}, skipping...".format(category))
        continue

    # The writer does not create missing folders, so make sure the target exists.
    target_dir = os.path.join(dataset2, category)
    os.makedirs(target_dir, exist_ok=True)

    # NOTE(review): the writer is fixed to 25 fps and 320x240 - assumes every
    # source clip has that frame size; confirm against the dataset.
    out = cv2.VideoWriter(os.path.join(target_dir, category + ".avi"), fourcc, 25, (320, 240))
    try:
        for videofile in videofiles:
            cap = cv2.VideoCapture(videofile)
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        # End of this clip (or it could not be read): move on
                        # to the next one instead of writing an empty frame.
                        break
                    out.write(frame)
            finally:
                # Every capture is released, not only the last one.
                cap.release()
    finally:
        out.release()

In [None]:
# # Extracting one frame per second of video (every floor(frameRate)-th frame) from the Videos
# for category in tqdm(categories):
#     count = 0    
#     a = glob.glob(dataset + '/' + category + '/*.avi')
#     for i in range(len(a)):
#         cap = cv2.VideoCapture(a[i])
#         frameRate = cap.get(5)
#         while(cap.isOpened()):
#             frameId = cap.get(1)
#             ret, frame = cap.read()
#             if (ret != True):
#                 break
#             if (frameId % math.floor(frameRate) == 0):
#                 cv2.imwrite(train_path + '/' + category + '/{}_{}.jpg'.format(category, count), frame)
#                 count += 1
#         cap.release()

In [None]:
# # Extracting every frame from the Videos
# for category in tqdm(categories):
#     count = 0    
#     a = glob.glob(dataset + category + '/*.avi')
#     for i in range(len(a)):
#         cap = cv2.VideoCapture(a[i])
#         # frameRate = cap.get(5)
#         while(cap.isOpened()):
#             # frameId = cap.get(1)
#             ret, frame = cap.read()
#             if (ret != True):
#                 break
#             # if (frameId % math.floor(frameRate) == 0):
#             else:
#                 cv2.imwrite(train_path + category + '/{}_{}.jpg'.format(category, count), frame)
#                 count += 1
#         cap.release()

In [None]:
# Class folders inside dataset2, in a fixed (sorted) order
train_files = sorted(glob.glob(dataset2 + "*"))

# Getting total no. of frames in each classes
# total_frames is built by walking train_files itself, so total_frames[i]
# always describes the combined video stored in train_files[i]. The listing
# order of `categories` and of glob are independent and must not be mixed.
total_frames = []
for class_dir in tqdm(train_files):
    category = os.path.basename(class_dir)
    cap = cv2.VideoCapture(os.path.join(class_dir, category + ".avi"))
    # cap.get returns a float; it is 0 when the video could not be opened.
    total_frames.append(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
    cap.release()

In [None]:
# Extracting no_of_frames images from each category
# For every class folder in train_files, frames of <class>/<class>.avi are
# sampled at a regular step and saved as train_path/<class>/frame_<k>.jpg.
for class_dir in tqdm(train_files):
    category = os.path.basename(class_dir)      # works with "/" and "\\" separators
    cap = cv2.VideoCapture(os.path.join(class_dir, category + ".avi"))

    # The frame count is read from the video being processed, so it cannot be
    # mismatched with another class. The step is kept >= 1 so that videos
    # shorter than no_of_frames do not cause a modulo-by-zero.
    frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, frame_total // no_of_frames)

    target_dir = os.path.join(train_path, category)
    os.makedirs(target_dir, exist_ok=True)      # imwrite does not create folders

    count = 0
    while cap.isOpened() and count < no_of_frames:
        frame_id = int(cap.get(cv2.CAP_PROP_POS_FRAMES))   # index of the frame about to be read
        ret, frame = cap.read()
        if not ret:
            print("Exiting...")
            break
        if frame_id % step == 0:
            cv2.imwrite(os.path.join(target_dir, "frame_{}.jpg".format(count)), frame)
            count += 1
    cap.release()

In [None]:
# for category in tqdm(categories):
#     count = 0
#     cap = cv2.VideoCapture(train_path + category + "/" + category + ".avi")
#     while(cap.isOpened()):
#         frame_id = cap.get(1)
#         ret, frame = cap.read()
#         if ret != True:
#             print("Exiting...")
#             break
#         if frame_id % int(total_frames[i] / no_of_frames) == 0.0:
#             if count >= 1650:
#                 break
#             file_name = train_path + category + "/frame_{}.jpg".format(count); count += 1
#             cv2.imwrite(file_name, frame)
#     cap.release()
#     cv2.destroyAllWindows()

In [None]:
train_files = glob.glob(train_path + "*")
n_test_images = 150                 # Images moved to the testing set per class

# Moving random images from training_set into testing_set
# NOTE: this cell is not idempotent - every run moves another batch of images.
for train_file in tqdm(train_files):
    if not os.path.isdir(train_file):
        continue                                # only class folders hold images
    category = os.path.basename(train_file)     # works with "/" and "\\" separators
    sub_file = glob.glob(os.path.join(train_file, "*"))

    # random.sample raises ValueError when asked for more items than exist,
    # so the sample size is capped at the number of available images.
    test_files = random.sample(sub_file, min(n_test_images, len(sub_file)))

    dest_dir = os.path.join(test_path, category)
    os.makedirs(dest_dir, exist_ok=True)
    for test_file in test_files:
        # Move the file itself: no JPEG re-encoding, and the source is never
        # deleted before the destination exists.
        shutil.move(test_file, os.path.join(dest_dir, os.path.basename(test_file)))

In [8]:
# Report, class by class, how many files sit in the training and testing folders
for class_name in categories:
    n_train = len(os.listdir(train_path + class_name))
    n_test = len(os.listdir(test_path + class_name))
    print(n_train, "in training &", n_test, "in testing", ":", class_name)

1500 in training & 150 in testing : ApplyEyeMakeup
1500 in training & 150 in testing : ApplyLipstick
1500 in training & 150 in testing : Archery
1500 in training & 150 in testing : BabyCrawling
1500 in training & 150 in testing : BalanceBeam
1500 in training & 150 in testing : BandMarching
1500 in training & 150 in testing : BaseballPitch
1500 in training & 150 in testing : Basketball
1500 in training & 150 in testing : BasketballDunk
1500 in training & 150 in testing : BenchPress
1500 in training & 150 in testing : Biking
1500 in training & 150 in testing : Billiards
1500 in training & 150 in testing : BlowDryHair
1500 in training & 150 in testing : BlowingCandles
1500 in training & 150 in testing : BodyWeightSquats
1500 in training & 150 in testing : Bowling
1500 in training & 150 in testing : BoxingPunchingBag
1500 in training & 150 in testing : BoxingSpeedBag
1500 in training & 150 in testing : BreastStroke
1500 in training & 150 in testing : BrushingTeeth
1500 in training & 150 in