In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary
import cv2
import time
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
from sklearn.model_selection import train_test_split

In [3]:
cd_path = 'Jester_Dataset/Dataset'
Jester_data = pd.read_csv(os.path.join(cd_path,'Train.csv'))

In [4]:
Jester_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
0,1,Doing other things,37,0,"(100, 176)",JPEG
1,3,Pushing Two Fingers Away,37,6,"(100, 176)",JPEG
2,6,Drumming Fingers,37,1,"(100, 176)",JPEG
3,11,Sliding Two Fingers Down,37,10,"(100, 176)",JPEG
4,14,Pushing Hand Away,37,5,"(100, 176)",JPEG


In [5]:
unique_labels = Jester_data['label'].unique()
print(len(unique_labels))

27


In [6]:
for labels in unique_labels:
    print(labels)

Doing other things
Pushing Two Fingers Away
Drumming Fingers
Sliding Two Fingers Down
Pushing Hand Away
Shaking Hand
Pulling Two Fingers In
Stop Sign
Zooming In With Two Fingers
Sliding Two Fingers Up
Zooming Out With Two Fingers
Zooming In With Full Hand
No gesture
Swiping Right
Thumb Down
Rolling Hand Forward
Pulling Hand In
Zooming Out With Full Hand
Swiping Left
Rolling Hand Backward
Turning Hand Counterclockwise
Swiping Up
Turning Hand Clockwise
Sliding Two Fingers Left
Swiping Down
Thumb Up
Sliding Two Fingers Right


In [7]:
selected_gestures = ["Rolling Hand Backward", "Rolling Hand Forward","No gesture", "Swiping Left", "Swiping Right", "Stop Sign", "Thumb Up","Thumb Down", "Zooming Out With Full Hand","Zooming In With Full Hand","Shaking Hand","Drumming Fingers","Swiping Up","Swiping Down","Sliding Two Fingers Down"]
# selected_gestures = ["Rolling Hand Backward", "Rolling Hand Forward","No gesture", "Swiping Left", "Swiping Right", "Stop Sign", "Thumb Up","Thumb Down", "Shaking Hand","Drumming Fingers","Swiping Up","Swiping Down"]

In [8]:
data = Jester_data[Jester_data['label'].isin(selected_gestures)]

In [9]:
data.tail()

Unnamed: 0,video_id,label,frames,label_id,shape,format
50411,148075,No gesture,37,2,"(100, 176)",JPEG
50412,148077,Drumming Fingers,37,1,"(100, 176)",JPEG
50415,148084,No gesture,37,2,"(100, 176)",JPEG
50416,148085,Drumming Fingers,37,1,"(100, 176)",JPEG
50418,148090,Swiping Left,37,16,"(100, 176)",JPEG


In [10]:
unique_labels = data['label'].unique()
print(len(unique_labels))

15


In [11]:
data[['label','label_id']]

Unnamed: 0,label,label_id
2,Drumming Fingers,1
3,Sliding Two Fingers Down,10
5,Shaking Hand,9
8,Stop Sign,14
12,Shaking Hand,9
...,...,...
50411,No gesture,2
50412,Drumming Fingers,1
50415,No gesture,2
50416,Drumming Fingers,1


In [12]:
len(data[data['label']=='Rolling Hand Backward'])

1715

In [13]:
img_rows, img_cols = 128,128
X_tr = []

In [14]:
desired_label = 'Rolling Hand Backward'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
46,133,Rolling Hand Backward,37,7,"(100, 176)",JPEG
78,201,Rolling Hand Backward,37,7,"(100, 176)",JPEG
115,306,Rolling Hand Backward,37,7,"(100, 176)",JPEG
148,405,Rolling Hand Backward,37,7,"(100, 176)",JPEG
150,408,Rolling Hand Backward,37,7,"(100, 132)",JPEG


In [15]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

0it [00:00, ?it/s]

1715it [00:02, 794.60it/s]

(2, 128, 128, 3)
1715





In [None]:
desired_label = 'Rolling Hand Forward'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
18,56,Rolling Hand Forward,37,8,"(100, 176)",JPEG
56,155,Rolling Hand Forward,37,8,"(100, 176)",JPEG
81,212,Rolling Hand Forward,37,8,"(100, 176)",JPEG
111,301,Rolling Hand Forward,37,8,"(100, 176)",JPEG
119,316,Rolling Hand Forward,37,8,"(100, 176)",JPEG


In [16]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1788it [00:20, 86.98it/s] 

(30, 128, 128, 3)
3503





In [57]:
desired_label = 'No gesture'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
15,50,No gesture,37,2,"(100, 176)",JPEG
24,70,No gesture,37,2,"(100, 176)",JPEG
60,161,No gesture,37,2,"(100, 176)",JPEG
83,220,No gesture,37,2,"(100, 176)",JPEG
118,314,No gesture,37,2,"(100, 132)",JPEG


In [58]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1844it [00:19, 93.16it/s] 

(30, 128, 128, 3)
5347





In [59]:
desired_label = 'Swiping Left'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
39,107,Swiping Left,37,16,"(100, 100)",JPEG
63,169,Swiping Left,37,16,"(100, 176)",JPEG
122,327,Swiping Left,37,16,"(100, 176)",JPEG
157,429,Swiping Left,37,16,"(100, 176)",JPEG
174,477,Swiping Left,37,16,"(100, 176)",JPEG


In [60]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1762it [00:18, 93.64it/s] 

(30, 128, 128, 3)
7109





In [61]:
desired_label = 'Swiping Right'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
16,51,Swiping Right,37,17,"(100, 176)",JPEG
34,95,Swiping Right,37,17,"(100, 132)",JPEG
35,100,Swiping Right,37,17,"(100, 176)",JPEG
52,149,Swiping Right,37,17,"(100, 132)",JPEG
110,297,Swiping Right,37,17,"(100, 176)",JPEG


In [62]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1259it [00:14, 98.43it/s] 

: 

In [None]:
desired_label = 'Stop Sign'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
116,310,Thumb Up,37,20,"(100, 176)",JPEG
139,381,Thumb Up,37,20,"(100, 176)",JPEG
256,681,Thumb Up,37,20,"(100, 176)",JPEG
271,718,Thumb Up,37,20,"(100, 176)",JPEG
289,759,Thumb Up,37,20,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1841it [00:14, 126.86it/s]

(30, 128, 128, 3)
10657





In [None]:
desired_label = 'Thumb Up'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
17,55,Thumb Down,37,19,"(100, 176)",JPEG
59,160,Thumb Down,37,19,"(100, 132)",JPEG
73,193,Thumb Down,37,19,"(100, 176)",JPEG
113,304,Thumb Down,37,19,"(100, 176)",JPEG
124,331,Thumb Down,37,19,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1810it [00:14, 129.15it/s]

(30, 128, 128, 3)
12467





In [None]:
desired_label = 'Thumb Down'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
36,103,Zooming Out With Full Hand,37,25,"(100, 176)",JPEG
41,110,Zooming Out With Full Hand,37,25,"(100, 176)",JPEG
53,150,Zooming Out With Full Hand,37,25,"(100, 176)",JPEG
112,302,Zooming Out With Full Hand,37,25,"(100, 132)",JPEG
159,433,Zooming Out With Full Hand,37,25,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1832it [00:20, 89.17it/s] 

(30, 128, 128, 3)
14299





In [None]:
desired_label = 'Zooming Out With Full Hand'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
14,46,Zooming In With Full Hand,37,23,"(100, 176)",JPEG
62,167,Zooming In With Full Hand,37,23,"(100, 132)",JPEG
64,171,Zooming In With Full Hand,37,23,"(100, 176)",JPEG
74,194,Zooming In With Full Hand,37,23,"(100, 176)",JPEG
76,197,Zooming In With Full Hand,37,23,"(100, 132)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1799it [00:19, 90.99it/s] 

(30, 128, 128, 3)
16098





In [None]:
desired_label = 'Zooming In With Full Hand'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
57,157,Swiping Up,37,18,"(100, 176)",JPEG
82,217,Swiping Up,37,18,"(100, 176)",JPEG
86,232,Swiping Up,37,18,"(100, 132)",JPEG
96,250,Swiping Up,37,18,"(100, 176)",JPEG
126,336,Swiping Up,37,18,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1768it [00:14, 126.01it/s]

(30, 128, 128, 3)
14235





In [None]:
desired_label = 'Shaking Hand'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
75,196,Swiping Down,37,15,"(100, 176)",JPEG
77,198,Swiping Down,37,15,"(100, 176)",JPEG
95,248,Swiping Down,37,15,"(100, 176)",JPEG
152,413,Swiping Down,37,15,"(100, 176)",JPEG
183,497,Swiping Down,37,15,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1824it [00:17, 106.20it/s]

(30, 128, 128, 3)
16059





In [None]:
desired_label = 'Drumming Fingers'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
15,50,No gesture,37,2,"(100, 176)",JPEG
24,70,No gesture,37,2,"(100, 176)",JPEG
60,161,No gesture,37,2,"(100, 176)",JPEG
83,220,No gesture,37,2,"(100, 176)",JPEG
118,314,No gesture,37,2,"(100, 132)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1844it [00:14, 131.30it/s]

(30, 128, 128, 3)
17903





In [None]:
desired_label = 'Swiping Up'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
5,17,Shaking Hand,37,9,"(100, 176)",JPEG
12,41,Shaking Hand,37,9,"(100, 176)",JPEG
47,137,Shaking Hand,37,9,"(100, 176)",JPEG
101,263,Shaking Hand,37,9,"(100, 176)",JPEG
224,580,Shaking Hand,37,9,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the imges of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1789it [00:13, 131.85it/s]

(30, 128, 128, 3)
19692





In [None]:
desired_label = 'Swiping Down'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
2,6,Drumming Fingers,37,1,"(100, 176)",JPEG
87,233,Drumming Fingers,37,1,"(100, 176)",JPEG
109,294,Drumming Fingers,37,1,"(100, 176)",JPEG
165,452,Drumming Fingers,37,1,"(100, 176)",JPEG
184,498,Drumming Fingers,37,1,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1818it [00:13, 134.81it/s]

(30, 128, 128, 3)
21510





In [None]:
desired_label = 'Sliding Two Fingers Down'
filtered_data = data[data['label']== desired_label]
filtered_data.head()

Unnamed: 0,video_id,label,frames,label_id,shape,format
3,11,Sliding Two Fingers Down,37,10,"(100, 176)",JPEG
30,85,Sliding Two Fingers Down,37,10,"(100, 100)",JPEG
44,121,Sliding Two Fingers Down,37,10,"(100, 132)",JPEG
69,184,Sliding Two Fingers Down,37,10,"(100, 132)",JPEG
123,330,Sliding Two Fingers Down,37,10,"(100, 176)",JPEG


In [None]:
for index, row in tqdm(filtered_data.iterrows()): # Looping throgh all the subfolders 
    video_id = row['video_id']  # Name of the Video folder
    video_path = os.path.join(cd_path,'Train',str(video_id)) # Path to the video folder

    listing_stop = sorted(os.listdir(video_path))
    frames = []
    img_depth = 0

    for imgs in listing_stop: # Looping through the images of the folder
        if img_depth < 30:
            img = os.path.join(video_path, imgs)
            frame = cv2.imread(img)
            # cv2.imshow("image",frame)
            frame = cv2.resize(frame, (img_rows, img_cols), interpolation=cv2.INTER_AREA)
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(RGB)
            img_depth += 1
        else:
            break
        # Visualizing the dataset
        # key = cv2.waitKey(1) & 0xFF
        # if key == ord("q"):
        #     break
    input_img = np.array(frames) # Converting to numpy arrays
    # Converting 
    ipt = np.rollaxis(np.rollaxis(input_img, 2, 0), 2, 0)
    ipt = np.rollaxis(ipt, 2, 0)
    X_tr.append(ipt)
# Shape and number of samples
print(ipt.shape)
num_samples = len(X_tr)
print(num_samples)

1832it [00:19, 93.99it/s] 

(30, 128, 128, 3)
26973





In [16]:
X_tr_array = np.array(X_tr)   # converting the frames read into array
num_samples = len(X_tr_array) 
print (num_samples)

1715


In [17]:
label=np.ones((num_samples,),dtype = int)
label[0:1715]= 0        # Rolling Hand Backward
label[1715:3503] = 1    # Rolling Hand Forward
label[3503:5347] = 2    # No gesture
label[5347:7109] = 3    # Swiping Left
label[7109:8839] = 4    # Swiping Right
label[8839:10660] = 5   # Stop Sign
label[10660:12501] = 6  # Thumb Up
label[12501:14311] = 7  # Thumb Down
label[14311:16143] = 8  # Zooming Out With Full Hand
label[16143:17942] = 9  # Zooming In With Full Hand
label[17942:19731] = 10 # Shaking Hand
label[19731:21549] = 11 # Drumming Fingers
label[21549:23317] = 12  # Swiping Up
label[23317:25141] = 13 # Swiping Down
label[25141:26973] = 14 # Sliding Two Fingers Down

In [18]:
img_depth = 30
train_data = [X_tr_array,label]
X_tr_tensor = torch.tensor(train_data[0])
label_tensor = torch.tensor(train_data[1])
(X_train, y_train) = (X_tr_tensor, label_tensor)
print(y_train.dtype)
print(X_train.dtype)
print('X_Train shape:', X_train.shape)
train_set = np.zeros((num_samples, img_depth, img_cols,img_rows,3))

for h in range(num_samples):
    train_set[h][:][:][:][:]=X_train[h,:,:,:]

patch_size = 30   # img_depth or number of frames used for each video
print(train_set.shape, 'train samples')

torch.int64
torch.uint8
X_Train shape: torch.Size([1715, 2, 128, 128, 3])
(1715, 2, 128, 128, 3) train samples


In [19]:
# nb_classes = 15
nb_classes = 2

In [20]:
Y_train = torch.nn.functional.one_hot(y_train, num_classes=nb_classes)
print(Y_train.shape)
print(Y_train.dtype)

torch.Size([1715, 2])
torch.int64


In [21]:
# Data Normalization(Pre-Processing)
train_set = train_set.astype('float16')
print(np.mean(train_set))
train_set -= np.mean(train_set)
print(np.max(train_set))
train_set /=np.max(train_set)

113.8
141.2


In [22]:
weight_decay = 0.00005
class Conv3DLSTMModel(nn.Module):
    def __init__(self, input_shape, num_classes):
        super(Conv3DLSTMModel, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv3d(3, 16, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(),
            nn.Conv3d(16, 16,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(2, 2, 2))
        )
        self.conv2 = nn.Sequential(
            nn.Conv3d(16, 32,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.Conv3d(32, 32,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2))
        )
        self.conv3 = nn.Sequential(
            nn.Conv3d(32, 64,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.Conv3d(64, 64,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.Conv3d(64, 64,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2))
        )
        self.conv4 = nn.Sequential(
            nn.Conv3d(64, 128,stride=1, kernel_size=(3, 3, 3),padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.Conv3d(128, 128,stride=1, kernel_size=(3, 3, 3), padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.Conv3d(128, 128,stride=1, kernel_size=(3, 3, 3),padding='same',dilation=1,bias=False),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2))
        )
        self.flattened_tensor = nn.Sequential(nn.Flatten())
        self.dense1 = nn.Linear(30 * 8192, 128 * 256)
        self.dense2 = nn.Linear(128 * 256, 128 * 256)
        self.convlstm1 = nn.ConvLSTM(
            input_size=128,
            hidden_size=64,
            kernel_size=(3, 3),
            num_layers=1,
            batch_first=True,
            bias=True,
            return_all_layers=True
        )
        self.convlstm2 = nn.ConvLSTM(
            input_size=64,
            hidden_size=64,
            kernel_size=(3, 3),
            num_layers=1,
            batch_first=True,
            bias=True,
            return_all_layers=True
        )
        self.convlstm3 = nn.ConvLSTM(
            input_size=64,
            hidden_size=64,
            kernel_size=(3, 3),
            num_layers=1,
            batch_first=True,
            bias=True,
            return_all_layers=True
        )
        self.global_avg_pooling = nn.AdaptiveAvgPool3d(1)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(64, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flattened_tensor(x)
        print(x.shape)
        # x = x.view(x.size(0), -1)
        batch_size, num_channels, seq_len, height, width = x.size()
        x = x.view(batch_size, seq_len, num_channels * height * width)
        print(x.shape)
        x = self.dense1(x)
        print(x.shape)
        # x = self.dense2(x)
        x = self.convlstm1(x)
        x = self.convlstm2(x)
        x = self.convlstm3(x)
        print(x.shape)
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        x = self.softmax(x)
        return x


In [23]:
num_classes = nb_classes

model = Conv3DLSTMModel((3,patch_size,img_cols,img_rows),num_classes)
summary(model,(3,patch_size,img_cols,img_rows))

: 

In [None]:
learning_rate = 0.005
sgd = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
X_train_new, X_val_new, y_train_new,y_val_new = train_test_split(train_set, Y_train, test_size=0.2,random_state=42)

In [None]:
batch_size = 30
nb_epoch = 300
#steps_per_epoch=int((len(X_val_new)*1.5)/batch_size)
# lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.05, 
#   cooldown=0, patience=10, min_lr=0.005/(2^4),verbose=1)
hist = model.fit(
    X_train_new,
    y_train_new,
    validation_data=(X_val_new,y_val_new),
    batch_size=batch_size,
    epochs = nb_epoch,
    shuffle=True,
    verbose=1
    )

In [None]:
training_loss = hist.history['loss']
val_loss = hist.history['val_loss']

plt.plot(training_loss, label="training_loss")
plt.plot(val_loss, label="validation_loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Learning Curve")
plt.legend(loc='best')
plt.show()

NameError: name 'hist' is not defined

In [None]:
training_acc = hist.history['acc']
val_acc = hist.history['val_acc']

plt.plot(training_acc, label="training_accuracy")
plt.plot(val_acc, label="validation_accuracy")
plt.xlabel("Epochs")
plt.ylabel("accuracy")
plt.title("Learning Curve")
plt.legend(loc='best')
plt.show()

In [None]:
test_pred =model.predict(X_train_new[50:70])
result = np.argmax(test_pred, axis =1)
print(result)

NameError: name 'model1' is not defined

In [None]:
from sklearn.metrics import confusion_matrix
met = confusion_matrix(np.argmax(y_val_new,axis =1), np.argmax(model.predict(X_val_new),axis =1))
print(met)

In [None]:
import itertools
def confusion_matrix_plot(cm, classes, 
                          title='Normalized Confusion Matrix', 
                          normalize=True, 
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        cm = np.around(cm, decimals=2)
        cm[np.isnan(cm)] = 0.0
    plt.subplots(1, 1, figsize=(8, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
confusion_matrix_plot(met, classes = ["Rolling Hand Backward", "Rolling Hand Forward","No gesture", "Swiping Left", "Swiping Right", "Stop Sign", "Thumb Up","Thumb Down", "Zooming Out With Full Hand","Zooming In With Full Hand","Shaking Hand","Drumming Fingers","Swiping Up","Swiping Down","Sliding Two Fingers Down"])