In [1]:
import os
import cv2
import numpy as np
import random
from time import time

from tqdm import tqdm
import pickle
import pandas as pd
from tensorflow.keras.utils import to_categorical

In [2]:
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [3]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential

In [4]:
df = pd.read_csv("AllLabels.csv")
df.head()

Unnamed: 0,ClipID,Boredom,Engagement,Confusion,Frustration
0,1100011002.avi,0,2,0,0
1,1100011003.avi,0,2,0,0
2,1100011004.avi,0,3,0,0
3,1100011005.avi,0,3,0,0
4,1100011006.avi,0,3,0,0


In [5]:
print(len(df.index))

8925


In [6]:
names = []

for n in tqdm(range(len(df.index))):
    names.append(df['ClipID'][n])

100%|██████████████████████████████████████████████████████████████████████████| 8925/8925 [00:00<00:00, 189951.71it/s]


In [7]:
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input

image_shape = (160,120,3)
base_model = InceptionV3(input_tensor=layers.Input(image_shape),weights='imagenet',include_top=False)

#incept = Model(inputs=base_model.input,outputs=base_model.get_layer('avg_pool').output)
incept = Model(inputs=base_model.input,outputs=base_model.output)

In [8]:
img_height , img_width = 120, 160
seq_len = 16

In [9]:
def frames_extraction(video_path):
    frames_list = []     
    vidObj = cv2.VideoCapture(video_path)
    count = 1 
    
    while count <= seq_len:          
        success, image = vidObj.read() 
        if success:
            image = cv2.resize(image, (img_height, img_width))
            cv2.imwrite("image.jpg",image)
            image = np.expand_dims(image,axis=0)
            image = preprocess_input(image)
            features = incept.predict(image)[0]
            frames_list.append(features)
            count += 1
        else:
            print("Defected frame",video_path.split('/')[-1])
            break
            
    return frames_list

In [10]:
path = 'dataset/'
# z1 = df[['Boredom']]
# z2 = df[['Engagement']]
# z3 = df[['Confusion']]
# z4 = df[['Frustration']]
z = df[['Boredom', 'Engagement', 'Confusion', 'Frustration']]

In [None]:
X,y = list(), list()
   
for folders in os.listdir(path):
    folder = path + folders
    for items in tqdm(os.listdir(folder)):
        data = folder + '/' + items
        for subfolders in os.listdir(data):
            subfolder = data + '/' + subfolders
            for m, name in enumerate(names):
                if subfolder.split('/')[-1] == name:
                    frames = frames_extraction(subfolder)
                    X.append(frames)
                    y.append(z.values[m]) 

100%|████████████████████████████████████████████████████████████████████████████████| 114/114 [03:33<00:00,  1.87s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 41/41 [01:26<00:00,  2.10s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [01:16<00:00,  2.25s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 93/93 [03:59<00:00,  2.58s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [04:25<00:00,  3.36s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 142/142 [07:44<00:00,  3.27s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:35<00:00,  3.99s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.22s/it]
100%|███████████████████████████████████

In [None]:
X = np.array(X)
y = np.array(y)
print(X.shape, y.shape)

In [None]:
from tensorflow.keras.utils import to_categorical

y = to_categorical(y)
print(y.shape)

In [None]:
input_ = layers.Input((2,2048))
x = layers.LSTM(512, return_sequences=False)(input_)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x1 = layers.Dense(4, activation='softmax')(x)
# x2 = layers.Dense(4, activation='softmax')(x)
# x3 = layers.Dense(4, activation='softmax')(x)
# x4 = layers.Dense(4, activation='softmax')(x)

model = Model(inputs=input_, outputs=x1)#[x1,x2,x3,x4])
model.summary()

In [None]:
opt = keras.optimizers.Adam()
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=["accuracy"])

In [None]:
epochs = 10
number_videos_per_bath = 1
steps = 8925//number_videos_per_bath

In [None]:
log_dir = "log/" 
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir)

model.fit(X,y,  epochs=200, batch_size = 256, validation_split = 0.35,callbacks=[tensorboard_callback],verbose=1)    

In [None]:
model.save("classroom.h5")

In [None]:
import cv2
import numpy as np
from keras.models import load_model


clf = load_model("classroom.h5")

cap = cv2.VideoCapture(0)
ret, frame = cap.read() 
classes = ['Boredom','Engagement','Confusion','Frustration']
values = [0,1,2,3]

def predict(frame):
    frames=[]
    for i in range(0,1):
        ret, frame = cap.read()
        original = frame.copy()
        image = cv2.resize(frame, (299, 299))
        image = np.expand_dims(image,axis=0)
        image = preprocess_input(image)
        features = incept.predict(image)[0]
        frames.append(features)
    return frames


while(True):
    ret, frame = cap.read() 
    if frame is None:
        break
    frames = predict(frame)
    frames = np.array(frames)    
    new_feature = np.expand_dims(frames,axis=0)
    pred=clf.predict(new_feature)
    print(classes[0]  + ": " + str(values[np.argmax(pred[0])]),classes[1]  + ": " + str(values[np.argmax(pred[1])]),
          classes[2]  + ": " + str(values[np.argmax(pred[2])]),classes[3]  + ": " + str(values[np.argmax(pred[3])]))
    print(" ")
    print("...................")
    
    cv2.imshow("output", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()