**Initial settings to point the notebook to the location of the project datasets**

In [0]:
# Mount Google Drive at /content/Final_Year so the project files are reachable
# from this runtime. This cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/Final_Year')

In [0]:
os.chdir('/content/Final_Year/My Drive/final')
!ls -l

**Importing the basic libraries that will be needed in the code scripts.**

In [0]:
import glob
import os
from collections import deque

import cv2
import numpy as np
import pandas as pd
from google.colab.patches import cv2_imshow
from keras.utils import np_utils
from skimage.io import imread


In [0]:
# Event classes handled by the pipeline. The position in this list is the class
# index: draw_boxes turns a predicted index into a label via categories[index].
categories = ['sit', 'stand', 'walk', 'push']

**Step1: Extract the frames from the video files and store it in directory.**

*   Every second frame of the video is sampled (the code reads two frames and keeps the second one).
*   10 such frames (F0 - F9) are extracted and stored in a directory named a0.
*   The next 10 frames (F1 - F10) are stored in directory a1, and so on until the video runs out of frames.

The frames are stored in a separate directory called Demo with the structure video{n}/a{k}/img/img0.jpg to img9.jpg.







In [0]:
# Saves to Dataset/<directory>/video<n>/a<k>/img/img0-9.jpg
def _read_strided(video, step):
    """Read `step` frames from `video` and return `(ok, last_frame)`.

    Reading stops at the first failed read, in which case `ok` is False.
    """
    ok, frame = False, None
    for _ in range(step):
        ok, frame = video.read()
        if not ok:
            break
    return ok, frame


def _write_window(window, destination_directory):
    """Write the frames of `window` as img0.jpg, img1.jpg, ... into `destination_directory`.

    img0 is the most recently read frame and the last image is the oldest one
    in the window.
    """
    os.makedirs(destination_directory, exist_ok=True)
    for k, frame in enumerate(reversed(window)):
        cv2.imwrite(os.path.join(destination_directory, 'img%d.jpg' % k), frame)


def extract_frames(directory, window_size=10, frame_step=2):
    """Cut every video of a category into overlapping sets of sampled frames.

    Reads each ``Dataset/Videos/<directory>/*.avi`` file, samples frames with a
    stride of `frame_step`, and for every sliding window of `window_size`
    sampled frames writes the window to
    ``Dataset/<directory>/video<n>/a<k>/img/img0.jpg .. img<window_size-1>.jpg``.

    Args:
        directory: Sub-folder of ``Dataset/Videos`` holding the ``.avi`` files;
            also used as the output sub-folder of ``Dataset``.
        window_size: Number of sampled frames stored per ``a<k>`` folder.
        frame_step: Number of frames read per sample; the last one read is kept.
            The very first sample of a video is the first frame itself.

    Raises:
        ValueError: If `window_size` or `frame_step` is smaller than 1.

    Notes:
        ``<n>`` is the position of the video in the glob result. ``<k>`` keeps
        counting across videos (it is not reset per video), so folder names are
        unique over the whole run. A window is only written when all of its
        frames were read successfully.
    """
    if window_size < 1 or frame_step < 1:
        raise ValueError('window_size and frame_step must both be >= 1')

    video_paths = glob.glob(os.path.join('Dataset/Videos', directory, '*.avi'))
    print('Extracting frames from %d %s videos' % (len(video_paths), directory))
    image_id = 0
    for folder_id, video_path in enumerate(video_paths):
        video = cv2.VideoCapture(video_path)
        # The deque drops the oldest frame automatically once it is full,
        # which gives the sliding window.
        window = deque(maxlen=window_size)
        first_image_id = image_id
        try:
            # A capture that failed to open simply returns ok=False here.
            ok, frame = video.read()
            while ok:
                window.append(frame)
                if len(window) == window_size:
                    destination_directory = os.path.join(
                        'Dataset', directory, f'video{folder_id}', 'a' + str(image_id), 'img')
                    _write_window(window, destination_directory)
                    image_id += 1
                ok, frame = _read_strided(video, frame_step)
        finally:
            # Release the capture even if writing a frame set fails.
            video.release()
        print('Extracted %d frame sets from %s' % (image_id - first_image_id, video_path))

In [0]:
# Step 1: extract the frame sets for the videos in Dataset/Videos/Demo.
extract_frames('Demo')

**Step 2 : Detect individual person in each frame and get the coordinates of the person in the frame.**

*  In this step the coordinates of each person are detected using the open-source YOLO object detector.

* Person is searched for and detected in frame 0 ( F0 ) of each directory ,i.e. for each a{0-9}.

*   The detected coordinates of the humans are then stored in a{0-9} folder in a .txt file named as person{0-9}.txt, where person0 stores the coordinates of first person in frame 0 ( F0 ) and person1 stores coordinates of second person in frame 0 ( F0 ) and so on...



In [0]:

import cv2 as cv
import argparse
import sys
import numpy as np
import os.path

# Initialize the parameters
# These module-level names are read directly by postprocess() and detect() below.
confThreshold = 0.3  # Confidence threshold
nmsThreshold = 0.4  # Non-maximum suppression threshold
inpWidth = 416  # Width of network's input image
inpHeight = 416  # Height of network's input image

# Load names of classes
# One class name per line; postprocess() looks names up as classes[classId].
# The file is opened relative to the current working directory.
classesFile = "coco.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Give the configuration and weight files for the model and load the network using them.
# Both files are also resolved relative to the current working directory.
modelConfiguration = "yolov3.cfg"
modelWeights = "yolov3.weights"

net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
# Run inference with OpenCV's own backend on the CPU.
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

In [0]:
# Get the names of the output layers
def getOutputsNames(net):
    """Return the names of the network's output layers.

    Args:
        net: An object exposing `getLayerNames()` and
            `getUnconnectedOutLayers()` (here the loaded `cv.dnn` network).

    Returns:
        list: Names of the layers with unconnected outputs.
    """
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # The ids are 1-based. Older OpenCV releases return them as an Nx1 array,
    # newer ones (>= 4.5.4) as a flat array; flattening handles both.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    return [layersNames[int(layer_id) - 1] for layer_id in out_layer_ids]


# Remove the bounding boxes with low confidence using non-maxima suppression
def postprocess(frame, outs, folder, li):
    """Keep the confident 'person' detections of one frame and store their boxes.

    Every detection whose best class score exceeds `confThreshold` becomes a
    candidate box; overlapping candidates are reduced with non-maximum
    suppression. Each surviving box whose class is 'person' is written to
    ``<folder>/person<p>.txt`` as a one-row CSV with columns l, t, w, h
    (left, top, width, height in pixels), where ``p`` counts persons from 0.

    Args:
        frame: Image array; only its height and width (`shape[0]`, `shape[1]`)
            are used, to scale the detections to pixels.
        outs: Iterable of network outputs. Each detection row holds
            centre x, centre y, width, height (relative to the frame size)
            in positions 0-3 and the per-class scores from position 5 on.
        folder: Directory that receives the ``person<p>.txt`` files.
        li: List that `folder` is appended to when no person was found.
    """
    frameHeight, frameWidth = frame.shape[:2]
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    # NMSBoxes returns an Nx1 array in older OpenCV releases, a flat array in
    # newer ones and an empty tuple when nothing is kept; flatten covers all three.
    kept = np.asarray(indices, dtype=int).reshape(-1)

    p = 0
    for i in kept:
        if classes[classIds[i]] != 'person':
            continue
        left, top, width, height = boxes[i]
        dim = pd.DataFrame({'l': [left], 't': [top], 'w': [width], 'h': [height]})
        print(dim.head())
        dim.to_csv(f'{folder}/person{p}.txt')
        p += 1
    if p == 0:
        li.append(folder)
        print("No person found in ", folder)

In [0]:
def detect(frame, folder, li):
    """Run the YOLO network on one frame and store the detected persons.

    Args:
        frame: Image array as returned by `cv.imread`.
        folder: Directory that receives the ``person<p>.txt`` files
            (passed through to `postprocess`).
        li: List that `folder` is appended to when no person is found
            (passed through to `postprocess`).
    """
    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs, folder, li)
    # No waitKey() here: nothing is displayed in a window, and the GUI call is
    # not available in headless OpenCV builds.

In [0]:
import time
from google.colab.patches import cv2_imshow
categories=['sit','stand','walk','push']
# Not referenced by any code visible in this notebook; presumably the number of
# videos per category - TODO confirm.
videos_data={'sit':9,'stand':9,'walk':12,'push':4}

def detect_person(directory):
    """Detect persons in frame img0.jpg of every frame set of a category.

    For each ``Dataset/<directory>/video*/a*`` folder the image
    ``img/img0.jpg`` is passed to `detect`, which writes the
    ``person<p>.txt`` files into that folder.

    Args:
        directory: Sub-folder of ``Dataset`` to process.

    Returns:
        list[list[str]]: One list per video folder, holding the frame-set
        folders of that video in which no person was detected.
    """
    lists = []
    videos = glob.glob(os.path.join('Dataset', directory, 'video*'))
    for video in videos:
        # A fresh list per video: sharing one list across videos would make
        # every entry of the result alias the same combined list.
        li = []
        for folder in glob.glob(os.path.join(video, 'a*')):
            image_path = os.path.join(folder, 'img', 'img0.jpg')
            frame = cv.imread(image_path)
            if frame is None:
                # Missing or unreadable image: report it and keep going.
                print('Skipping %s: cannot read %s' % (folder, image_path))
                continue
            detect(frame, folder, li)
        lists.append(li)
    return lists

In [0]:
# Step 2: run person detection on the Demo frame sets and keep the folders
# in which no person was found.
images_without_person_demo=detect_person('Demo')

A few frames do not contain any human. The names of those folders are stored in the variable *images_without_person_demo*; these folders can later be deleted manually or with Python's shutil package.

In [0]:
# Display the folders in which no person was detected.
images_without_person_demo

**Step 3: Track the coordinates of the person in the next 9 frames of the image set**


---



*   MEEM tracker is used to detect the coordinates of the humans in the next 10 frames of the image set.
*   It accepts the coordinates of the person in frame 0 ( F0 ), the initial frame ( F0 ) and the folder containing the set of 10 frames.


*   It returns the tracked coordinates of that person in next 10 frames.

*   Each output{0-9}.txt file, containing the coordinates of the person in the other 10 frames, corresponds to one person{0-9}.txt file, which contains the coordinates of that person in the first frame ( F0 ).


NOTE: You need to place the MEEM tracker code in the same current directory.

In [0]:
import os
import matlab.engine
import glob

def track_from_matlab(directory, dataset_root='D:/Dataset/'):
    """Run the MATLAB MEEM tracker for every detected person of every frame set.

    For each ``<dataset_root>/<directory>/video*/a*`` folder, every
    ``person<p>.txt`` file is passed to the MATLAB function ``MEEMTrack``
    together with the person index, the folder and the image extension.

    Args:
        directory: Sub-folder of `dataset_root` to process.
        dataset_root: Root folder of the dataset. The default is the absolute
            path this step was written with; the other steps of this notebook
            use the relative folder ``'Dataset'``, so pass that when running
            from the project directory.

    Notes:
        Only ``person*.txt`` files are used as tracker input, so text files
        written by an earlier tracking run into the same folder are not fed
        back to the tracker. Files are ordered so that index ``i`` belongs to
        ``person<i>.txt``.
    """
    eng = matlab.engine.start_matlab()
    try:
        videos = glob.glob(os.path.join(dataset_root, directory, 'video*'))
        for video in videos:
            for frame_dir in glob.glob(os.path.join(video, 'a*')):
                # Sort by (length, name) so person2 comes before person10;
                # glob itself returns files in arbitrary order.
                person_files = sorted(
                    glob.glob(os.path.join(frame_dir, 'person*.txt')),
                    key=lambda path: (len(path), path))
                for i, person_file in enumerate(person_files):
                    eng.MEEMTrack(str(i), frame_dir, 'jpg', False, person_file)
                    print('From directory ', frame_dir)
                    print('------------------------------------------------')
    finally:
        # Shut the MATLAB engine down even when tracking fails midway.
        eng.quit()

In [0]:
# Step 3: track every detected person through the Demo frame sets.
track_from_matlab('Demo')

**Step 4: Extract the person out of all 10 frames.**


* The output file of MEEM tracker output{0-9}.txt contains the coordinates of the person{0-9} in all 10 frames that was detected in the YOLO stage.   

*   The region bounded by the coordinates in each output{0-9}.txt file is cropped out of each of the 10 images and saved into a subfolder res{0-9}.

*   Each individual person has their own set of extracted images pertaining to that person only.




In [0]:
import re
import time
import cv2 as cv
from google.colab.patches import cv2_imshow

def _crop_window(left, top, width, height, min_size):
    """Return `(top, left, bottom, right)` of the crop around a tracked box.

    The box is grown around its centre to at least `min_size` pixels per side.
    All four edges are clamped at 0 so that a box near the top or left border
    cannot turn into a negative (wrap-around) slice index.
    """
    center_y = int(height / 2) + top
    center_x = int(width / 2) + left
    height = max(height, min_size)
    width = max(width, min_size)
    top = center_y - int(height / 2)
    left = center_x - int(width / 2)
    bottom = top + height
    right = left + width
    return max(top, 0), max(left, 0), max(bottom, 0), max(right, 0)


def split_person(directory, video_pattern='video*', min_size=300):
    """Crop every tracked person out of the frames of each frame set.

    For each ``Dataset/<directory>/<video_pattern>/a*`` folder, every
    ``output<n>.txt`` file is read as a header-less CSV with the columns
    l, t, w, h. Row ``i`` is applied to ``img/img<i>.jpg`` and the crop is
    written to ``res<n>/img<i>.jpg`` inside the same folder.

    Args:
        directory: Sub-folder of ``Dataset`` to process.
        video_pattern: Glob pattern selecting the video folders. Defaults to
            all videos; pass e.g. ``'video4'`` to process a single one.
        min_size: Minimum height and width of a crop in pixels; smaller boxes
            are grown around their centre.

    Notes:
        Rows whose image is missing or unreadable, and crops that end up
        empty, are skipped.
    """
    video_files = glob.glob(os.path.join('Dataset', directory, video_pattern))
    for video_file in video_files:
        for folder in glob.glob(os.path.join(video_file, 'a*')):
            txt_files = glob.glob(os.path.join(folder, 'output*.txt'))
            print(folder, len(txt_files))
            for txt_file in txt_files:
                crop_df = pd.read_csv(txt_file, names=['l', 't', 'w', 'h'])
                # The last number in the path is the person index of output<n>.txt.
                person_no = re.findall(r'\d+', txt_file)[-1]
                destination_directory = os.path.join(folder, f'res{person_no}')
                os.makedirs(destination_directory, exist_ok=True)
                for index, row in crop_df.iterrows():
                    image_path = os.path.join(folder, 'img', f'img{index}.jpg')
                    frame = cv.imread(image_path)
                    if frame is None:
                        continue
                    top, left, bottom, right = _crop_window(
                        int(row['l']), int(row['t']), int(row['w']), int(row['h']), min_size)
                    crop = frame[top:bottom, left:right]
                    if crop.size == 0:
                        # The box lies completely outside the image.
                        continue
                    crop_path = os.path.join(destination_directory, f'img{index}.jpg')
                    cv.imwrite(crop_path, crop)

In [0]:
# Step 4: crop every tracked person out of the Demo frame sets.
split_person('Demo')

**Step 5: Get the input feature for Demo data.**


*   Read the cropped images for each person. 
*   The images are then resized to 96\*96 and converted to grayscale.
*   All the images are then stacked together to give a structure of shape 10 \* 96 \* 96 \* 1.
*   This forms the input to the CNN model, which gives the probability of each event occurring.



In [0]:
def load_X_data(train_or_test, video_pattern='video*'):
    """Build the CNN input array from the cropped person images.

    For every ``./Dataset/Demo/<video_pattern>/a*/res*`` folder the images
    ``img0.jpg`` .. ``img9.jpg`` are read, resized to 96x96, converted to
    grayscale and stacked into one sample of shape (10, 96, 96, 1).

    Args:
        train_or_test: Label used only in the printed summary.
        video_pattern: Glob pattern selecting the video folders. Defaults to
            all videos; pass e.g. ``'video3'`` to load a single one.

    Returns:
        tuple: ``(X, file_paths)`` where ``X`` is the array of all samples
        divided by 255 and ``file_paths`` is a `pandas.Series` holding, at the
        same position, the ``res*`` folder each sample was built from.

    Notes:
        A ``res*`` folder with a missing or unreadable image is reported and
        skipped as a whole, so ``X`` and ``file_paths`` always stay aligned.
    """
    n_frames = 10
    side = 96
    data = []
    sample_paths = []
    per_category = []
    video_folders = glob.glob(os.path.join('./Dataset/Demo', video_pattern))
    for video_folder in video_folders:
        folders = glob.glob(os.path.join(video_folder, 'a*'))
        print("%3s. %-7d files" % (video_folder, len(folders)))
        per_category.append(len(folders))
        for folder in folders:
            for res in glob.glob(os.path.join(folder, 'res*')):
                try:
                    frames = []
                    for k in range(n_frames):
                        image_path = os.path.join(res, 'img%d.jpg' % k)
                        image = cv2.imread(image_path)
                        if image is None:
                            raise FileNotFoundError('cannot read %s' % image_path)
                        image = cv2.resize(image, (side, side))
                        frames.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
                except Exception as e:
                    # Best effort: report the broken sample and carry on.
                    print('Error in %s'% res)
                    print(e)
                    continue
                # Sample and path are recorded together, after all images loaded.
                data.append(np.stack(frames).reshape(-1, side, side, 1))
                sample_paths.append(res)
    X = np.array(data)
    X = X / 255.
    # Built once from a list: Series.append no longer exists in pandas 2.x.
    file_paths = pd.Series(sample_paths, dtype=object)

    print('X_%s.shape:' % train_or_test, X.shape)
    print('per_%s_file.shape:' % train_or_test, per_category)
    print('total files %d' % len(file_paths))
    return X,file_paths

In [0]:
# Step 5: build the model input and remember which res* folder each sample came from.
X_demo, file_paths = load_X_data('Demo')

The input features can be saved into a pickle file in order to avoid recomputing them in case of a timeout on Google Colab or for other local computation reasons.

In [0]:
# import pickle
# with open('demo96.pickle', 'wb') as f:
#     pickle.dump([X_demo,file_paths], f)

In [0]:
import pickle
# Reload the cached features; this overwrites X_demo and file_paths from the
# cell above. NOTE: pickle.load can execute arbitrary code, so only load a
# demo96.pickle file that you created yourself.
with open('demo96.pickle', 'rb') as f:
    X_demo,file_paths = pickle.load(f)

In [0]:
from keras.models import Sequential
from keras.layers import Conv3D, MaxPooling3D, GlobalAveragePooling3D
from keras.layers.core import Dense, Activation, Dropout

def load_model(inputShape,classes):
    """Build the (uncompiled) 3D-CNN used for event classification.

    The network is five Conv3D + MaxPooling3D pairs with 32, 64, 128, 64 and
    32 filters, with dropout after the second and fifth pair, followed by
    global average pooling, dropout, a Dense(8) layer and a softmax layer.
    The layer order matters: saved weights are loaded into this structure.

    Args:
        inputShape: Shape of one input sample, e.g. ``(10, 96, 96, 1)``.
        classes: Number of output units of the final softmax layer.

    Returns:
        The Keras ``Sequential`` model. Its summary is printed as a side effect.
    """
    model = Sequential()

    def add_conv_pool(filters, kernel, **conv_kwargs):
        # One Conv3D with a (1, kernel, kernel) window and stride 2 along the
        # first axis, followed by a max-pool that halves the last two axes.
        model.add(Conv3D(filters=filters, kernel_size=(1, kernel, kernel), strides=(2, 1, 1),
                         padding='valid', activation='relu', **conv_kwargs))
        model.add(MaxPooling3D(pool_size=2, strides=(1, 2, 2), padding='same'))

    add_conv_pool(32, 4, input_shape=inputShape)
    add_conv_pool(64, 4)

    model.add(Dropout(0.4))

    add_conv_pool(128, 4)
    add_conv_pool(64, 2)
    add_conv_pool(32, 2)

    model.add(Dropout(0.3))

    model.add(GlobalAveragePooling3D())

    model.add(Dropout(0.3))

    model.add(Dense(8, activation='relu'))

    model.add(Dense(classes, activation='softmax'))

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    return model

**Two strategies were deployed and checked for better results**


*   One 3DCNN model that gives a prediction over all the classes of events (sit, stand, walk, push) that could possibly occur.
*   Four separate 3DCNN models, each of which individually checks the probability of occurrence of one class of event.



**Model 1: One model for all class of events.**


*   The model outputs the probability that the set of images represents each of the classes of event.
*   The event with the highest probability is taken as the event that is occurring and is used for labelling the image.



In [0]:
# Model 1: a single network with one softmax output per event class.
model=load_model((10, 96, 96, 1),4)

# Compiling the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.load_weights('Dataset/result1/Model_1_10_96_96_4.weights.best.hdf5')

# Predict with `model` (the network loaded in this cell). predict() returns the
# softmax probabilities; predict_proba() no longer exists in current Keras.
predictions = model.predict(X_demo)
# Index of the most probable class for every sample.
predictions=np.argmax(predictions,axis=1)

**Model 2: Four separate models, one for each class of event.**


*   Each of the four models outputs the probability that the set of images represents its class of event.
*   Out of all these probabilities, the highest one determines the event that is occurring and is used for labelling the image.



In [0]:
def _load_binary_model(weights_path):
    """Build a two-class network, compile it and load the weights at `weights_path`."""
    binary_model = load_model((10, 96, 96, 1), 2)
    binary_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    binary_model.load_weights(weights_path)
    return binary_model


# Model 2: one two-class network per event. Column 1 of each output is kept as
# the score of that event. predict() returns the softmax probabilities;
# predict_proba() no longer exists in current Keras.
model1 = _load_binary_model('Dataset/result2/Model_1_10_96_96_2.weights.best.hdf5')
predictions1 = model1.predict(X_demo)[:, 1]

model2 = _load_binary_model('Dataset/result2/Model_2_10_96_96_2.weights.best.hdf5')
predictions2 = model2.predict(X_demo)[:, 1]

model3 = _load_binary_model('Dataset/result2/Model_3_10_96_96_2.weights.best.hdf5')
predictions3 = model3.predict(X_demo)[:, 1]

model4 = _load_binary_model('Dataset/result2/Model_4_10_96_96_2.weights.best.hdf5')
predictions4 = model4.predict(X_demo)[:, 1]

# One row per sample, one column per model.
# NOTE(review): the column index is later used as an index into `categories`,
# which assumes Model_1..Model_4 follow the order of that list - confirm.
predictions = np.column_stack([predictions1, predictions2, predictions3, predictions4])
print(predictions.shape)
# Index of the model with the highest score for every sample.
predictions = np.argmax(predictions, axis=1)

**Step 6: Labelling the images and generating video.**


1.   In this step, frame 0 ( F0 ) of every folder is labelled with the corresponding prediction for each human detected, and the labelled image is stored in the directory named 'res'.
2.   The images are stored in the alphabetical order of how they occur in the video and thus have the same order in which they can be joined to form a video.



In [0]:
import re
import cv2 as cv
categories=['sit', 'stand', 'walk', 'push']
def draw_boxes(file_paths, predictions):
    """Draw a labelled box for every classified person on frame img0.jpg.

    For each ``./Dataset/Demo/video*/a<k>`` folder that has entries in
    `file_paths`, the image ``img/img0.jpg`` is loaded, one box and label is
    drawn per person, and the result is saved as ``<video>/res/img<k>.jpg``.

    Args:
        file_paths: `pandas.Series` of ``.../a<k>/res<n>`` folder paths, as
            returned by `load_X_data`. The Series is not modified.
        predictions: Sequence of class indices, addressed with the index labels
            of `file_paths`; each value is looked up in `categories`.

    Notes:
        The box of person ``n`` is the first row of ``a<k>/output<n>.txt``,
        read as a header-less CSV with the columns l, t, w, h - the same way
        `split_person` reads these files. Frame sets whose image cannot be
        read are reported and skipped.
    """
    # Sort a copy; an in-place sort would reorder the caller's Series.
    file_paths = file_paths.sort_values()
    video_folders = glob.glob(os.path.join('./Dataset/Demo', 'video*'))
    for video_folder in video_folders:
        # regex=False: the folder path is a literal prefix, not a pattern.
        video_data = file_paths[file_paths.str.contains(video_folder + '/', regex=False)]
        destination_directory = os.path.join(video_folder, 'res')
        os.makedirs(destination_directory, exist_ok=True)

        for frame_folder in glob.glob(os.path.join(video_folder, 'a*')):
            frame_data = video_data[video_data.str.contains(frame_folder + '/', regex=False)]
            if frame_data.empty:
                continue

            # The trailing number of the a<k> folder names the output image.
            folder_no = re.search(r'\d+$', frame_folder)
            if folder_no is None:
                continue
            image_path = os.path.join(frame_folder, 'img', 'img0.jpg')
            frame = cv.imread(image_path)
            if frame is None:
                print('Skipping %s: cannot read %s' % (frame_folder, image_path))
                continue
            save_path = os.path.join(destination_directory, 'img' + folder_no.group(0) + '.jpg')

            for index, res_path in frame_data.items():
                label = categories[predictions[index]]
                # res<n> belongs to output<n>.txt in the same frame folder.
                person_no = re.search(r'\d+$', res_path).group(0)
                boxes = pd.read_csv(os.path.join(frame_folder, 'output' + person_no + '.txt'),
                                    header=None, names=['l', 't', 'w', 'h'])
                first_box = boxes.iloc[0]
                left = int(first_box['l'])
                top = int(first_box['t'])
                right = left + int(first_box['w'])
                bottom = top + int(first_box['h'])
                cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                # Keep the label inside the image when the box touches the top edge.
                top = max(top, labelSize[1])
                cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine),
                             (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)

            cv.imwrite(save_path, frame)

In [0]:
# Step 6: label frame 0 of every frame set. Uses whichever `predictions`
# (Model 1 or Model 2) was computed last.
draw_boxes(file_paths,predictions)