In [1]:

!pip3 install tensorflow opencv-python mediapipe scikit-learn matplotlib moviepy





In [2]:
import cv2 
import numpy as np 
import os # easier to acess files
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import moviepy
print("Imported")


Imported


In [3]:
mp_holistic = mp.solutions.holistic # holistic model - make detections
mp_drawing = mp.solutions.drawing_utils # drawing utilities - draw detections

In [4]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # color conversion
    # cv2Color converts images from one colorspace to another
    image.flags.writeable = False
    results = model.process(image) # make prediction from image grame
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # color conversion
    return image, results

In [5]:
def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)) # draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(80,110,10), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)) # draw face connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)) # draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

In [6]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([pose, face, lh, rh])

In [7]:
# path for exported data, numpy arrays
DATA_PATH = os.path.join("MP_Data")

#Actions that we try to detect
actions = np.array(['hello', 'thanks', 'iloveyou', 'yes'])

# thirty videos with of data
no_sequences = 30

# videos are going to be 30 frames of length
sequence_length = 30

#1662 datapoints, 30 

In [8]:
for action in actions:
    # for each action
    for sequence in range(no_sequences):
        try:
            # if folder already exist, will pass, else make numbered folders in action
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [9]:
from sklearn.model_selection import train_test_split # used for training and testing
from tensorflow.keras.utils import to_categorical # used to make labels

In [10]:
label_map = {label:num for num, label in enumerate(actions)}

In [11]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = [] # all frames for specific sequence (video)
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            # loads numpy frame 0, frame 1, .. 
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
        # append 

In [12]:
X = np.array(sequences) # makes to np array
y = to_categorical(labels).astype(int) # uses one hot encoding to prevent bias

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05) # splits data

In [14]:
from tensorflow.keras.models import Sequential # Sequential lets you build a sequential NN
from tensorflow.keras.layers import LSTM, Dense # LSTM is temporal (involves time) and lets build model
from tensorflow.keras.callbacks import TensorBoard # allows to logging in tensor board

In [15]:
# Tensorboard is webapp to see neural network training
log_dir = os.path.join("Logs")
tb_callback = TensorBoard(log_dir=log_dir)

In [16]:
model = Sequential() # easy to make neural network

# 3 sets of lstm levels 
# first adds 64 neurons, and if it needs to pass to next level return seqs must be true, 30 frames with 1662 datapoints
# lstm layer is good for long term sequences
# dont return lstm layer on last layer
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

# actions will turn output into 3 layers, and softmax will show probabiliies from sum




In [17]:
model.compile(optimizer='Adam', loss="categorical_crossentropy", metrics=["categorical_accuracy"])




In [18]:
model.fit(X_train, y_train, epochs=150, callbacks=[tb_callback])
model.load_weights('gen5.h5')

Epoch 1/150


Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            442112    
                                                                 
 lstm_1 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 4)                 132       
                                                                 
Total params: 596708 (2.28 MB)
Trainable params: 596708 

In [20]:
# res = model.predict(X_test)
# actions[np.argmax(res[3])]
# actions[np.argmax(y_test[3])]

In [21]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [22]:
yhat = model.predict(X_test)



In [23]:
ytrue = np.argmax(y_test, axis=1).tolist() # conv
yhat = np.argmax(yhat, axis =1).tolist()

In [24]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[5, 0],
        [1, 0]],

       [[4, 0],
        [0, 2]],

       [[4, 1],
        [0, 1]],

       [[4, 0],
        [0, 2]]], dtype=int64)

In [25]:
accuracy_score(ytrue, yhat)

0.8333333333333334

In [26]:
# needs 30 frames to make a movement
sequence = []
sentence = []
threshold = 0.4

from moviepy.editor import VideoFileClip
import moviepy.video.fx.all as vfx



# in_loc = "../uploads/video.mp4"
# out_loc = "../uploads/video_sped.mp4"

# # Import video clip
# clip = VideoFileClip(in_loc)

# # Modify the FPS
# clip = clip.fx(vfx.speedx, 2)  # Adjust speed factor as needed

# # Save video clip
# clip.write_videofile(out_loc, codec="libx264", audio_codec="aac")

# # Save video clip
# # final.write_videofile(out_loc)

# # code to access openCV

cap = cv2.VideoCapture("../uploads/video.mp4")  # Access video file # access video cam on device port 0

# with is used to handle resource management
# set mediapipe model


# min detection is initial detection, tracking confidence is preceding tracking confidence
# if you want higher inital confidence in answer, then increase it and vice versa

i=0 #frame counter
frameTime = 1


action_list = []
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
	while cap.isOpened(): # while camera is on

		# i=i+1 #increment counter
		# if i % 3 != 0: # display only one third of the frames, you can change this parameter according to your needs
		# 	continue
		# Read feed (reading frame from webcam)
		ret, frame = cap.read()

		# if ret == False:
		# 	break


			# ret, frame = cap.retrieve() #decode frame
			

		

		# make detections
		image, results = mediapipe_detection(frame, holistic)
		
		# print(results)

		# draw landmarks
		draw_styled_landmarks(image, results)
		
        # 2. prediction logic
		keypoints = extract_keypoints(results)
		sequence.insert(0,keypoints)
		sequence = sequence[:30]
		print(len(sequence))
		
		if (len(sequence) == 30):
			res = model.predict(np.expand_dims(sequence, axis=0))[0]
			# expand dims allows us to test one sequence since its expecting (0, 30, 1662)
			print("Seen action: {}", actions[np.argmax(res)])
			action_list.append(actions[np.argmax(res)])


		# show frame to screen
		cv2.imshow('OpenCV Feed', image)
		
		# break gracefully
		if cv2.waitKey(1) & 0xFF == ord('q'):
			break
		
OUTPUT_PATH = os.path.join("outputs") 
output = os.path.join(OUTPUT_PATH, "test")
print(output)
np.save(output, action_list)

# release cv2 and close all windows
cv2.destroyAllWindows()
cv2.waitKey(1)
cap.release()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyou
30
Seen action: {} iloveyo

error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


: 

In [None]:
print("got to here")

got to here


In [None]:
import sys

sys.exit()

SystemExit: 