In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import requests

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [5]:
URL = "http://10.159.64.30"
cap = cv2.VideoCapture(URL + ":81/stream")
#requests.get(URL + "/control?var=framesize&val={}".format(8))
#cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        # Draw landmarks
        draw_styled_landmarks(image, results)
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1730579998.288842 4165105 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro


<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>


2024-11-02 15:39:59.462 python[63521:4165105] +[IMKClient subclass]: chose IMKClient_Modern
2024-11-02 15:39:59.462 python[63521:4165105] +[IMKInputSession subclass]: chose IMKInputSession_Modern


<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

KeyboardInterrupt: 

### extract the keypoints

In [14]:
len(results.face_landmarks.landmark)

468

In [39]:
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3) ### number of the landmakrs could cofirm by the runing process

In [8]:
len(pose)
pose.shape

(132,)

In [7]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [10]:
extract_keypoints(results).shape

(1662,)

## data collection

In [13]:
# Path for exported data, numpy arrays
#DATA_PATH = os.path.join('MP_Data') 
DATA_PATH = './MP_Data'
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

# Actions that we try to detect
actions = np.array(['light attack', 'heavy attack', 'doge','Kame Hame Ha','no move'])

# Thirty videos worth of data
no_sequences = 30
# Videos are going to be 30 frames in length
sequence_length = 30
# Folder start
start_folder = 1

In [16]:
!tree MP_Data

[01;34mMP_Data[0m
├── [01;34m1[0m
│   ├── [01;34m1[0m
│   │   ├── [00m0.npy[0m
│   │   ├── [00m1.npy[0m
│   │   ├── [00m10.npy[0m
│   │   ├── [00m11.npy[0m
│   │   ├── [00m12.npy[0m
│   │   ├── [00m13.npy[0m
│   │   ├── [00m14.npy[0m
│   │   ├── [00m15.npy[0m
│   │   ├── [00m16.npy[0m
│   │   ├── [00m17.npy[0m
│   │   ├── [00m18.npy[0m
│   │   ├── [00m19.npy[0m
│   │   ├── [00m2.npy[0m
│   │   ├── [00m20.npy[0m
│   │   ├── [00m21.npy[0m
│   │   ├── [00m22.npy[0m
│   │   ├── [00m23.npy[0m
│   │   ├── [00m24.npy[0m
│   │   ├── [00m25.npy[0m
│   │   ├── [00m26.npy[0m
│   │   ├── [00m27.npy[0m
│   │   ├── [00m28.npy[0m
│   │   ├── [00m29.npy[0m
│   │   ├── [00m3.npy[0m
│   │   ├── [00m4.npy[0m
│   │   ├── [00m5.npy[0m
│   │   ├── [00m6.npy[0m
│   │   ├── [00m7.npy[0m
│   │   ├── [00m8.npy[0m
│   │   └── [00m9.npy[0m
│   ├── [01;34m10[0m
│   │   ├── [00m0.npy[0m
│   │   ├── [00m1.npy[0m
│   │   ├── [00m10.npy[0m
│   │

In [14]:
for action in actions: 
    for sequence in range(1,no_sequences+1):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass



In [15]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    # NEW LOOP
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(start_folder, start_folder+no_sequences):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):


                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_styled_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(10)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                print(sequence)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1729992698.973941 20255948 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro


1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
12
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
15
15
15
1

In [40]:
cap.release()
cv2.destroyAllWindows()

In [16]:
label_map = {label:num for num, label in enumerate(actions)}

In [17]:
label_map

{'light attack': 0,
 'heavy attack': 1,
 'doge': 2,
 'Kame Hame Ha': 3,
 'no move': 4}

In [32]:
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [33]:
np.array(sequences).shape

(150, 30, 1662)

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim


class LSTMDenseModel(nn.Module):
    def __init__(self, input_size=1662, seq_len=30, num_classes=3):
        super(LSTMDenseModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size, 64, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(64, 1024, num_layers=1, batch_first=True)
        self.lstm3 = nn.LSTM(1024, 128, num_layers=1, batch_first=True)
        
        # Fully connected layers
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        x, _ = self.lstm1(x)
        x = F.relu(x)
        
        x, _ = self.lstm2(x)
        x = F.relu(x)
        
        x, _ = self.lstm3(x)
        x = F.relu(x)

        # Only take the last output of the last LSTM layer
        x = x[:, -1, :]  # (batch_size, 64)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return F.softmax(x, dim=1)

# Example usage
model = LSTMDenseModel(input_size=1662, seq_len=30, num_classes=3)
print(model)

LSTMDenseModel(
  (lstm1): LSTM(1662, 64, batch_first=True)
  (lstm2): LSTM(64, 1024, batch_first=True)
  (lstm3): LSTM(1024, 128, batch_first=True)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=3, bias=True)
)


In [34]:
X = np.array(sequences)
y = np.array(labels)
print(y)

print(X.shape, y.shape)


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4]
(150, 30, 1662) (150,)


In [35]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long()

train_dataset = DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=8)

X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

test_dataset = DataLoader(torch.utils.data.TensorDataset(X_test, y_test), batch_size=8)



In [36]:

model = LSTMDenseModel(input_size=1662, seq_len=30,num_classes=len(actions))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

LSTMDenseModel(
  (lstm1): LSTM(1662, 64, batch_first=True)
  (lstm2): LSTM(64, 1024, batch_first=True)
  (lstm3): LSTM(1024, 128, batch_first=True)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=5, bias=True)
)

In [37]:

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 200

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_dataset:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataset):.4f}')

Epoch [1/200], Loss: 1.6093
Epoch [2/200], Loss: 1.6094


KeyboardInterrupt: 

In [132]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_dataset:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')


Accuracy: 0.00%


In [133]:
# Save the trained model
model_path = 'gesture_lstm_model_v1.pth'
torch.save(model.state_dict(), model_path)
print(f'Model saved to {model_path}')

Model saved to gesture_lstm_model_v1.pth


In [96]:
# Initialize the model architecture
model_path = 'gesture_lstm_model_v1.pth'
model = LSTMDenseModel(input_size=1662, seq_len=30, num_classes=len(label_map))

# Load the saved model parameters
model.load_state_dict(torch.load(model_path))
model.eval()  # Set to evaluation mode
model.to(device)

LSTMDenseModel(
  (lstm1): LSTM(1662, 64, batch_first=True)
  (lstm2): LSTM(64, 128, batch_first=True)
  (lstm3): LSTM(128, 64, batch_first=True)
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=3, bias=True)
)

I0000 00:00:1729885707.783148 19464847 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
2024-10-25 14:48:31.830 python[67205:19464847] _TIPropertyValueIsValid called with 16 on nil context!
2024-10-25 14:48:31.830 python[67205:19464847] imkxpc_getApplicationProperty:reply: called with incorrect property value 16, bailing.
2024-10-25 14:48:31.830 python[67205:19464847] Text input context does not respond to _valueForTIProperty:


testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
testing2
t

2024-10-25 14:48:56.762 python[67205:19464847] _TIPropertyValueIsValid called with 16 on nil context!
2024-10-25 14:48:56.763 python[67205:19464847] imkxpc_getApplicationProperty:reply: called with incorrect property value 16, bailing.
2024-10-25 14:48:56.763 python[67205:19464847] Text input context does not respond to _valueForTIProperty:


In [103]:
classes = ['hello', 'thanks', 'iloveyou']
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [4]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            #res = model.predict(np.expand_dims(sequence, axis=0))[0]
            with torch.no_grad():
                input_data = np.array(sequence)
                input_data = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(device)
                res = model(input_data).cpu().numpy().squeeze()
                print(actions[np.argmax(res)])
                predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()



NameError: name 'mp_holistic' is not defined