In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.1.1-py3-none-any.whl (699 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m699.8/699.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting hub-sdk>=0.0.2 (from ultralytics)
  Downloading hub_sdk-0.0.3-py3-none-any.whl (37 kB)
Installing collected packages: hub-sdk, thop, ultralytics
Successfully installed hub-sdk-0.0.3 thop-0.1.1.post2209072238 ultralytics-8.1.1


In [None]:
from google.colab import drive
drive.mount("/content/gdrive", force_remount=True)

Mounted at /content/gdrive


# **Level 1 : Performing Pose Estimation**

To perform pose estimation using a model of your choice on the frames from the videos provided. The goal is to identify key parts of the human body in each frame.

Introduction:

•   Choose a pre-trained human pose estimation model.

•   Apply the model on each frame from the provided videos, identifying the key parts of the human body, generating an annotated video.

## Load the model

I choose YOLOv8 Model

In [None]:
from ultralytics import YOLO
import cv2
from google.colab.patches import cv2_imshow

# Change the path to where you save the pretrained weight
model = YOLO('/content/gdrive/MyDrive/KinetixPro/yolov8s-pose.pt')

# change the video path to where you save your video
video_path = "/content/gdrive/MyDrive/KinetixPro/video/Samurai qualifications.mp4"
cap = cv2.VideoCapture(video_path)

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc('F','M','P','4')
output_path = "/content/gdrive/MyDrive/KinetixPro/video/output_level1.avi"
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 inference on the frame
        results = model(frame, verbose = False)

        # Visualize the results on the frame
        annotated_frame = results[0].plot(boxes = False)

        # Display the annotated frame
        # cv2_imshow(annotated_frame) # run this if you use google colab
        # cv2.imshow(annotated_frame) # run this if you use your personal device

        # save the video
        out.write(annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

ModuleNotFoundError: No module named 'ultralytics'

# **Level  2 : Video Data Labelling**

Building upon the result of level 1, extend your pose estimation to video data. Specifically, you need to annotate segments of the video that correspond to the cutting action.

instruction:

•   Extend your pose estimation to video frames, annotating the key body parts.

•   Manually label video segments that depict the action of cutting

## A. Extract picture



Is like screenshoot every frame from the video, but I limit the output to just get 500 image, you can change it to whatever you like in the frame_total variable

In [None]:
import cv2
from ultralytics import YOLO
import pandas as pd

model = YOLO('/content/gdrive/MyDrive/KinetixPro/yolov8s-pose.pt')

video_path = "/content/gdrive/MyDrive/KinetixPro/video/Samurai qualifications.mp4"
cap = cv2.VideoCapture(video_path)

frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
fps = cap.get(cv2.CAP_PROP_FPS)

seconds = round(frames/fps)

frame_total = 500
i = 0
a = 0

all_data = []

while (cap.isOpened()):
  cap.set(cv2.CAP_PROP_POS_MSEC, (i * ((seconds/frame_total)*1000)))
  flag, frame = cap.read()

  if flag == False:
    break

  # change the path to where you want to save the image
  image_path = f'/content/gdrive/MyDrive/KinetixPro/level 2 output/image/img_{i}.jpg'
  cv2.imwrite(image_path, frame)

  results = model(frame, verbose=False)

  for r in results:
    bound_box = r.boxes.xyxy
    conf = r.boxes.conf.tolist()
    keypoints = r.keypoints.xyn.tolist()

    # this code for save every human that detected from 1 image, so if 1 image have 10 people, we will save 10 human picture.

    for index, box in enumerate(bound_box):
      if conf[index] > 0.75:
        x1, y1, x2, y2 = box.tolist()
        pict = frame[int(y1):int(y2), int(x1):int(x2)]
        output_path = f'/content/gdrive/MyDrive/KinetixPro/level 2 output/samurai/person_{a}.jpg'

        data = {'image_name': f'person_{a}.jpg'}

        # Initialize the x and y lists for each possible key
        for j in range(len(keypoints[index])):
            data[f'x{j}'] = keypoints[index][j][0]
            data[f'y{j}'] = keypoints[index][j][1]

       # we save the human keypoint that detected by yolo model to csv file to train our DNN / Xgboost model later.

        all_data.append(data)
        cv2.imwrite(output_path, pict)
        a += 1

  i += 1

print(i-1, a-1)
cap.release()
cv2.destroyAllWindows()

# Combine all data dictionaries into a single DataFrame
df = pd.DataFrame(all_data)

# Save the DataFrame to a CSV file
csv_file_path = '/content/gdrive/MyDrive/KinetixPro/level 2 output/keypoints.csv'
df.to_csv(csv_file_path, index=False)

500 2218


In [None]:
df

Unnamed: 0,image_name,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,x12,y12,x13,y13,x14,y14,x15,y15,x16,y16
0,person_0.jpg,0.453197,0.302032,0.456858,0.296662,0.450533,0.295797,0.463443,0.303055,0.445930,...,0.439252,0.431904,0.459417,0.505024,0.436765,0.505091,0.456204,0.575653,0.435257,0.577183
1,person_1.jpg,0.593066,0.281346,0.598191,0.270256,0.584436,0.270880,0.000000,0.000000,0.565777,...,0.557840,0.546775,0.594635,0.687903,0.551479,0.694512,0.592816,0.826573,0.540693,0.840095
2,person_2.jpg,0.181221,0.283046,0.184487,0.275668,0.176457,0.276563,0.189281,0.281917,0.168748,...,0.168403,0.437034,0.200166,0.516951,0.164599,0.518264,0.197627,0.591182,0.159425,0.592046
3,person_3.jpg,0.271258,0.295286,0.274229,0.289032,0.267471,0.289459,0.279029,0.294720,0.261305,...,0.262421,0.436683,0.287397,0.508463,0.259802,0.509953,0.286410,0.576630,0.256820,0.579549
4,person_4.jpg,0.348829,0.304597,0.352094,0.299045,0.345404,0.298286,0.000000,0.000000,0.338601,...,0.333417,0.439063,0.351476,0.515506,0.333969,0.514298,0.346653,0.586359,0.332365,0.587411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2214,person_2214.jpg,0.584296,0.211367,0.590805,0.201844,0.580306,0.202774,0.608928,0.206329,0.000000,...,0.571877,0.437319,0.607376,0.557337,0.564169,0.561978,0.609582,0.688850,0.562496,0.684933
2215,person_2215.jpg,0.173520,0.344559,0.176485,0.338928,0.170027,0.338583,0.180234,0.342715,0.163789,...,0.157932,0.451579,0.204094,0.461765,0.158238,0.464185,0.208484,0.529772,0.155735,0.533616
2216,person_2216.jpg,0.819771,0.441904,0.824267,0.436234,0.817228,0.435511,0.833301,0.439654,0.814173,...,0.818071,0.537515,0.836270,0.543164,0.791063,0.540727,0.840935,0.568638,0.000000,0.000000
2217,person_2217.jpg,0.921107,0.441252,0.925652,0.434964,0.917432,0.433961,0.932404,0.440919,0.911215,...,0.901749,0.559519,0.943077,0.552918,0.888104,0.549330,0.000000,0.000000,0.000000,0.000000


After we do this, we open the folder and create another folder and name it with your class label, the split the picture (like if picture 1 - 10 is cutting, move it to cutting folder etc).

## B. Create the dataset

This for train our DNN/XGboost model, if you want to detect pose with image classifier, you can skip this.

In [None]:
import os

# Path ke folder dataset
dataset_path = '/content/gdrive/MyDrive/KinetixPro/level 2 output/dataset/'

# Path ke folder cutting
cutting_path = os.path.join(dataset_path, 'cutting')
# List semua file di dalam folder cutting
cutting_files = os.listdir(cutting_path)

# Path ke folder non_cutting
non_cutting_path = os.path.join(dataset_path, 'non_cutting')
# List semua file di dalam folder non_cutting
non_cutting_files = os.listdir(non_cutting_path)

# Menampilkan list nama file di folder cutting
print("Files in cutting folder:", cutting_files)

# Menampilkan list nama file di folder non_cutting
print("Files in non_cutting folder:", non_cutting_files)

Files in cutting folder: ['person_1.jpg', 'person_8.jpg', 'person_31.jpg', 'person_14.jpg', 'person_23.jpg', 'person_53.jpg', 'person_65.jpg', 'person_72.jpg', 'person_58.jpg', 'person_92.jpg', 'person_85.jpg', 'person_86.jpg', 'person_104.jpg', 'person_98.jpg', 'person_113.jpg', 'person_126.jpg', 'person_122.jpg', 'person_134.jpg', 'person_143.jpg', 'person_152.jpg', 'person_160.jpg', 'person_174.jpg', 'person_166.jpg', 'person_182.jpg', 'person_200.jpg', 'person_211.jpg', 'person_204.jpg', 'person_217.jpg', 'person_189.jpg', 'person_244.jpg', 'person_225.jpg', 'person_232.jpg', 'person_238.jpg', 'person_253.jpg', 'person_283.jpg', 'person_271.jpg', 'person_292.jpg', 'person_262.jpg', 'person_310.jpg', 'person_302.jpg', 'person_317.jpg', 'person_326.jpg', 'person_335.jpg', 'person_345.jpg', 'person_354.jpg', 'person_365.jpg', 'person_370.jpg', 'person_373.jpg', 'person_380.jpg', 'person_396.jpg', 'person_385.jpg', 'person_398.jpg', 'person_402.jpg', 'person_415.jpg', 'person_448.jpg',

In [None]:
import pandas as pd
import os

df = pd.read_csv('/content/gdrive/MyDrive/KinetixPro/level 2 output/keypoints.csv')

# Path ke folder dataset
dataset_path = '/content/gdrive/MyDrive/KinetixPro/level 2 output/dataset/'
cutting_path = os.path.join(dataset_path, 'cutting')
non_cutting_path = os.path.join(dataset_path, 'non_cutting')


# Fungsi untuk menentukan label berdasarkan nama file
def get_label(image_name, cutting_path, non_cutting_path):
    if image_name in os.listdir(cutting_path):
        return 'cutting'
    elif image_name in os.listdir(non_cutting_path):
        return 'non_cutting'
    else:
        return None  # Tidak dapat menemukan file di kedua folder

# Menambahkan kolom label berdasarkan nama folder
df['label'] = df['image_name'].apply(lambda x: get_label(x, cutting_path, non_cutting_path))
df.to_csv(f'{dataset_path}dataset.csv', index=False)
df

Unnamed: 0,image_name,x0,y0,x1,y1,x2,y2,x3,y3,x4,...,y12,x13,y13,x14,y14,x15,y15,x16,y16,label
0,person_0.jpg,0.453197,0.302032,0.456858,0.296662,0.450533,0.295797,0.463443,0.303055,0.445930,...,0.431904,0.459417,0.505024,0.436765,0.505091,0.456204,0.575653,0.435257,0.577183,non_cutting
1,person_1.jpg,0.593066,0.281346,0.598191,0.270256,0.584436,0.270880,0.000000,0.000000,0.565777,...,0.546775,0.594635,0.687903,0.551479,0.694512,0.592816,0.826573,0.540693,0.840095,cutting
2,person_2.jpg,0.181221,0.283046,0.184487,0.275668,0.176457,0.276563,0.189281,0.281917,0.168748,...,0.437034,0.200166,0.516951,0.164599,0.518264,0.197627,0.591182,0.159425,0.592046,non_cutting
3,person_3.jpg,0.271258,0.295286,0.274229,0.289032,0.267471,0.289459,0.279029,0.294720,0.261305,...,0.436683,0.287397,0.508463,0.259802,0.509953,0.286410,0.576630,0.256820,0.579549,non_cutting
4,person_4.jpg,0.348829,0.304597,0.352094,0.299045,0.345404,0.298286,0.000000,0.000000,0.338601,...,0.439063,0.351476,0.515506,0.333969,0.514298,0.346653,0.586359,0.332365,0.587411,non_cutting
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2214,person_2214.jpg,0.584296,0.211367,0.590805,0.201844,0.580306,0.202774,0.608928,0.206329,0.000000,...,0.437319,0.607376,0.557337,0.564169,0.561978,0.609582,0.688850,0.562496,0.684933,non_cutting
2215,person_2215.jpg,0.173520,0.344559,0.176485,0.338928,0.170027,0.338583,0.180234,0.342715,0.163789,...,0.451579,0.204094,0.461765,0.158238,0.464185,0.208484,0.529772,0.155735,0.533616,non_cutting
2216,person_2216.jpg,0.819771,0.441904,0.824267,0.436234,0.817228,0.435511,0.833301,0.439654,0.814173,...,0.537515,0.836270,0.543164,0.791063,0.540727,0.840935,0.568638,0.000000,0.000000,non_cutting
2217,person_2217.jpg,0.921107,0.441252,0.925652,0.434964,0.917432,0.433961,0.932404,0.440919,0.911215,...,0.559519,0.943077,0.552918,0.888104,0.549330,0.000000,0.000000,0.000000,0.000000,non_cutting


In [None]:
df.label.value_counts()

non_cutting    1904
cutting         315
Name: label, dtype: int64

# **Level 3: Training a custom pose estimation model**

Expanding from level 2, train a custom pose estimation model. The objective is to create a model that can identify whether a person is performing the cutting action based on labelled data.

instructions:

•   Use the annotate data from level 2 to train a custom pose estimation model.

•   Evaluate and fine-tune the model to enhance its accuracy in detecting the cutting action.

## Solution 1

### XGBOOST

In [None]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming your training data is stored in a DataFrame called df
# Make sure to preprocess your data appropriately before training the model
df = pd.read_csv('/content/gdrive/MyDrive/KinetixPro/level 2 output/dataset/dataset.csv')

# Define features (X) and target variable (y)
X = df.drop(['label', 'image_name'], axis=1)  # Assuming 'label' is the column containing the target variable
y = df['label'].map({'cutting': 0, 'non_cutting': 1})  # Convert labels to 0 and 1

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost classifier
model = xgb.XGBClassifier(objective='binary:logistic', eval_metric='logloss')

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Save the trained model
model.save_model("/content/gdrive/MyDrive/KinetixPro/level 3 output/model_weights.xgb")

Accuracy: 0.9864864864864865




### Neural Network

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

df = pd.read_csv('/content/gdrive/MyDrive/KinetixPro/level 2 output/dataset/dataset.csv')

# Define features (X) and target variable (y)
X = df.drop(['label', 'image_name'], axis=1)  # Assuming 'label' is the column containing the target variable
y = df['label'].map({'cutting': 0, 'non_cutting': 1})  # Convert labels to 0 and 1

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42,stratify = y)


In [None]:
import tensorflow as tf
import tensorflow.keras.layers as tfl

model = tf.keras.Sequential([
    tfl.Dense(32, activation='relu' ,input_shape=(X_train.shape[1],)),
    tfl.Dense(16, activation='relu'),
    tfl.Dense(1, activation='sigmoid')
])

model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.Adam(),
    metrics = ['accuracy']
    )

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if (logs.get('accuracy') > 0.95) and (logs.get('val_accuracy') > 0.9) :
            self.model.stop_training = True

callbacks = myCallback()

latih = model.fit(X_train, y_train,
            epochs=500,
            validation_data=(X_test, y_test),
            callbacks = [callbacks]
            )

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500


In [None]:
save_model_path = '/content/gdrive/MyDrive/KinetixPro/level 3 output'

# save architecture of our model
model_json = model.to_json()
with open(f'{save_model_path}/model_architecture.json', "w") as json_file:
    json_file.write(model_json)

model.save_weights(f"{save_model_path}/model_weights.h5")

## Solution 2

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the path to your main folder containing subfolders with images
main_folder_path = '/content/gdrive/MyDrive/KinetixPro/level 2 output/dataset'

# Set up data generators for training and testing
datagen = ImageDataGenerator(validation_split=0.2)

# Set up training data generator
train_generator = datagen.flow_from_directory(
    main_folder_path,
    target_size=(256, 256),  # e.g., (224, 224) depending on your model input size
    batch_size=32,
    class_mode='binary',
    subset='training'
)

# Set up validation data generator
validation_generator = datagen.flow_from_directory(
    main_folder_path,
    target_size=(256, 256),
    batch_size=16,
    class_mode='binary',
    subset='validation'  # specify that this is the validation set
)

# Now, you can use train_generator and validation_generator in your model.fit() function
# For example:
# model.fit(train_generator, epochs=your_epochs, validation_data=validation_generator)

Found 1776 images belonging to 2 classes.
Found 443 images belonging to 2 classes.


In [None]:
import tensorflow as tf
import tensorflow.keras.layers as tfl

model_cnn = tf.keras.models.Sequential([
    # This is the first convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(256, 256, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_cnn.compile(loss = 'binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if (logs.get('accuracy') > 0.95) and (logs.get('val_accuracy') > 0.9) :
            self.model.stop_training = True

callbacks = myCallback()

history = model_cnn.fit(train_generator, epochs=1000, steps_per_epoch=8, validation_data = validation_generator, verbose = 1, validation_steps=8, callbacks = [callbacks])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000


In [None]:
save_model_path = '/content/gdrive/MyDrive/KinetixPro/level 3 output'

# save architecture of our model
model_json = model_cnn.to_json()
with open(f'{save_model_path}/model_architecture_cnn.json', "w") as json_file:
    json_file.write(model_json)

model_cnn.save_weights(f"{save_model_path}/model_weights_cnn.h5")

# **Level 4: Model Deployment and Video Annotations**

•	To deploy trained model from Level 3 and provide video annotations of when a person is performing a cutting action.


## With XGBoost

In [None]:
from ultralytics import YOLO
import cv2
from google.colab.patches import cv2_imshow
import xgboost as xgb
import pandas as pd

model_yolo = YOLO('/content/gdrive/MyDrive/KinetixPro/yolov8s-pose.pt')
model = xgb.Booster()
model.load_model('/content/gdrive/MyDrive/KinetixPro/level 3 output/model_weights.xgb')


video_path = "/content/gdrive/MyDrive/KinetixPro/video/Samurai qualifications.mp4"
cap = cv2.VideoCapture(video_path)

print('Total Frame', cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc('F','M','P','4')
output_path = "/content/gdrive/MyDrive/KinetixPro/level 4 output/output_level4_xgb.avi"
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

frame_tot = 0
# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 inference on the frame
        results = model_yolo(frame, verbose = False)

        # Visualize the results on the frame
        annotated_frame = results[0].plot(boxes = False)

        for r in results:
          bound_box = r.boxes.xyxy
          conf = r.boxes.conf.tolist()
          keypoints = r.keypoints.xyn.tolist()

          for index, box in enumerate(bound_box):
            if conf[index] > 0.75:
                x1, y1, x2, y2 = box.tolist()
                data = {}

                # Initialize the x and y lists for each possible key
                for j in range(len(keypoints[index])):
                    data[f'x{j}'] = keypoints[index][j][0]
                    data[f'y{j}'] = keypoints[index][j][1]

                df = pd.DataFrame(data, index=[0])
                dmatrix = xgb.DMatrix(df)
                cut = model.predict(dmatrix)
                binary_predictions = (cut > 0.5).astype(int)
                if binary_predictions == 0:
                  cv2.rectangle(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
                  cv2.putText(annotated_frame, 'Cutting', (int(x1), int(y1)), cv2.FONT_HERSHEY_DUPLEX, 1.0, (255,0,0), 3)

        # Display the annotated frame
        # cv2_imshow(annotated_frame)

        # save the video
        out.write(annotated_frame)
        frame_tot += 1
        # print('Processed Frame : ', frame_tot)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

Total Frame 2555.0


Running time for xgboost about 4 minutes

## With Neural Netwok

In [None]:
from ultralytics import YOLO
import cv2
from google.colab.patches import cv2_imshow
import tensorflow as tf
from tensorflow.keras.models import model_from_json
import pandas as pd

# Load the model architecture from the JSON file
json_file_path = "/content/gdrive/MyDrive/KinetixPro/level 3 output/model_architecture.json"
with open(json_file_path, "r") as json_file:
    loaded_model_json = json_file.read()

model_tf = model_from_json(loaded_model_json)

# Load the model weights from the HDF5 file
weights_file_path = "/content/gdrive/MyDrive/KinetixPro/level 3 output/model_weights.h5"
model_tf.load_weights(weights_file_path)

model = YOLO('/content/gdrive/MyDrive/KinetixPro/yolov8s-pose.pt')

video_path = "/content/gdrive/MyDrive/KinetixPro/video/Samurai qualifications.mp4"
cap = cv2.VideoCapture(video_path)

print('Total Frame', cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc('F','M','P','4')
output_path = "/content/gdrive/MyDrive/KinetixPro/level 4 output/output_level4_NN.avi"
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

frame_tot = 0
# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 inference on the frame
        results = model(frame, verbose = False)

        # Visualize the results on the frame
        annotated_frame = results[0].plot(boxes = False)

        for r in results:
          bound_box = r.boxes.xyxy
          conf = r.boxes.conf.tolist()
          keypoints = r.keypoints.xyn.tolist()

          for index, box in enumerate(bound_box):
            if conf[index] > 0.75:
                x1, y1, x2, y2 = box.tolist()
                data = {}

                # Initialize the x and y lists for each possible key
                for j in range(len(keypoints[index])):
                    data[f'x{j}'] = keypoints[index][j][0]
                    data[f'y{j}'] = keypoints[index][j][1]

                df = pd.DataFrame(data, index=[0])
                cut = model_tf.predict(df, verbose = False)
                # print(df, cut)
                # cv2_imshow(frame[int(y1):int(y2), int(x1):int(x2)])
                if cut[0][0] < 0.5:
                  cv2.rectangle(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
                  cv2.putText(annotated_frame, 'Cutting', (int(x1), int(y1)), cv2.FONT_HERSHEY_DUPLEX, 1.0, (255,0,0), 3)

        # Display the annotated frame
        # cv2_imshow(annotated_frame)

        # save the video
        out.write(annotated_frame)
        frame_tot += 1
        print('Processed Frame : ', frame_tot)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

Total Frame 2555.0


  updates=self.state_updates,


Processed Frame :  1
Processed Frame :  2
Processed Frame :  3
Processed Frame :  4
Processed Frame :  5
Processed Frame :  6
Processed Frame :  7
Processed Frame :  8
Processed Frame :  9
Processed Frame :  10
Processed Frame :  11
Processed Frame :  12
Processed Frame :  13
Processed Frame :  14
Processed Frame :  15
Processed Frame :  16
Processed Frame :  17
Processed Frame :  18
Processed Frame :  19
Processed Frame :  20
Processed Frame :  21
Processed Frame :  22
Processed Frame :  23
Processed Frame :  24
Processed Frame :  25
Processed Frame :  26
Processed Frame :  27
Processed Frame :  28
Processed Frame :  29
Processed Frame :  30
Processed Frame :  31
Processed Frame :  32
Processed Frame :  33
Processed Frame :  34
Processed Frame :  35
Processed Frame :  36
Processed Frame :  37
Processed Frame :  38
Processed Frame :  39
Processed Frame :  40
Processed Frame :  41
Processed Frame :  42
Processed Frame :  43
Processed Frame :  44
Processed Frame :  45
Processed Frame :  

## With Convolutional Neural Network

In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
import tensorflow as tf
from tensorflow.keras.models import model_from_json
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the model architecture from the JSON file
json_file_path = "/content/gdrive/MyDrive/KinetixPro/level 3 output/model_architecture_cnn.json"
with open(json_file_path, "r") as json_file:
    loaded_model_json = json_file.read()

model_tf = model_from_json(loaded_model_json)

# Load the model weights from the HDF5 file
weights_file_path = "/content/gdrive/MyDrive/KinetixPro/level 3 output/model_weights_cnn.h5"
model_tf.load_weights(weights_file_path)

model = YOLO('/content/gdrive/MyDrive/KinetixPro/yolov8s-pose.pt')

video_path = "/content/gdrive/MyDrive/KinetixPro/video/Samurai qualifications.mp4"
cap = cv2.VideoCapture(video_path)

print('Total Frame', cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc('F','M','P','4')
output_path = "/content/gdrive/MyDrive/KinetixPro/level 4 output/output_level4_CNN.avi"
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

frame_tot = 0
# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 inference on the frame
        results = model(frame, verbose = False)

        # Visualize the results on the frame
        annotated_frame = results[0].plot(boxes = False)

        for r in results:
          bound_box = r.boxes.xyxy
          conf = r.boxes.conf.tolist()
          keypoints = r.keypoints.xyn.tolist()

          for index, box in enumerate(bound_box):
            if conf[index] > 0.75:
                x1, y1, x2, y2 = box.tolist()
                data = frame[int(y1):int(y2), int(x1):int(x2)]
                data = cv2.resize(data, (256, 256))

                # Correct variable name from img_array to image_array
                data = data[:, :, ::-1]
                image_array = img_to_array(data)
                image_array = np.expand_dims(image_array, axis=0)

                cut = model_tf.predict(image_array, verbose=False)
                # print(cut)
                if cut[0][0] < 0.5:
                    cv2.rectangle(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
                    cv2.putText(annotated_frame, 'Cutting', (int(x1), int(y1)), cv2.FONT_HERSHEY_DUPLEX, 1.0, (255, 0, 0), 3)

        # Display the annotated frame
        # cv2_imshow(annotated_frame)

        # save the video
        out.write(annotated_frame)
        frame_tot += 1
        print('Processed Frame : ', frame_tot)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

Total Frame 2555.0


  updates=self.state_updates,


Processed Frame :  1
Processed Frame :  2
Processed Frame :  3
Processed Frame :  4
Processed Frame :  5
Processed Frame :  6
Processed Frame :  7
Processed Frame :  8
Processed Frame :  9
Processed Frame :  10
Processed Frame :  11
Processed Frame :  12
Processed Frame :  13
Processed Frame :  14
Processed Frame :  15
Processed Frame :  16
Processed Frame :  17
Processed Frame :  18
Processed Frame :  19
Processed Frame :  20
Processed Frame :  21
Processed Frame :  22
Processed Frame :  23
Processed Frame :  24
Processed Frame :  25
Processed Frame :  26
Processed Frame :  27
Processed Frame :  28
Processed Frame :  29
Processed Frame :  30
Processed Frame :  31
Processed Frame :  32
Processed Frame :  33
Processed Frame :  34
Processed Frame :  35
Processed Frame :  36
Processed Frame :  37
Processed Frame :  38
Processed Frame :  39
Processed Frame :  40
Processed Frame :  41
Processed Frame :  42
Processed Frame :  43
Processed Frame :  44
Processed Frame :  45
Processed Frame :  

# Reference

1. https://alimustoofaa.medium.com/yolov8-pose-estimation-and-pose-keypoint-classification-using-neural-net-pytorch-98469b924525

2. https://github.com/mmakos/HPC

3. https://s3.eu-central-1.amazonaws.com/ucu.edu.ua/wp-content/uploads/sites/8/2020/11/Zakharchenko-Iryna_188601_assignsubmission_file_Iryna-Zakharchenko.pdf