# Dependencies

In [1]:
import pandas as pd
import numpy as np
import os
import random
import shutil
import cv2
from ultralytics import YOLO

# Create dataset

In [2]:
# Load in the tag file
train_tag = pd.read_csv('../Q3_data/train_tag.txt', sep=",", header=None)
train_tag.columns = ["video", "tag"]
train_tag

Unnamed: 0,video,tag
0,873879927.mp4,3
1,872438072.mp4,3
2,796902701_219_229.mp4,7
3,829923135_39_49.mp4,7
4,859022208.mp4,3
...,...,...
2058,304035088.mp4,0
2059,303734394.mp4,0
2060,892673978.mp4,3
2061,613605247.mp4,9


In [3]:
# Create the Dataset
train_folder = '../Q3_data/train_img/train'
test_folder = '../Q3_data/train_img/test'
os.makedirs(train_folder)
os.makedirs(test_folder)
for i in range(15):
    folder_name = f"L{i}"
    os.mkdir(train_folder+'/'+folder_name)
    os.mkdir(test_folder+'/'+folder_name)

# Frame extraction

In [4]:
folder_path = '../Q3_data/train_video'
# Extract frames for each video
for index, row in train_tag.iterrows():
    video_path = os.path.join(folder_path, row.video)
    row_video = row.video.replace(".mp4", "")
    path_out = os.path.join('../Q3_data/train_img', f'train/L{row.tag}',f'{row_video}_')

    cap = cv2.VideoCapture(video_path)
    
    # Obtain the total frame rate and frame rate of the video
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    n = total_frames//20
    
    for i in range(n-1):
        # Set the current frame as the first frame in the middle of the video
        cap.set(cv2.CAP_PROP_POS_FRAMES, 10*(i+1))
        # Read the frame of images
        ret, frame = cap.read()
        # Save the images
        try:
            cv2.imwrite(path_out+f'frame{i}.jpg', frame)
        except:
            print(path_out+f'frame{i}.jpg')

    # Close video file
    cap.release()

### Create validation set

In [5]:
for i in range(15):
    # Get all file names under the train folder
    files = os.listdir(train_folder+f'/L{i}')
    
    # Calculate the number of files that need to be moved
    num_files = len(files)
    num_test_files = int(num_files * 0.1)
    
    # Randomly select files to move
    test_files = random.sample(files, num_test_files)
    
    # Move files
    for file in test_files:
        src = os.path.join(train_folder+f'/L{i}', file)
        dst = os.path.join(test_folder+f'/L{i}', file)
        shutil.move(src, dst)

# Image classification

In [6]:
# Load a model
model = YOLO('yolov8m-cls.pt')  # build a new model from YAML
# Train the model
results = model.train(data='../Q3_data/train_img', epochs=20)

Ultralytics YOLOv8.0.227 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4070 Ti, 12282MiB)
[34m[1mengine\trainer: [0mtask=classify, mode=train, model=yolov8m-cls.pt, data=../Q3_data/train_img, epochs=20, patience=50, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True

[34m[1mtrain: [0mScanning C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_data\train_img\train... 30955 images, 0 corrupt: 100%|██████████| 30955/30955 [00:20<00:00, 1506.97it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_data\train_img\train.cache


[34m[1mval: [0mScanning C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_data\train_img\test... 3431 images, 0 corrupt: 100%|██████████| 3431/3431 [00:02<00:00, 1233.29it/s]


[34m[1mval: [0mNew cache created: C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_data\train_img\test.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 38 weight(decay=0.0), 39 weight(decay=0.0005), 39 bias(decay=0.0)
Image sizes 224 train, 224 val
Using 8 dataloader workers
Logging results to [1mC:\Users\Lee\runs\classify\train8[0m
Starting training for 20 epochs...

      Epoch    GPU_mem       loss  Instances       Size


       1/20       1.2G     0.6193         11        224: 100%|██████████| 1935/1935 [01:22<00:00, 23.54it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:02<00:00, 37.15it/s]


                   all      0.955      0.994

      Epoch    GPU_mem       loss  Instances       Size


       2/20      1.16G     0.1996         11        224: 100%|██████████| 1935/1935 [00:51<00:00, 37.92it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 63.16it/s]

                   all       0.95      0.997






      Epoch    GPU_mem       loss  Instances       Size


       3/20      1.09G     0.1777         11        224: 100%|██████████| 1935/1935 [01:00<00:00, 32.02it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 65.34it/s]

                   all      0.952      0.996






      Epoch    GPU_mem       loss  Instances       Size


       4/20      1.08G     0.1151         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.28it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 64.83it/s]

                   all      0.969      0.999






      Epoch    GPU_mem       loss  Instances       Size


       5/20      1.08G    0.07344         11        224: 100%|██████████| 1935/1935 [00:44<00:00, 43.72it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 67.04it/s]

                   all      0.973      0.999






      Epoch    GPU_mem       loss  Instances       Size


       6/20      1.19G    0.05747         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.27it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 67.67it/s]

                   all      0.975      0.999






      Epoch    GPU_mem       loss  Instances       Size


       7/20      1.18G    0.04276         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.15it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 66.01it/s]

                   all      0.983      0.999






      Epoch    GPU_mem       loss  Instances       Size


       8/20      1.08G    0.02997         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.67it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 67.08it/s]

                   all      0.985      0.999






      Epoch    GPU_mem       loss  Instances       Size


       9/20      1.11G    0.02921         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.16it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 66.10it/s]

                   all      0.984      0.999






      Epoch    GPU_mem       loss  Instances       Size


      10/20      1.08G    0.02724         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.11it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 64.48it/s]

                   all      0.987      0.999






      Epoch    GPU_mem       loss  Instances       Size


      11/20      1.18G    0.01802         11        224: 100%|██████████| 1935/1935 [00:46<00:00, 41.91it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 68.35it/s]

                   all      0.988      0.999






      Epoch    GPU_mem       loss  Instances       Size


      12/20      1.09G    0.01178         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.81it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 66.83it/s]

                   all      0.989      0.999






      Epoch    GPU_mem       loss  Instances       Size


      13/20      1.09G    0.01202         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.92it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 67.33it/s]

                   all       0.99      0.998






      Epoch    GPU_mem       loss  Instances       Size


      14/20      1.17G    0.01217         11        224: 100%|██████████| 1935/1935 [00:42<00:00, 45.37it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 68.14it/s]

                   all      0.991      0.999






      Epoch    GPU_mem       loss  Instances       Size


      15/20      1.18G   0.009854         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.83it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 67.37it/s]

                   all      0.989      0.999






      Epoch    GPU_mem       loss  Instances       Size


      16/20      1.09G   0.004356         11        224: 100%|██████████| 1935/1935 [00:42<00:00, 45.73it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 65.14it/s]

                   all      0.991      0.999






      Epoch    GPU_mem       loss  Instances       Size


      17/20      1.13G   0.003568         11        224: 100%|██████████| 1935/1935 [00:42<00:00, 45.45it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 68.44it/s]

                   all      0.992      0.999






      Epoch    GPU_mem       loss  Instances       Size


      18/20       1.2G   0.002797         11        224: 100%|██████████| 1935/1935 [00:44<00:00, 43.69it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 67.29it/s]

                   all      0.992      0.999






      Epoch    GPU_mem       loss  Instances       Size


      19/20      1.07G   0.002555         11        224: 100%|██████████| 1935/1935 [00:42<00:00, 45.06it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 69.86it/s]

                   all      0.992      0.999






      Epoch    GPU_mem       loss  Instances       Size


      20/20      1.09G   0.002239         11        224: 100%|██████████| 1935/1935 [00:43<00:00, 44.78it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 66.79it/s]

                   all      0.993      0.999






20 epochs completed in 0.278 hours.
Optimizer stripped from C:\Users\Lee\runs\classify\train8\weights\last.pt, 31.7MB
Optimizer stripped from C:\Users\Lee\runs\classify\train8\weights\best.pt, 31.7MB

Validating C:\Users\Lee\runs\classify\train8\weights\best.pt...
Ultralytics YOLOv8.0.227 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4070 Ti, 12282MiB)
YOLOv8m-cls summary (fused): 103 layers, 15781871 parameters, 0 gradients, 41.6 GFLOPs
[34m[1mtrain:[0m C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_data\train_img\train... found 30955 images in 15 classes ✅ 
[34m[1mval:[0m None...
[34m[1mtest:[0m C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_data\train_img\test... found 3431 images in 15 classes ✅ 


               classes   top1_acc   top5_acc: 100%|██████████| 108/108 [00:01<00:00, 64.29it/s]


                   all      0.993      0.999
Speed: 0.0ms preprocess, 0.2ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mC:\Users\Lee\runs\classify\train8[0m
Results saved to [1mC:\Users\Lee\runs\classify\train8[0m


# Prediction

In [11]:
# Create a dictionary to map the label from yolo model
names = {0: 0, 1: 1, 2: 10, 3: 11, 4: 12, 5: 13, 6: 14, 7: 2, 8: 3, 9: 4, 10: 5, 11: 6, 12: 7, 13: 8, 14: 9}
test_path = '../Q3_data/test_video'
test_files = os.listdir(test_path)
model = YOLO('./last.pt')
label = []
for file in test_files:
    count = np.zeros(15)
    cap = cv2.VideoCapture(test_path+'/'+file)
    
    # Obtain the total frame rate and frame rate of the video
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    n = total_frames//20

    for i in range(n-1):
        # Set the current frame as the first frame in the middle of the video
        cap.set(cv2.CAP_PROP_POS_FRAMES, 10*(i+1))
        ret, frame = cap.read()
        try:
            cv2.imwrite('../Q3_data/test_img/test.jpg', frame)
        except:
            print('../Q3_data/test_img/test.jpg')
        
        # Predict by model
        results = model('../Q3_data/test_img/test.jpg')
        probs = results[0].probs.top1
        # Count the label appears most 
        count[probs] += 1
    
    result = np.argmax(count)
    label.append(names[result])
    
    # Close video file
    cap.release()

image 1/1 C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_code\..\Q3_data\test_img\test.jpg: 224x224 L4 1.00, L3 0.00, L1 0.00, L8 0.00, L7 0.00, 9.0ms
Speed: 1.0ms preprocess, 9.0ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)
image 1/1 C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_code\..\Q3_data\test_img\test.jpg: 224x224 L4 1.00, L3 0.00, L8 0.00, L1 0.00, L14 0.00, 8.0ms
Speed: 2.0ms preprocess, 8.0ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

image 1/1 C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_code\..\Q3_data\test_img\test.jpg: 224x224 L4 1.00, L3 0.00, L8 0.00, L1 0.00, L14 0.00, 8.0ms
Speed: 1.0ms preprocess, 8.0ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

image 1/1 C:\Users\Lee\Desktop\DSAA5002_50015811_LIYANTING_final\Q3\Q3_code\..\Q3_data\test_img\test.jpg: 224x224 L4 1.00, L3 0.00, L8 0.00, L14 0.00, L1 0.00, 15.0ms
Speed: 1.0ms preprocess, 15.0ms inference, 0.0

# Output

In [14]:
df = pd.DataFrame({
    'file_name': test_files,
    'label': label
})
df.to_csv('../Q3_output.csv', index=False)
df

Unnamed: 0,file_name,label
0,30018754.mp4,4
1,303746162.mp4,0
2,303771055.mp4,0
3,303783414.mp4,0
4,303866446.mp4,0
...,...,...
557,898969484_32_42.mp4,2
558,898979241_23_33.mp4,13
559,899183470.mp4,3
560,899731707_220_230.mp4,7


In [2]:
df = pd.read_csv('../Q3_output.csv')

In [9]:
random_row = df.sample(n=10)

for index, row in random_row.iterrows():
    test_path = '../Q3_data/test_video/'+row['file_name']
    cap = cv2.VideoCapture(test_path)
    success, frame = cap.read()
    cv2.imshow("Screenshot", frame)
    cv2.waitKey(0)
    
    # 关闭窗口
    cv2.destroyAllWindows()
    print(row['label'])


11
9
4
7
4
2
10
3
13
14
