In [1]:
from __future__ import print_function

import glob
from itertools import chain
import os
import random
import cv2  # Assuming you have OpenCV installed
import torchvision
from torchvision.datasets import ImageFolder
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2
from linformer import Linformer
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
import itertools

from vit_pytorch.efficient import ViT

In [2]:
# Required Parameters
dataset = "C:\\Study\\OJT\\Dataset\\UCF11_updated_mpg"          # Dataset path
dataset2 = "C:\\Study\\OJT\\Dataset_Extraction\\UCF11"                   # Dataset2 path
train_path = "C:\\Study\\OJT\\Dataset_Extraction\\UCF11Train"             # Training path
test_path = "C:\\Study\\OJT\\Dataset_Extraction\\UCF11Test"             # Testing path
no_of_frames = 1650                     # Total number of frames to be extracted
categories = os.listdir(dataset)        # Name of each class/category

In [8]:
# Creating dataset directory
try:
    os.mkdir(dataset2)
    print("Folder {} created...".format(dataset2))
except:
    print("A folder {} already exists...".format(dataset2))

A folder C:\Study\OJT\Dataset_Extraction\UCF11 already exists...


In [9]:
# Creating training_set directory
try:
    os.mkdir(train_path)
    print("Folder {} created...".format(train_path))
except:
    print("A folder {} already exists...".format(train_path))

A folder C:\Study\OJT\Dataset_Extraction\UCF11Train already exists...


In [10]:
# Creating testing_set directory
try:
    os.mkdir(test_path)
    print("Folder {} created...".format(test_path))
except:
    print("A folder {} already exists...".format(test_path))

A folder C:\Study\OJT\Dataset_Extraction\UCF11Test already exists...


In [11]:
# Creating same directories for dataset2/ that are already present in the dataset directory
for category in categories:
    try:
        os.mkdir(os.path.join(dataset2, category))
        print("Folder {} created...".format(category))
    except:
        print("A folder already exists, named {}...".format(category, dataset))

A folder already exists, named basketball...
A folder already exists, named biking...
A folder already exists, named diving...
A folder already exists, named golf_swing...
A folder already exists, named horse_riding...
A folder already exists, named soccer_juggling...
A folder already exists, named swing...
A folder already exists, named tennis_swing...
A folder already exists, named trampoline_jumping...
A folder already exists, named volleyball_spiking...
A folder already exists, named walking...


In [12]:
for category in categories:
    try:
        os.mkdir(os.path.join(train_path, category))
        print("Folder {} created...".format(category))
    except:
        print("A folder already exists, named {}...".format(category, train_path))


A folder already exists, named basketball...
A folder already exists, named biking...
A folder already exists, named diving...
A folder already exists, named golf_swing...
A folder already exists, named horse_riding...
A folder already exists, named soccer_juggling...
A folder already exists, named swing...
A folder already exists, named tennis_swing...
A folder already exists, named trampoline_jumping...
A folder already exists, named volleyball_spiking...
A folder already exists, named walking...


In [13]:
# Creating same directories for testing_set/ that are already present in the dataset directory
for category in categories:
    try:
        os.mkdir(os.path.join(test_path, category))
        print("Folder {} created...".format(category))
    except:
        print("A folder already exists, named {}...".format(category, test_path))


A folder already exists, named basketball...
A folder already exists, named biking...
A folder already exists, named diving...
A folder already exists, named golf_swing...
A folder already exists, named horse_riding...
A folder already exists, named soccer_juggling...
A folder already exists, named swing...
A folder already exists, named tennis_swing...
A folder already exists, named trampoline_jumping...
A folder already exists, named volleyball_spiking...
A folder already exists, named walking...


In [10]:
# Combining multiple videos into single video file
for category in tqdm(categories):
    videofiles = glob.glob(os.path.join(dataset, category, "**/*.mpg"), recursive=True)
    if videofiles:
        cap = cv2.VideoCapture(videofiles[0])
    else:
        print("No video files found in {}/{}".format(dataset, category))
    video_index = 0
    cap = cv2.VideoCapture(videofiles[0])    
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    # fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter("{}/{}/{}.mpg".format(dataset2, category, category), fourcc, 25, (320, 240))
    while(cap.isOpened()):
        ret, frame = cap.read()
        if frame is None:
            video_index += 1
            if video_index >= len(videofiles):
                break
            else:
                cap = cv2.VideoCapture(videofiles[ video_index ])
                ret, frame = cap.read()
                out.write(frame)
        else:
            out.write(frame)
    cap.release()
    out.release()
    

  0%|          | 0/11 [00:00<?, ?it/s]

In [14]:
# Saving total no. of frames of each classes/categories into an array
total_frames = []
for category in tqdm(categories):
    cap = cv2.VideoCapture(dataset2 + "/" + category + "/" + category + ".mpg")
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_frames.append(length)
    cap.release()
    
print(total_frames)

  0%|          | 0/11 [00:00<?, ?it/s]

[18247, 30554, 26591, 22528, 34639, 42460, 25130, 25016, 21291, 12076, 25593]


In [12]:
# Extracting one frame per five frame from the Videos
for category in tqdm(categories):
    count = 0    
    a = glob.glob(dataset2 + '/' + category + '/' + category +'.mpg')
    for i in range(len(a)):
        cap = cv2.VideoCapture(a[i])
        frameRate = cap.get(5)
        while(cap.isOpened()):
            frameId = cap.get(1)
            ret, frame = cap.read()
            if (ret != True):
                break
            if (frameId % math.floor(frameRate) == 0):
                cv2.imwrite(train_path + '/' + category + '/{}_{}.jpg'.format(category, count), frame)
                count += 1
        cap.release()

  0%|          | 0/11 [00:00<?, ?it/s]

In [13]:
# Moving 150 random images from training_set into testing_set
for category in tqdm(categories):
    sub_file = [file for file in glob.glob(train_path +'/'+ category +'/'+ "*.jpg")]
    test_files = random.sample(sub_file, 25)
    for test_file in test_files:
        img = cv2.imread(test_file)
        os.remove(test_file)
        test_filename = os.path.basename(test_file)
        cv2.imwrite(test_path +'/' + category + '/' + test_filename , img)
        

  0%|          | 0/11 [00:00<?, ?it/s]

In [3]:
efficient_transformer = Linformer(
    dim=128,
    seq_len=49+1,  # 7x7 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64
)

In [4]:
model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=11,
    transformer=efficient_transformer,
    channels=3,
)

In [15]:
# Training settings
batch_size = 64
epochs = 20
lr = 0.001
gamma = 0.7
seed = 42

In [16]:
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

In [7]:
# Define the transformation for loading and preprocessing images
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),  # You can adjust the crop size as needed
    transforms.ToTensor(),
])

In [8]:
# Create a custom dataset for the entire training set
full_dataset = ImageFolder(train_path, transform=transform)

In [9]:
# Split the dataset into training and validation sets
train_size = int(0.8 * len(full_dataset))  # 80% for training
val_size = len(full_dataset) - train_size  # 20% for validation
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

In [10]:
# Create custom datasets for training and test
train_dataset = ImageFolder(train_path, transform=transform)
test_dataset = ImageFolder(test_path, transform=transform)

In [11]:
# Create data loaders for training and test sets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [13]:
# Initialize variables for early stopping
best_val_accuracy = 0.0
patience = 10  # Number of epochs to wait for improvement
counter = 0

In [17]:
# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if batch_idx % 10 == 9:  # Print every 10 batches
            print(f"Epoch {epoch+1}, Batch {batch_idx+1}/{len(train_loader)}, Loss: {running_loss / 10:.4f}, Training Accuracy: {100 * correct / total:.2f}%")
            running_loss = 0.0

    scheduler.step()

    # Validation accuracy calculation
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for val_inputs, val_labels in val_loader:
            val_outputs = model(val_inputs)
            _, val_predicted = torch.max(val_outputs.data, 1)
            val_total += val_labels.size(0)
            val_correct += (val_predicted == val_labels).sum().item()

    val_accuracy = 100 * val_correct / val_total
    print(f"Epoch {epoch+1}, Training Accuracy: {100 * correct / total:.2f}%, Validation Accuracy: {val_accuracy:.2f}%")

# Evaluation on the test set
model.eval()
test_correct = 0
test_total = 0

with torch.no_grad():
    for test_inputs, test_labels in test_loader:
        test_outputs = model(test_inputs)
        _, test_predicted = torch.max(test_outputs.data, 1)
        test_total += test_labels.size(0)
        test_correct += (test_predicted == test_labels).sum().item()

print(f"Test Accuracy: {100 * test_correct / test_total:.2f}%")
torch.save(model.state_dict(), 'ViT_11_pth')

Epoch 1, Batch 10/174, Loss: 2.2322, Training Accuracy: 25.78%
Epoch 1, Batch 20/174, Loss: 2.1763, Training Accuracy: 23.28%
Epoch 1, Batch 30/174, Loss: 1.9882, Training Accuracy: 25.26%
Epoch 1, Batch 40/174, Loss: 1.9207, Training Accuracy: 27.38%
Epoch 1, Batch 50/174, Loss: 1.7644, Training Accuracy: 29.19%
Epoch 1, Batch 60/174, Loss: 1.6437, Training Accuracy: 31.74%
Epoch 1, Batch 70/174, Loss: 1.3805, Training Accuracy: 34.84%
Epoch 1, Batch 80/174, Loss: 1.2276, Training Accuracy: 37.77%
Epoch 1, Batch 90/174, Loss: 1.1877, Training Accuracy: 40.30%
Epoch 1, Batch 100/174, Loss: 1.0411, Training Accuracy: 42.80%
Epoch 1, Batch 110/174, Loss: 1.0043, Training Accuracy: 45.00%
Epoch 1, Batch 120/174, Loss: 1.0570, Training Accuracy: 46.64%
Epoch 1, Batch 130/174, Loss: 0.9661, Training Accuracy: 48.33%
Epoch 1, Batch 140/174, Loss: 0.9383, Training Accuracy: 49.88%
Epoch 1, Batch 150/174, Loss: 0.9387, Training Accuracy: 51.17%
Epoch 1, Batch 160/174, Loss: 0.8445, Training Ac

In [14]:
# Initialize the models
model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=11,
    transformer=efficient_transformer,
    channels=3,
)
model.load_state_dict(torch.load("C:\\Users\\Admin\\AppData\\Local\\Programs\\Microsoft VS Code\\ViT_11_pth"))  # Load the trained model weights
model.eval()  # Set the model to evaluation mode


ViT(
  (to_patch_embedding): Sequential(
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=32, p2=32)
    (1): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
    (2): Linear(in_features=3072, out_features=128, bias=True)
    (3): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  )
  (transformer): Linformer(
    (net): SequentialSequence(
      (layers): ModuleList(
        (0-11): 12 x ModuleList(
          (0): PreNorm(
            (fn): LinformerSelfAttention(
              (to_q): Linear(in_features=128, out_features=128, bias=False)
              (to_k): Linear(in_features=128, out_features=128, bias=False)
              (to_v): Linear(in_features=128, out_features=128, bias=False)
              (dropout): Dropout(p=0.0, inplace=False)
              (to_out): Linear(in_features=128, out_features=128, bias=True)
            )
            (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          )
          (1): PreNorm(
            (fn): Fe