In [8]:
import cv2
import torch
from torchvision.transforms import transforms
from torchvision import models
from collections import Counter

In [9]:
model = models.resnet50(pretrained = True)
model.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
def load_model():
    with open("D:/New_Emotion/Emotion-detection/New folder/practice_cv/imagenet-simple-labels.json") as f:
        label = [line.strip() for line in f.readlines()]
    return label

labels = load_model()

In [16]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

In [29]:
def classify_frame(frame):
    frame = transform(frame)
    frame = frame.unsqueeze(0)
    with torch.no_grad():
        outputs = model(frame)
    _,predicted = outputs.max(1)
    label = labels[predicted.item()]
    
    return label

In [30]:
# Function to extract frames and classify video
def classify_video(video_path, frame_interval=30):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    classifications = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if frame_count % frame_interval == 0:
            label = classify_frame(frame)
            classifications.append(label)

        frame_count += 1

    cap.release()
    return classifications


In [31]:
classifications = classify_video("D:/New_Emotion/Emotion-detection/New folder/practice_cv/VID20230727123220.mp4")
common_label = Counter(classifications).most_common(1)[0][0]
print(common_label)


"park bench",


In [32]:
video_path ="D:/New_Emotion/Emotion-detection/New folder/practice_cv/VID20230727123220.mp4"
category = classify_video(video_path)
print(f"Predicted category: {category}")

Predicted category: ['"park bench",', '"cowboy hat",', '"hammer",', '"aircraft carrier",', '"rifle",', '"square academic cap",', '"swimming cap",', '"park bench",']
