In [1]:
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import signal
import cv2
from PIL import Image
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import torch.optim as optim
import torchvision.transforms as transforms

In [15]:
# Module-level accumulator: extract_frames() adds every sampled edge map here and
# the statements below read it. Re-running this cell resets it to empty.
frames = []

def extract_frames(video_path, interval_seconds):
    """Sample a video every ``interval_seconds`` and keep a Canny edge map per sample.

    Each sampled frame is converted to grayscale, smoothed with a 5x5 Gaussian
    blur (to suppress noise before edge detection) and passed through Canny
    with thresholds 30/80.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        interval_seconds: Time between two sampled frames, in seconds.

    Returns:
        list: The edge maps sampled by this call, in video order. Empty when
        the video cannot be opened. The same maps are also appended to the
        module-level ``frames`` list.
    """
    cap = cv2.VideoCapture(video_path)
    sampled = []
    try:
        if not cap.isOpened():
            print("Error: Could not open video file:", video_path)
            return sampled

        fps = cap.get(cv2.CAP_PROP_FPS)
        # A missing/zero FPS or a sub-frame interval would give a step of 0 and
        # crash the modulo below; fall back to sampling every frame instead.
        raw_step = fps * interval_seconds
        interval_frames = int(raw_step) if raw_step >= 1 else 1

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % interval_frames == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                blurred = cv2.GaussianBlur(gray, (5, 5), 0)
                edges = cv2.Canny(blurred, 30, 80)
                sampled.append(edges)
    finally:
        # Always free the decoder handle, even if decoding raised.
        cap.release()

    frames.extend(sampled)
    return sampled

video_path = '/content/drive/MyDrive/Internshala/Videos.mp4'
interval_seconds = 1  # sample one frame per second of video

extract_frames(video_path, interval_seconds)

# Fail here with a clear message: an empty frame list would otherwise surface
# much later as an unrelated error in the prediction cell.
if not frames:
    raise RuntimeError(
        f"No frames were extracted from {video_path!r}; "
        "check that the file exists and is a readable video."
    )

# Stack the per-frame edge maps into one array with the frame index first.
enhanced_frames = np.array(frames)
total_number_of_frames = len(enhanced_frames)

# Tensor copy of the frames, consumed by the prediction cell.
frames_predictions = torch.tensor(enhanced_frames)


In [16]:
# Define the custom model class
class CustomModel(nn.Module):
    """Shared convolutional trunk followed by four independent 3-class heads.

    The trunk is five ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` stages
    that widen the channels 1 -> 16 -> 32 -> 64 -> 128 -> 256 and halve the
    spatial size each time. Every head is ``Linear(256*11*20, 512) -> ReLU ->
    Linear(512, 3)``.

    Input: float tensor shaped ``(N, 1, H, W)``. The heads' input size requires
    ``H // 32 == 11`` and ``W // 32 == 20`` (for example 360x640 frames).

    Output: tuple of four ``(N, 3)`` logit tensors, one per head. The notebook's
    labelling cell reads them, in order, as child gaze, therapist gaze, object
    interaction and engagement level.

    Attribute names and layer order define the ``state_dict`` keys of the saved
    checkpoint, so they must not be renamed or reordered.
    """

    def __init__(self):
        super().__init__()

        # Channel widths entering/leaving each of the five conv stages.
        widths = (1, 16, 32, 64, 128, 256)
        trunk_layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            trunk_layers.extend(
                (
                    nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(kernel_size=2, stride=2),
                )
            )
        self.features = nn.Sequential(*trunk_layers)

        # Flattened trunk output: 256 channels on an 11x20 grid.
        flat_features = 256 * 11 * 20
        self.output_branch1 = self._make_head(flat_features)
        self.output_branch2 = self._make_head(flat_features)
        self.output_branch3 = self._make_head(flat_features)
        self.output_branch4 = self._make_head(flat_features)

    @staticmethod
    def _make_head(in_features):
        """Build one classification head: in_features -> 512 -> 3 logits."""
        return nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Linear(512, 3),
        )

    def forward(self, x):
        """Run the trunk once and return the four heads' logits as a tuple."""
        shared = torch.flatten(self.features(x), 1)
        heads = (
            self.output_branch1,
            self.output_branch2,
            self.output_branch3,
            self.output_branch4,
        )
        return tuple(head(shared) for head in heads)

# Add a channel axis so the frames are shaped (N, 1, H, W), matching the
# single-channel Conv2d stem of CustomModel. `frames_predictions` is already a
# tensor, so convert it directly: re-wrapping it in torch.tensor() copies the
# data again and emits a UserWarning.
images_tensor = frames_predictions.float().unsqueeze(1)

# Dataset / loader used for prediction only; order is kept (shuffle=False) so
# row i of the predictions corresponds to frame i.
val_dataset = TensorDataset(images_tensor)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load the trained weights. map_location='cpu' lets a checkpoint saved on a GPU
# load on a CPU-only runtime; the inputs above live on the CPU as well.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (or pass weights_only=True where the installed torch supports it).
model = CustomModel()
state_dict = torch.load(
    '/content/drive/MyDrive/Colab Notebooks/contweights.pth',
    map_location='cpu',
)
model.load_state_dict(state_dict)
model.eval()  # evaluation mode for inference

# Predict the class index (0, 1 or 2) of each of the four heads for every frame.
batch_predictions = []
with torch.no_grad():
    for (inputs,) in val_loader:
        head_logits = model(inputs)
        # One column per head -> (batch, 4)
        batch_predictions.append(
            torch.stack([logits.argmax(dim=1) for logits in head_logits], dim=1)
        )

# (number_of_frames, 4) integer array of per-frame class indices.
predictions = torch.cat(batch_predictions).numpy()

# Average class index of each head over all frames of the video.
video_result = predictions.mean(axis=0)
print(video_result)

  images_tensor = torch.tensor(frames_predictions).float().unsqueeze(1)  # Convert grayscale images to PyTorch tensor and make it float


[1.48192771 1.75903614 1.62650602 1.74698795]


In [18]:
# Turn the per-video results into human-readable labels.

def _label_from_score(score, labels):
    """Map an averaged class index to labels[0], labels[1] or labels[2].

    Scores below 0.5 take the first label, scores below 1.5 the second and
    everything else (including NaN) the third.
    """
    if score < 0.5:
        return labels[0]
    if score < 1.5:
        return labels[1]
    return labels[2]


# Labels indexed by predicted class id.
LEVEL_LABELS = ('high', 'moderate', 'low')
OBJECT_LABELS = ('ball', 'puzzle', 'nor ball neighther puzzle something else')

# The three level-type heads use the per-video average of their class ids.
child_gaze = _label_from_score(video_result[0], LEVEL_LABELS)
theripist_gaze = _label_from_score(video_result[1], LEVEL_LABELS)
engagement_level = _label_from_score(video_result[3], LEVEL_LABELS)

# Object ids are categories, not a scale: averaging them can yield a class no
# frame predicted (half 'ball' (0) + half 'other' (2) averages to 1 = 'puzzle').
# Use the most frequent per-frame class instead; ties go to the lowest id.
object_votes = np.bincount(predictions[:, 2], minlength=len(OBJECT_LABELS))
object_interaction = OBJECT_LABELS[int(object_votes.argmax())]

print(f'child gaze is with therpist is {child_gaze} therpist gaze is with child is {theripist_gaze} \n the object with they interacting is {object_interaction} then engaement level betwwen them is {engagement_level}')



child gaze is with therpist is moderate therpist gaze is with child is low 
 the object with they interacting is nor ball neighther puzzle something else then engaement level betwwen them is low
