<a href="https://colab.research.google.com/github/SahilCarterr/Dash-Cam-Video-Analyzer/blob/New-Code-Updates/animal_model_with_interface.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!wget https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json
!pip install pytorchvideo
from typing import Dict
import torch
import json
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo
)

class VideoClassifier:
    """Flag animal-related activity in a video using a pretrained SlowFast model.

    The model (``slowfast_r50`` from the ``facebookresearch/pytorchvideo`` hub)
    predicts Kinetics action labels for the first clip of a video. The video is
    reported as containing an animal when any word of the top-ranked labels is
    one of the names in :attr:`ANIMALS`.

    Construction loads the model weights through ``torch.hub`` and reads
    ``kinetics_classnames.json`` from the current working directory.
    """

    # Sampling parameters shared by the transform and the clip-length
    # computation, so the two cannot drift apart.
    NUM_FRAMES = 32
    SAMPLING_RATE = 2
    FRAMES_PER_SECOND = 30
    # Ratio between the frame counts of the fast and the slow pathway.
    SLOWFAST_ALPHA = 4
    # Number of top-ranked labels scanned for animal words.
    TOP_K = 100

    # Words that mark a predicted label as animal-related (exact,
    # case-sensitive match against whitespace-separated label words).
    ANIMALS = frozenset({
        "antelope", "badger", "bear", "bison", "boar", "cat", "chimpanzee", "cow",
        "coyote", "deer", "dog", "donkey", "elephant", "fox", "goat", "goose",
        "gorilla", "hare", "hedgehog", "hippopotamus", "horse", "hyena", "kangaroo",
        "koala", "leopard", "lion", "okapi", "orangutan", "otter", "ox", "panda",
        "pig", "porcupine", "possum", "raccoon", "reindeer", "rhinoceros", "sheep",
        "snake", "tiger", "wolf", "zebra",
    })

    def __init__(self, device="cuda"):
        """Load the pretrained model, the label map and the preprocessing pipeline.

        Args:
            device: Device the model and inputs are moved to. When a CUDA
                device is requested but CUDA is unavailable, the CPU is used
                instead and a ``RuntimeWarning`` is emitted.
        """
        self.device = self._resolve_device(device)
        self.model_name = "slowfast_r50"
        self.model = torch.hub.load("facebookresearch/pytorchvideo", model=self.model_name, pretrained=True)
        self.model = self.model.to(self.device).eval()

        with open("kinetics_classnames.json", "r", encoding="utf-8") as f:
            self.kinetics_classnames = json.load(f)

        # The JSON maps label -> id; invert it and strip the double quotes
        # embedded in the label strings.
        self.kinetics_id_to_classname = {
            class_id: str(label).replace('"', "")
            for label, class_id in self.kinetics_classnames.items()
        }

        self.transform = self._create_transform()

    @staticmethod
    def _resolve_device(device):
        """Return ``device``, or ``"cpu"`` when CUDA is requested but unavailable."""
        if torch.device(device).type == "cuda" and not torch.cuda.is_available():
            import warnings

            warnings.warn(
                "CUDA is not available; falling back to CPU.", RuntimeWarning
            )
            return "cpu"
        return device

    @classmethod
    def _contains_animal(cls, class_names):
        """Return True if any whitespace-separated word of any label is in ANIMALS."""
        return any(
            word in cls.ANIMALS
            for label in class_names
            for word in label.split()
        )

    def _create_transform(self):
        """Build the preprocessing pipeline applied to the ``"video"`` entry of a clip.

        Steps, in order: uniform temporal subsampling to ``NUM_FRAMES`` frames,
        scaling pixel values by 1/255, per-channel normalisation, short-side
        resize, centre crop, and packing into the ``[slow, fast]`` pathway list.
        """
        side_size = 256
        mean = [0.45, 0.45, 0.45]
        std = [0.225, 0.225, 0.225]
        crop_size = 256
        alpha = self.SLOWFAST_ALPHA

        class PackPathway(torch.nn.Module):
            """Turn one frame tensor into the ``[slow, fast]`` input list."""

            def __init__(self):
                super().__init__()

            def forward(self, frames: torch.Tensor):
                # The fast pathway keeps every frame; the slow pathway keeps
                # 1/alpha of them, evenly spaced along dim 1 (the time axis of
                # pytorchvideo clips -- assumed (C, T, H, W), confirm upstream).
                fast_pathway = frames
                slow_pathway = torch.index_select(
                    frames,
                    1,
                    torch.linspace(
                        0, frames.shape[1] - 1, frames.shape[1] // alpha
                    ).long(),
                )
                return [slow_pathway, fast_pathway]

        return ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(self.NUM_FRAMES),
                    Lambda(lambda x: x / 255.0),
                    NormalizeVideo(mean, std),
                    ShortSideScale(size=side_size),
                    CenterCropVideo(crop_size),
                    PackPathway(),
                ]
            ),
        )

    def classify_video(self, video_path: str) -> set:
        """Report whether the first clip of a video looks animal-related.

        Only the clip starting at second 0 and lasting
        ``NUM_FRAMES * SAMPLING_RATE / FRAMES_PER_SECOND`` seconds is analysed.

        Args:
            video_path: Path of the video file to analyse.

        Returns:
            A one-element set holding either ``"Animal Found"`` or
            ``"Not found"``. (The set container is kept for backward
            compatibility with existing callers.)
        """
        clip_duration = (self.NUM_FRAMES * self.SAMPLING_RATE) / self.FRAMES_PER_SECOND
        start_sec = 0
        end_sec = start_sec + clip_duration

        video = EncodedVideo.from_path(video_path)
        video_data = video.get_clip(start_sec=start_sec, end_sec=end_sec)
        video_data = self.transform(video_data)

        # Add a leading batch dimension to each pathway and move it to the
        # model's device.
        inputs = [pathway.to(self.device)[None, ...] for pathway in video_data["video"]]

        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            preds = self.model(inputs)
            preds = torch.nn.functional.softmax(preds, dim=1)

        # Never ask for more entries than the model has classes.
        top_k = min(self.TOP_K, preds.shape[1])
        pred_classes = preds.topk(k=top_k).indices

        pred_class_names = [self.kinetics_id_to_classname[int(i)] for i in pred_classes[0]]

        found = self._contains_animal(pred_class_names)

        return {"Animal Found" if found else "Not found"}

# Example usage: constructing the classifier loads the pretrained
# "slowfast_r50" model and reads kinetics_classnames.json, so it is done once.
classifier = VideoClassifier()
# To classify a local file, pass its path to classify_video(), e.g.:
# result = classifier.classify_video("car_racing1.mp4")
# print(result)

--2024-02-25 17:09:10--  https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 13.226.210.25, 13.226.210.78, 13.226.210.15, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|13.226.210.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10326 (10K) [text/plain]
Saving to: ‘kinetics_classnames.json.1’


2024-02-25 17:09:10 (236 MB/s) - ‘kinetics_classnames.json.1’ saved [10326/10326]



Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [4]:
!pip install gradio
import gradio as gr


# Instantiate the VideoClassifier once at module level; the Gradio callback
# below reuses this single instance instead of reloading the model per request.
classifier = VideoClassifier()

# Gradio callback: classify the uploaded video and return display text.
def classify_video(video):
    """Classify an uploaded video and return a human-readable verdict.

    Args:
        video: Value supplied by the Gradio ``"video"`` input. It is forwarded
            unchanged to ``classifier.classify_video`` as the video path; it is
            empty/``None`` when the user submits without uploading anything.

    Returns:
        The verdict as plain text, or a prompt asking for an upload when no
        video was provided.
    """
    # Guard against a submit with no upload; otherwise the classifier would
    # fail while trying to open a missing path.
    if not video:
        return "Please upload a video."

    result = classifier.classify_video(video)

    # The classifier wraps its verdict in a container; unwrap it so the text
    # box shows "Animal Found" rather than the container's repr.
    if isinstance(result, (set, frozenset, list, tuple)):
        return ", ".join(str(item) for item in result)
    return str(result)

# Create the Gradio interface: one video upload in, one text verdict out.
iface = gr.Interface(
    fn=classify_video,
    inputs="video",
    outputs="text",
    title="Animal Detection Model",
    description="Upload a video here.",
    # Gradio documents string modes ("never", "auto", "manual") for this
    # option; "never" disables flagging, which is what the boolean False
    # was meant to express.
    allow_flagging="never",
)

# Launch the interface
iface.launch()



Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://444a865b35403d5ed2.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


