# Video Classification with Pytorch

In this tutorial, I will guide video classification on [Coin Dataset](https://github.com/coin-dataset/annotations). Coin is a dataset that contains instructional videos with their corresponding action labels. There are various actions, but I choose 2 of them () for simplicity of tutorial.

Recipe Steps:
1. Download Data
2. 

In [196]:
import os
import json
import datetime
import pandas as pd

import yt_dlp
from yt_dlp import YoutubeDL

Target Id                                   0
Target Label             UseAnalyticalBalance
Action Id                                  65
Action Label    clean inner wall of container
Name: 0, dtype: object

In [197]:
class Taxonomy():
    def __init__(self, taxonomy_path) -> None:
        self.taxonomy_path = taxonomy_path
        df_taxonomy = pd.read_csv(taxonomy_path)

        self.target_id2label = {row["Target Id"]: row["Target Label"] for _, row in df_taxonomy.iterrows()}
        self.target_label2id = {j:i for i, j in self.target_id2label.items()}
        
        self.action_id2label = {row["Action Id"]: row["Action Label"] for _, row in df_taxonomy.iterrows()}


class Dataset():
    def __init__(self, coin_json_path, taxonomy_path) -> None:
        self.coin_json_path = coin_json_path
        self.taxonomy_path = taxonomy_path

        self.taxonomy = Taxonomy(taxonomy_path=taxonomy_path)
        self.raw_data = self.load_coin_json_from_file(coin_json_path)

    def load_coin_json_from_file(self, file_path):
        f = open(file_path)
        data = json.load(f)["database"]
        return data

    def create_dataset(self, target_label_list):
        target_ids = {i: [self.taxonomy.target_label2id[j] for j in target_label_list[i]] for i in range(len(target_label_list))}
        
        target_ids_reverse = {}
        for upper, target_id_list in target_ids.items():
            for each in target_id_list:
                target_ids_reverse[each] = upper
        
        dataset_list = []
        for sample_id, sample in self.raw_data.items():
            annotations = sample["annotation"]
            recipe_id = sample["recipe_type"]
            video_url = sample["video_url"]

            if recipe_id in target_ids_reverse.keys():
                for ann in annotations:
                    segment = f"{ann['segment'][0]}_{ann['segment'][1]}"
                    label = ann["label"]

                    dataset_list.append([target_ids_reverse[recipe_id], recipe_id, video_url, segment, label])

        pd_limited_data = pd.DataFrame(dataset_list, columns=["label", "action id", "url", "segment", "action label"])
        self.classes = pd_limited_data["label"].unique()
        
        return pd_limited_data

    def download_dataset(self, df_dataset: pd.DataFrame, save_folder, drop_none):
        paths = []

        os.makedirs(save_folder, exist_ok=True)
        for i in self.classes:
            os.makedirs(os.path.join(save_folder, str(i)), exist_ok=True)

        for index, sample in df_dataset.iterrows():
            label = sample.label
            url = sample.url
            video_url_id = url.split("/")[-1]

            URLS = [url]

            save_path = os.path.join(save_folder, str(label), video_url_id)
            ydl_opts = {
                'format': 'mp4',
                'outtmpl': save_path
            }

            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    error_code = ydl.download(URLS)
                paths.append(save_path)
            except:
                paths.append(None)

        df_dataset["paths"] = paths

        if drop_none:
            df_dataset = df_dataset.dropna()
            df_dataset = df_dataset.reset_index(drop=True)
        return df_dataset

In [198]:
dataset = Dataset(
    coin_json_path = "annotations/COIN.json",
    taxonomy_path = "annotations/target_action_mapping.csv"
)

target_label_list = [
    ["MakeSandwich", "CookOmelet", "MakePizza", "MakeYoutiao", "MakeBurger", "MakeFrenchFries"],
    ["AssembleBed", "AssembleSofa", "AssembleCabinet", "AssembleOfficeChair"],
]

df_dataset = dataset.create_dataset(target_label_list)[:10]
df_dataset.head()

Unnamed: 0,label,action id,url,segment,action label
0,1,3,https://www.youtube.com/embed/4kiZIDOUQAo,18.0_22.0,install legs of sofa
1,1,3,https://www.youtube.com/embed/4kiZIDOUQAo,28.0_44.0,put on sofa cover
2,1,3,https://www.youtube.com/embed/4kiZIDOUQAo,45.0_47.0,place cushion and backrest
3,0,79,https://www.youtube.com/embed/gnlUBK-cvfc,10.0_15.0,knead the meat
4,0,79,https://www.youtube.com/embed/gnlUBK-cvfc,25.5_29.0,fry meat


In [199]:
df_dataset = dataset.download_dataset(df_dataset=df_dataset, save_folder="coin_subset", drop_none=True)

[youtube] 4kiZIDOUQAo: Downloading webpage
[youtube] 4kiZIDOUQAo: Downloading android player API JSON
[info] 4kiZIDOUQAo: Downloading 1 format(s): 22
[download] coin_subset/1/4kiZIDOUQAo has already been downloaded
[download] 100% of 6.42MiB
[youtube] 4kiZIDOUQAo: Downloading webpage
[youtube] 4kiZIDOUQAo: Downloading android player API JSON
[info] 4kiZIDOUQAo: Downloading 1 format(s): 22
[download] coin_subset/1/4kiZIDOUQAo has already been downloaded
[download] 100% of 6.42MiB
[youtube] 4kiZIDOUQAo: Downloading webpage
[youtube] 4kiZIDOUQAo: Downloading android player API JSON
[info] 4kiZIDOUQAo: Downloading 1 format(s): 22
[download] coin_subset/1/4kiZIDOUQAo has already been downloaded
[download] 100% of 6.42MiB
[youtube] gnlUBK-cvfc: Downloading webpage
[youtube] gnlUBK-cvfc: Downloading android player API JSON
[info] gnlUBK-cvfc: Downloading 1 format(s): 22
[download] coin_subset/0/gnlUBK-cvfc has already been downloaded
[download] 100% of 12.73MiB
[youtube] gnlUBK-cvfc: Download

ERROR: [youtube] WE9j6r3s7OY: Private video. Sign in if you've been granted access to this video


[youtube] WE9j6r3s7OY: Downloading webpage
[youtube] WE9j6r3s7OY: Downloading android player API JSON


ERROR: [youtube] WE9j6r3s7OY: Private video. Sign in if you've been granted access to this video


[youtube] PDele0Lq-iM: Downloading webpage
[youtube] PDele0Lq-iM: Downloading android player API JSON
[info] PDele0Lq-iM: Downloading 1 format(s): 18
[download] coin_subset/1/PDele0Lq-iM has already been downloaded
[download] 100% of 6.35MiB
[youtube] PDele0Lq-iM: Downloading webpage
[youtube] PDele0Lq-iM: Downloading android player API JSON
[info] PDele0Lq-iM: Downloading 1 format(s): 18
[download] coin_subset/1/PDele0Lq-iM has already been downloaded
[download] 100% of 6.35MiB


In [202]:
df_dataset

Unnamed: 0,label,action id,url,segment,action label,paths
0,1,3,https://www.youtube.com/embed/4kiZIDOUQAo,18.0_22.0,install legs of sofa,coin_subset/1/4kiZIDOUQAo
1,1,3,https://www.youtube.com/embed/4kiZIDOUQAo,28.0_44.0,put on sofa cover,coin_subset/1/4kiZIDOUQAo
2,1,3,https://www.youtube.com/embed/4kiZIDOUQAo,45.0_47.0,place cushion and backrest,coin_subset/1/4kiZIDOUQAo
3,0,79,https://www.youtube.com/embed/gnlUBK-cvfc,10.0_15.0,knead the meat,coin_subset/0/gnlUBK-cvfc
4,0,79,https://www.youtube.com/embed/gnlUBK-cvfc,25.5_29.0,fry meat,coin_subset/0/gnlUBK-cvfc
5,0,79,https://www.youtube.com/embed/gnlUBK-cvfc,45.0_55.0,combine meat and bread to make burger,coin_subset/0/gnlUBK-cvfc
6,1,157,https://www.youtube.com/embed/PDele0Lq-iM,23.0_74.0,install the wheels for the base,coin_subset/1/PDele0Lq-iM
7,1,157,https://www.youtube.com/embed/PDele0Lq-iM,77.0_86.0,assemble the cushion and the backrest,coin_subset/1/PDele0Lq-iM


In [201]:
import torch
import torch.nn as nn


class ClassificationDataset(torch.utils.data.Dataset):
    def __init__(self, df_dataset) -> None:
        super().__init__()
        
        self.df_dataset = df_dataset
    
    def __len__(self):
        return len(df_dataset)

    def __getitem__(self, idx):
        video_path = df_dataset


classification_dataset = ClassificationDataset(dataset=df_dataset)