# IMPORT

In [16]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
import os

# NOTE(review): machine-specific absolute working directory. Every relative
# path used later in this notebook ('data/...', 'features_...') is resolved
# against it, so the notebook only runs as-is on a machine with this layout.
os.chdir(r'F:\\UNIVERSITY\\UNIVERSITY_DOCUMENTS\\CS231\\doan_v2')

import torch.functional as F
import torch

from skimage import io, transform
from tqdm import tqdm
from torch import nn
from torch import optim

from sklearn.preprocessing import MultiLabelBinarizer

from torch.utils.data import Dataset, DataLoader
from torch.nn import Linear, ReLU, Sigmoid, Softmax, Dropout, Sequential

from torchvision import transforms, utils
from torchvision.transforms import ToTensor
from torchvision.models import vgg16, resnet50, densenet169
from torchvision.models.vgg import VGG16_Weights
import torch.nn.functional as F

In [2]:
# Load json
def load_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with open(path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

# CUSTOM DATASET

In [5]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [6]:
def collate_fn(batch):
    """Merge a list of dataset samples into one batch.

    Args:
        batch: list of dicts with the keys ``'id'``, ``'image'``, ``'name'``
            and ``'genre'``; ``'image'`` is indexed as (channels, ...) and
            ``'genre'`` is a tensor.

    Returns:
        ``(ids, images, genres, names)`` where ``ids`` and ``names`` are plain
        lists and ``images`` / ``genres`` are stacked float32 tensors. Every
        image is forced to exactly 3 channels: missing channels are
        zero-filled, extra channels (e.g. alpha) are dropped.
    """
    ids = [item['id'] for item in batch]
    names = [item['name'] for item in batch]

    # .float() changes only the dtype. The previous .type(torch.FloatTensor)
    # cast to the *CPU* float tensor type, which silently copied CUDA tensors
    # back to host memory.
    images = [item['image'].float() for item in batch]
    genres = [item['genre'].float() for item in batch]

    for i, image in enumerate(images):
        missing_channels = 3 - image.shape[0]
        if missing_channels > 0:
            # F.pad reads the pad tuple from the last dimension backwards:
            # (W_left, W_right, H_top, H_bottom, C_front, C_back).
            images[i] = F.pad(image, (0, 0, 0, 0, 0, missing_channels))
        elif missing_channels < 0:
            images[i] = image[:3, :, :]

    return ids, torch.stack(images), torch.stack(genres), names

class MovieGenreDataset(Dataset):
    """In-memory dataset of movie images with multi-hot genre vectors.

    Everything is loaded eagerly in ``__init__``: each record of the JSON file
    is read, its image is loaded, resized to ``config['img_size']`` when
    needed, converted to a tensor and moved to ``device``. Records whose image
    cannot be loaded or processed are skipped and listed in ``self.skipped``.

    Each sample is a dict with the keys ``'id'``, ``'image'``, ``'name'`` and
    ``'genre'``.
    """

    def __init__(self, config, path='data/train.json') -> None:
        """Load and preprocess every record of ``path``.

        Args:
            config: mapping that provides ``'img_size'``, the target
                (rows, cols) of every image.
            path: JSON file mapping an id to a record with the keys
                ``'genre'``, ``'name'`` and ``'img_path'``.
        """
        super().__init__()

        # ndarray.shape[:2] is a tuple; normalise so the size check below also
        # works when the config stores the size as a list (tuple != list is
        # always True, which forced a resize of every image).
        img_size = tuple(config['img_size'])

        json_file = load_json(path)
        self.data = []
        # (id, repr(error)) of every record dropped while loading.
        self.skipped = []
        self.classes_ = [
            'action', 'adventure', 'comedy', 'crime', 'documentary', 'drama', 'fantasy', 'horror', 'mystery', 'thriller', 'romance', 'scifi', 'others'
        ]

        # Multi-hot encoder for the genre lists.
        # NOTE(review): MultiLabelBinarizer.fit is documented to store the
        # *sorted* label set, so the columns of the encoded vector follow
        # mlb.classes_ and not the order of self.classes_ - confirm before
        # decoding predictions with self.classes_.
        mlb = MultiLabelBinarizer()
        mlb.fit([self.classes_])

        # np.ndarray image -> torch.Tensor
        trans = transforms.Compose([
            transforms.ToTensor()
        ])
        # Expose the actual transform (this attribute used to hold the
        # torchvision.transforms module itself).
        self.transform = trans

        for key, val in tqdm(json_file.items()):
            genres = mlb.transform([val['genre']]).flatten()
            name = val['name']
            img_path = val['img_path']
            try:
                image = io.imread(img_path)
                if image.shape[0:2] != img_size:
                    image = transform.resize(
                        image=image,
                        output_shape=img_size
                    )
                image = trans(image)
            except Exception as exc:
                # Best effort: drop unreadable/corrupt images, but let
                # KeyboardInterrupt / SystemExit propagate so a long load can
                # still be aborted.
                self.skipped.append((key, repr(exc)))
                continue

            self.data.append({
                'id': key,
                'image': image.to(device),
                'name': name,
                'genre': torch.tensor(genres).to(device),
            })

    def __len__(self):
        """Number of successfully loaded samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample dict stored at position ``index``."""
        return self.data[index]

In [8]:
# Build the three splits. MovieGenreDataset loads and preprocesses every image
# inside its constructor, so each of these calls does the full I/O for its
# split before returning.
# NOTE(review): `config` is not defined in any cell visible in this notebook -
# presumably an earlier cell creates it; confirm it provides at least
# 'img_size'.
train_dataset = MovieGenreDataset(config, 'data/train.json')
val_dataset = MovieGenreDataset(config, 'data/val.json')
test_dataset = MovieGenreDataset(config, 'data/test.json')

100%|██████████| 19525/19525 [29:37<00:00, 10.99it/s]  
100%|██████████| 2359/2359 [09:37<00:00,  4.09it/s]


In [9]:
# One batch iterator per split: 32 samples per batch, default (sequential,
# unshuffled) order, batches assembled by the custom collate_fn.
dataloader_train = DataLoader(train_dataset, batch_size=32, collate_fn=collate_fn)
dataloader_val = DataLoader(val_dataset, batch_size=32, collate_fn=collate_fn)
dataloader_test = DataLoader(test_dataset, batch_size=32, collate_fn=collate_fn)

# EXTRACT DATA RESNET

## RESNET ARCHITECTURE

In [10]:
# Identity class
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands its input back untouched."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, X):
        """Return ``X`` unchanged."""
        return X

In [11]:
# ResNet-50 backbone with ImageNet weights.
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of
# `weights=`; "IMAGENET1K_V1" is the checkpoint `pretrained=True` loaded, so
# the weights are unchanged.
Resnet50_extractor = resnet50(
    weights="IMAGENET1K_V1",
)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\ASUS/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:05<00:00, 17.6MB/s]


In [12]:
# Freeze every weight: the network is used purely as a fixed feature extractor.
for param in Resnet50_extractor.parameters():
    param.requires_grad = False

# Replace the classification head with a pass-through so the forward pass
# returns what used to be the input of `fc`.
Resnet50_extractor.fc = Identity()

# Inference mode. Without this the BatchNorm layers normalise with the
# statistics of the current batch (and keep updating their running stats), so
# the features of an image would depend on which other images share its batch.
Resnet50_extractor.eval()

# Move to the target device (last expression: also displays the architecture).
Resnet50_extractor.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## EXTRACT

In [13]:
npy_save_dir = 'features_Resnet50'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so features do not depend on batch composition (BatchNorm).
Resnet50_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_train):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = Resnet50_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]].
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

611it [1:29:54,  8.83s/it]


In [14]:
npy_save_dir = 'features_Resnet50'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so features do not depend on batch composition (BatchNorm).
Resnet50_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_val):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = Resnet50_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]]. The ids must be filtered
    # exactly like the images: pairing rows with the unfiltered id list saved
    # features under the wrong id (or raised IndexError) on a resumed run.
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

74it [11:39,  9.45s/it]


In [15]:
npy_save_dir = 'features_Resnet50'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so features do not depend on batch composition (BatchNorm).
Resnet50_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_test):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = Resnet50_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]]. The ids must be filtered
    # exactly like the images: pairing rows with the unfiltered id list saved
    # features under the wrong id (or raised IndexError) on a resumed run.
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

79it [11:47,  8.95s/it]


# EXTRACT DATA DENSENET

## DENSENET ARCHITECTURE

In [17]:
# Identity class
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands its input back untouched."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, X):
        """Return ``X`` unchanged."""
        return X

In [18]:
# DenseNet-169 backbone with ImageNet weights.
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of
# `weights=`; "IMAGENET1K_V1" is the checkpoint `pretrained=True` loaded, so
# the weights are unchanged.
Densenet169_extractor = densenet169(
    weights="IMAGENET1K_V1",
)

Downloading: "https://download.pytorch.org/models/densenet169-b2777c0a.pth" to C:\Users\ASUS/.cache\torch\hub\checkpoints\densenet169-b2777c0a.pth
100%|██████████| 54.7M/54.7M [00:17<00:00, 3.23MB/s]


In [19]:
# Freeze every weight: the network is used purely as a fixed feature extractor.
for param in Densenet169_extractor.parameters():
    param.requires_grad = False

# Replace the classification head with a pass-through so the forward pass
# returns what used to be the input of `classifier`.
Densenet169_extractor.classifier = Identity()

# Inference mode. Without this the BatchNorm layers normalise with the
# statistics of the current batch (and keep updating their running stats), so
# the features of an image would depend on which other images share its batch.
Densenet169_extractor.eval()

# Move to the target device (last expression: also displays the architecture).
Densenet169_extractor.to(device)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

## EXTRACT

In [22]:
npy_save_dir = 'features_Densenet169'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so features do not depend on batch composition (BatchNorm).
Densenet169_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_train):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = Densenet169_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]].
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

611it [1:41:26,  9.96s/it]


In [23]:
npy_save_dir = 'features_Densenet169'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so features do not depend on batch composition (BatchNorm).
Densenet169_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_val):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = Densenet169_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]]. The ids must be filtered
    # exactly like the images: pairing rows with the unfiltered id list saved
    # features under the wrong id (or raised IndexError) on a resumed run.
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

74it [13:13, 10.73s/it]


In [24]:
npy_save_dir = 'features_Densenet169'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so features do not depend on batch composition (BatchNorm).
Densenet169_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_test):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = Densenet169_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]]. The ids must be filtered
    # exactly like the images: pairing rows with the unfiltered id list saved
    # features under the wrong id (or raised IndexError) on a resumed run.
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

79it [15:07, 11.49s/it]


# **EXTRACT DATA VGG**

## VGG ARCHITECTURE

In [None]:
# Identity class
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands its input back untouched."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, X):
        """Return ``X`` unchanged."""
        return X

In [None]:
# VGG-16 backbone with ImageNet weights.
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of
# `weights=`; IMAGENET1K_V1 is the checkpoint `pretrained=True` loaded, so the
# weights are unchanged.
VGG16_extractor = vgg16(
    weights=VGG16_Weights.IMAGENET1K_V1,
)



In [None]:
# Freeze every weight: the network is used purely as a fixed feature extractor.
for param in VGG16_extractor.parameters():
    param.requires_grad = False

# Replace the last classifier layer (index 6) with a pass-through so the
# forward pass returns what used to be that layer's input.
VGG16_extractor.classifier[6] = Identity()

# Inference mode. Without this any train-only layers (dropout, batch-norm
# batch statistics) stay active and make the extracted features
# non-deterministic.
VGG16_extractor.eval()

# Move to the target device (last expression: also displays the architecture).
VGG16_extractor.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

## EXTRACT

In [None]:
npy_save_dir = 'features_VGG'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so train-only layers (e.g. dropout) cannot randomise the
# extracted features.
VGG16_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_train):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = VGG16_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]].
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

611it [2:25:11, 14.26s/it]


In [None]:
npy_save_dir = 'features_VGG'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so train-only layers (e.g. dropout) cannot randomise the
# extracted features.
VGG16_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_val):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = VGG16_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]]. The ids must be filtered
    # exactly like the images: pairing rows with the unfiltered id list saved
    # features under the wrong id (or raised IndexError) on a resumed run.
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

74it [20:33, 16.67s/it]


In [None]:
npy_save_dir = 'features_VGG'
os.makedirs(npy_save_dir, exist_ok=True)
# Snapshot the cache directory once; calling os.listdir for every sample
# rescans the whole folder each time.
cached = set(os.listdir(npy_save_dir))

# Inference mode, so train-only layers (e.g. dropout) cannot randomise the
# extracted features.
VGG16_extractor.eval()

for ids, images, genres, names in tqdm(dataloader_test):
    # Positions inside this batch whose feature file is still missing.
    todo = [pos for pos, sample_id in enumerate(ids) if f'{sample_id}.npy' not in cached]
    if not todo:
        continue

    with torch.no_grad():
        features = VGG16_extractor(images[todo].to(device))
    # Bring the result to host memory so NumPy can serialise it on any device.
    features = features.cpu().numpy()

    # Row r of `features` belongs to ids[todo[r]]. The ids must be filtered
    # exactly like the images: pairing rows with the unfiltered id list saved
    # features under the wrong id (or raised IndexError) on a resumed run.
    for row, pos in enumerate(todo):
        npy_name = f'{ids[pos]}.npy'
        np.save(f'{npy_save_dir}/{npy_name}', features[row])
        cached.add(npy_name)

79it [20:08, 15.30s/it]
