In [2]:
from reader import readShortVideo
from reader import getVideoList
import matplotlib.pyplot as plt
from os import listdir
import os
import pandas as pd
import numpy as np
import pickle

import torchvision
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import skimage.io
import skimage

import torch.nn as nn
%matplotlib inline
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [None]:
# load data
class CustomDataset(Dataset):
    """Dataset backed by pre-pickled inputs (``X``) and labels (``y``) for one split.

    Parameters
    ----------
    train_valid : str
        Which split to load: ``"train"`` reads ``../train_X.pkl`` / ``../train_y.pkl``,
        ``"valid"`` reads ``../valid_X.pkl`` / ``../valid_y.pkl``.

    Raises
    ------
    ValueError
        If ``train_valid`` is neither ``"train"`` nor ``"valid"``.
    """

    # split name -> (inputs pickle, labels pickle)
    _SPLIT_FILES = {
        "train": ("../train_X.pkl", "../train_y.pkl"),
        "valid": ("../valid_X.pkl", "../valid_y.pkl"),
    }

    def __init__(self, train_valid="train"):
        if train_valid not in self._SPLIT_FILES:
            # Previously an unknown split silently produced an object without X / y.
            raise ValueError(
                "train_valid must be 'train' or 'valid', got {!r}".format(train_valid)
            )
        x_path, y_path = self._SPLIT_FILES[train_valid]
        # NOTE: pickle.load executes arbitrary code; only use with files you produced yourself.
        with open(x_path, "rb") as f:
            self.X = pickle.load(f)
        with open(y_path, "rb") as f:
            self.y = pickle.load(f)

    def __getitem__(self, index):
        """Return the ``(input, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of stored inputs."""
        # The original returned self.length, an attribute that was never assigned.
        return len(self.X)

# Build one dataset object per split (each reads its pickles from the parent directory).
train_dataset = CustomDataset(train_valid="train")
valid_dataset = CustomDataset(train_valid="valid")

In [None]:
# Convolutional trunk (`.features`) of a pretrained DenseNet-121, moved to the GPU.
# Switch to eval() immediately: in training mode every forward pass updates the
# BatchNorm running statistics (torch.no_grad() does not prevent this), which would
# alter the pretrained weights before features are extracted.
cnn_feature_extractor = torchvision.models.densenet121(pretrained=True).features.cuda().eval()

In [None]:
# Sanity check: push the input of the first training item through the extractor
# and print the size of the resulting feature tensor.
# NOTE(review): torch.no_grad() only disables gradient tracking; BatchNorm layers in
# training mode still update their running statistics on this forward pass — confirm
# the extractor has been put in eval() mode before this cell runs.
with torch.no_grad():
    output_size = cnn_feature_extractor(train_dataset[0][0].cuda()).size()
print(output_size)

In [None]:
# create sequential features for RNN
feature_size = 1024*7*7
cnn_feature_extractor.eval()


def _collect_features(dataset, report_every):
    """Run every input of ``dataset`` through the CNN and return the flattened features.

    Each item's input is moved to the GPU, passed through ``cnn_feature_extractor``,
    brought back to the CPU and reshaped to ``(-1, feature_size)``. A running count is
    printed every ``report_every`` items.
    """
    collected = []
    with torch.no_grad():
        for done, idx in enumerate(range(len(dataset.X)), start=1):
            frames = dataset[idx][0]
            flat = cnn_feature_extractor(frames.cuda()).cpu().view(-1, feature_size)
            collected.append(flat)
            if done % report_every == 0:
                print(done)
    return collected


train_features = _collect_features(train_dataset, 300)
print("training instances done")

valid_features = _collect_features(valid_dataset, 100)

In [None]:
# Cache the extracted features on disk, one pickle per split.
for _path, _features in (
    ("../train_features_d12.pkl", train_features),
    ("../valid_features_d12.pkl", valid_features),
):
    with open(_path, "wb") as f:
        pickle.dump(_features, f)


# Training samples for seq2seq prediction

In [None]:
def normalize(image):
    '''
    Prepare a single frame as input for a pre-trained model.

    The image is converted to PIL, zero-padded with ``Pad((0, 40))``, resized with
    ``Resize(224)``, converted to a tensor and finally standardised per channel with
    the mean / std constants below. Returns the transformed tensor.
    '''
    channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    steps = [
        transforms.ToPILImage(),
        transforms.Pad((0,40), fill=0, padding_mode='constant'),
        transforms.Resize(224),
        transforms.ToTensor(),
        channel_norm,
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)

In [None]:
# Extract per-frame CNN features for every full-length video (train and valid splits).
def _extract_video_features(video_root, extractor):
    """Return one stacked feature tensor per sub-folder of ``video_root``.

    Sub-folders and the image files inside them are visited in sorted order. Every
    image is read, passed through ``normalize``, run through ``extractor`` on the GPU
    and flattened to a vector of ``1024*7*7`` values; the vectors of one folder are
    stacked into a single tensor.
    """
    videos = []
    for category in sorted(listdir(video_root)):
        print("category:",category)
        frame_names = sorted(listdir(os.path.join(video_root, category)))
        category_frames = []
        for image in frame_names:
            image_rgb = skimage.io.imread(os.path.join(video_root, category, image))
            image_nor = normalize(image_rgb)
            feature = extractor(image_nor.view(1,3,224,224).cuda()).cpu().view(1024*7*7)
            category_frames.append(feature)
        videos.append(torch.stack(category_frames))
    return videos


# A fresh model starts in training mode. eval() is required so BatchNorm uses the
# pretrained running statistics instead of per-image batch statistics (and does not
# keep updating them); torch.no_grad() alone does not change that.
cnn_feature_extractor = torchvision.models.densenet121(pretrained=True).features.cuda().eval()

with torch.no_grad():
    print("training set .....")
    train_all_video_frame = _extract_video_features(
        "../HW5_data/FullLengthVideos/videos/train/", cnn_feature_extractor)

    print("\nvalidation set .....")
    test_all_video_frame = _extract_video_features(
        "../HW5_data/FullLengthVideos/videos/valid/", cnn_feature_extractor)

In [None]:
# Write into ../features/, the directory the reload cell further down reads from;
# the original wrote to the working directory, so a fresh run could not find the file.
os.makedirs("../features", exist_ok=True)
with open("../features/train_FullLength_features.pkl", "wb") as f:
    pickle.dump(train_all_video_frame, f)

In [None]:
# Write into ../features/, the directory the reload cell further down reads from;
# the original wrote to the working directory, so a fresh run could not find the file.
os.makedirs("../features", exist_ok=True)
with open("../features/valid_FullLength_features.pkl", "wb") as f:
    pickle.dump(test_all_video_frame, f)

### Cut to a defined size

In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Reload the cached full-length features for both splits.
train_all_video_frame = _load_pickle("../features/train_FullLength_features.pkl")
valid_all_video_frame = _load_pickle("../features/valid_FullLength_features.pkl")

In [10]:
# load ground truth
def _read_label_file(file_path):
    """Parse one label file into a list of ints (one integer per line)."""
    with open(file_path, "r") as handle:
        return [int(line.strip()) for line in handle.readlines()]


# Training labels: one list per .txt file, files visited in sorted order.
label_path = "../HW5_data/FullLengthVideos/labels/train/"
category_txt_list = sorted(listdir(label_path))
train_category_labels = [
    _read_label_file(os.path.join(label_path, txt)) for txt in category_txt_list
]

# Validation labels, read the same way.
label_path = "../HW5_data/FullLengthVideos/labels/valid/"
category_txt_list = sorted(listdir(label_path))
valid_category_labels = [
    _read_label_file(os.path.join(label_path, txt)) for txt in category_txt_list
]

### Using the `torch.split` function

In [11]:
def cut_frames(features_per_category, labels_per_category, size = 200, overlap = 20):
    """Cut one video's features and labels into overlapping windows.

    The sequence is split into consecutive chunks of ``size - overlap`` items. Every
    chunk except the first is prefixed with the last ``overlap`` items of the chunk
    before it, so those windows hold up to ``size`` items. The first window is not
    prefixed and therefore holds at most ``size - overlap`` items; the last window
    may be shorter still.

    Parameters
    ----------
    features_per_category : torch.Tensor
        Per-frame features, split along the first dimension.
    labels_per_category : sequence of numbers
        Per-frame labels; converted with ``torch.Tensor(...)`` before splitting.
    size : int
        Target window length (must be greater than ``overlap``).
    overlap : int
        Number of items shared with the previous window (``0`` disables overlap).

    Returns
    -------
    (list of Tensor, list of Tensor, list of int)
        Feature windows, label windows and the length of each window.

    Raises
    ------
    ValueError
        If ``overlap`` is negative or ``size`` is not greater than ``overlap``.
    """
    if overlap < 0 or size <= overlap:
        raise ValueError(
            "need 0 <= overlap < size, got size={} overlap={}".format(size, overlap)
        )

    step = size - overlap
    feature_chunks = torch.split(features_per_category, step)
    label_chunks = torch.split(torch.Tensor(labels_per_category), step)

    cut_features = []
    cut_labels = []
    for i in range(len(feature_chunks)):
        feature_window = feature_chunks[i]
        label_window = label_chunks[i]
        # Guard overlap == 0 explicitly: x[-0:] is the WHOLE tensor, so the original
        # code prepended the entire previous chunk instead of nothing.
        if i > 0 and overlap > 0:
            feature_window = torch.cat((feature_chunks[i-1][-overlap:], feature_window))
            label_window = torch.cat((label_chunks[i-1][-overlap:], label_window))
        cut_features.append(feature_window)
        cut_labels.append(label_window)

    lengths = [len(window) for window in cut_labels]
    return cut_features, cut_labels, lengths

In [12]:
# Try cut_frames on the first training entry: r1 = feature windows, r2 = label windows,
# r3 = window lengths (size 120, overlap 10).
r1, r2, r3 = cut_frames(train_all_video_frame[0],train_category_labels[0], size = 120, overlap = 10)

In [13]:
# Window every training video and pool the windows of all videos into flat lists.
cutting_steps = 350
overlap_steps = 30
train_cut_features, train_cut_labels, train_cut_lengths = [], [], []
for category_frames, category_labels in zip(train_all_video_frame,train_category_labels):
    features, labels, lengths = cut_frames(
        category_frames,
        category_labels,
        size = cutting_steps,
        overlap = overlap_steps,
    )
    train_cut_features.extend(features)
    train_cut_labels.extend(labels)
    train_cut_lengths.extend(lengths)
    print("one category done")

one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done
one category done


In [22]:
# Length (first dimension) of each validation entry.
valid_lengths = list(map(len, valid_all_video_frame))

In [23]:
# Show the per-entry lengths of the validation set.
valid_lengths

[2140, 938, 857, 809, 1360]

In [71]:
# The validation set is NOT cut into windows; it is kept at full length.

# valid_cut_features = []
# valid_cut_labels = []
# valid_cut_lengths = []
# for category_frames, category_labels in zip(valid_all_video_frame,valid_category_labels):
#     features, labels, lengths = cut_frames(category_frames,category_labels, 
#                                            size = cutting_steps, overlap = 0)
#     valid_cut_features += features
#     valid_cut_labels += labels
#     valid_cut_lengths += lengths
#     print("one category done")

one category done
one category done
one category done
one category done
one category done


In [24]:
# Persist the windowed training data and the uncut validation data, in this order.
_outputs = (
    ("../features/train_cut_features_350.pkl", train_cut_features),
    ("../features/train_cut_labels_350.pkl", train_cut_labels),
    ("../features/train_cut_lengths_350.pkl", train_cut_lengths),
    ("../features/valid_cut_features_no_cut.pkl", valid_all_video_frame),
    ("../features/valid_cut_labels_no_cut.pkl", valid_category_labels),
    ("../features/valid_cut_lengths_no_cut.pkl", valid_lengths),
)
for _path, _payload in _outputs:
    with open(_path, "wb") as f:
        pickle.dump(_payload, f)