In [None]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import torchvision.transforms._transforms_video as vidTranforms
import torchvision.io as io
import time
import os
import copy
from torch.utils.data import Dataset, DataLoader
import skvideo
skvideo.setFFmpegPath("/usr/bin/")
import skvideo.io  
import apex.amp as amp
import apex
import random
import pandas as pd
import seaborn as sns

In [None]:
# load each video
def loader(path, num_frames=10, read_video=None):
    """Decode a video and return a random run of consecutive frames as a tensor.

    Args:
        path: Path of the video file to decode.
        num_frames: Maximum number of consecutive frames in the returned clip.
        read_video: Optional callable mapping ``path`` to an array of frames
            whose first axis is the frame index. Defaults to
            ``skvideo.io.vread``; useful for swapping the decoder or testing.

    Returns:
        ``torch.from_numpy`` of ``num_frames`` consecutive frames starting at a
        uniformly random offset. A video with ``num_frames`` frames or fewer is
        returned whole.
    """
    if read_video is None:
        read_video = skvideo.io.vread
    vid = read_video(path)
    total = len(vid)
    if total > num_frames:
        # random.randint includes both endpoints, so the last valid start is
        # total - num_frames (not total - num_frames - 1).
        first_frame = random.randint(0, total - num_frames)
    else:
        first_frame = 0
    # Slicing clamps at the end of the array, so short videos come back whole.
    # NOTE(review): such clips have fewer frames than the others; confirm the
    # dataset has no videos this short, or batching clips together may fail.
    clip = vid[first_frame:first_frame + num_frames]
    return torch.from_numpy(clip)

In [None]:
# data augmentation (train) and deterministic preprocessing (val)
# Both phases convert the clip with ToTensorVideo and normalize with the same
# per-channel mean/std.
data_transforms = {
    # Training: random spatial crop + random horizontal flip as augmentation.
    'train': transforms.Compose([
        vidTranforms.ToTensorVideo(),
        vidTranforms.RandomCropVideo((320, 240)),
        vidTranforms.RandomHorizontalFlipVideo(),
        vidTranforms.NormalizeVideo(mean = [0.43216, 0.394666, 0.37645], std = [0.22803, 0.22145, 0.216989])
    ]),
    # Validation: no random augmentation, so that metrics are reproducible and
    # comparable between epochs. A center crop of the same size keeps the
    # input shape identical to the training clips.
    'val': transforms.Compose([
        vidTranforms.ToTensorVideo(),
        vidTranforms.CenterCropVideo((320, 240)),
        vidTranforms.NormalizeVideo(mean = [0.43216, 0.394666, 0.37645], std = [0.22803, 0.22145, 0.216989])
    ]),
}

data_dir = 'MS-ASLStable'

# load data
# One DatasetFolder per phase, rooted at <data_dir>/<phase>; each .mp4 file is
# decoded by `loader` and then passed through the phase's transform pipeline.
video_datasets = {x: datasets.DatasetFolder(os.path.join(data_dir, x), loader=loader, extensions='.mp4', transform = data_transforms[x])
                  for x in ['train', 'val']}
# Only the training set is shuffled; validation order does not affect the
# aggregated metrics, so it is kept fixed.
dataloaders = {x: torch.utils.data.DataLoader(video_datasets[x], batch_size=2, shuffle=(x == 'train'), num_workers=4, pin_memory=False)
              for x in ['train', 'val']}
dataset_sizes = {x: len(video_datasets[x]) for x in ['train', 'val']}
class_names = video_datasets['train'].classes
print(class_names)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# NOTE(review): set_enabled_lms appears to exist only in PyTorch builds that
# ship Large Model Support; guard the call so the notebook still runs on
# builds without it instead of raising AttributeError.
if hasattr(torch.cuda, "set_enabled_lms"):
    torch.cuda.set_enabled_lms(True)
# Per-epoch history, filled in by train_model and plotted afterwards.
lossArr = []
accArr = []
valLoss = []
valAcc = []

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, validating after each epoch.

    Uses the notebook-level globals ``dataloaders``, ``dataset_sizes``,
    ``device`` and ``amp``. Appends one entry per epoch to the global history
    lists: ``lossArr``/``accArr`` for the training phase and
    ``valLoss``/``valAcc`` for the validation phase.

    Args:
        model: Network to train; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch; the loss is
            scaled through ``amp.scale_loss(loss, optimizer)`` before backward.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: Number of train+val epochs to run.

    Returns:
        ``model`` with the weights from the epoch that had the highest
        validation accuracy loaded. Those weights are also written to
        ``"aslModel.pth.tar"``.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()
                model.zero_grad()

                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if is_train:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                        optimizer.step()

                # statistics (kept as plain Python numbers so the epoch
                # averages below also work when a loader yields no batches)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            if is_train:
                lossArr.append(epoch_loss)
                accArr.append(epoch_acc)
                scheduler.step()
            else:
                # Validation metrics go to their own history lists.
                valLoss.append(epoch_loss)
                valAcc.append(epoch_acc)
                # deep copy the model whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), "aslModel.pth.tar")
    return model

In [None]:
# Build the network: pretrained r2plus1d_18 whose final fully connected layer
# is replaced by one with an output per entry in class_names.
model_ft = torchvision.models.video.r2plus1d_18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))
model_ft = model_ft.to(device)

# Class-weighted loss: the per-class weights are read from the 'shortWeights'
# text file (written by the class-weights cell at the end of this notebook,
# so that cell must have been run at least once beforehand).
weightTensor = torch.from_numpy(np.loadtxt('shortWeights')).to(device).float()
loss = nn.CrossEntropyLoss(weight=weightTensor)
del weightTensor

# Optimizer, mixed-precision wrapping (apex amp, opt_level "O1") and a step
# learning-rate schedule that multiplies the rate by gamma every 10 epochs.
optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=0.005,
)
# Alternative optimizer, currently disabled:
# optimizer_ft = apex.optimizers.FusedSGD(model_ft.parameters(), lr=0.001, momentum=0.9, weight_decay=0.005)
model_ft, optimizer_ft = amp.initialize(model_ft, optimizer_ft, opt_level="O1")
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_ft,
    step_size=10,
    gamma=0.03,
)

In [None]:
# Release cached GPU memory before starting, then run the full training loop.
torch.cuda.empty_cache()
model_ft = train_model(
    model=model_ft,
    criterion=loss,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=30,
)

In [None]:
# graph loss and accuracy arrays to show data over time
# One column per metric so every curve gets its own legend entry. Each list is
# wrapped in a Series so pandas pads with NaN if the histories differ in length.
history = pd.DataFrame({
    "train loss": pd.Series(lossArr, dtype=float),
    "train accuracy": pd.Series(accArr, dtype=float),
    "val loss": pd.Series(valLoss, dtype=float),
    "val accuracy": pd.Series(valAcc, dtype=float),
})
# The frame's index (0, 1, 2, ...) is the epoch number.
ax = sns.lineplot(sort=False, data=history)
ax.set(xlabel='Epochs', ylabel='Loss / Accuracy');

In [None]:
# Serialize the entire model object (not just its state_dict) to disk.
torch.save(obj=model_ft, f="fullTestModel.pth.tar")

In [None]:
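# Alternative loader (disabled, kept for reference): reads the video with
# torchvision.io.read_video and returns it whole instead of slicing out a
# short clip.
# def loader(path): 
#     vid, _, _ = io.read_video(path, pts_unit='sec')
#     return vid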

In [None]:
# create array of weights to use when training the model
# NOTE: this writes the 'shortWeights' file that the model-setup cell reads,
# so it has to be run before that cell on a fresh checkout.
#
# The class index of every training sample is already stored on the dataset,
# so count from that list instead of iterating the dataset (which would decode
# and transform every video just to read its label).
train_targets = np.asarray(video_datasets['train'].targets, dtype=np.int64)
class_counts = np.bincount(train_targets, minlength=len(class_names)).astype(float)
total_count = class_counts.sum()
# weight = fraction of the training samples that do NOT belong to the class,
# so classes with fewer samples get a larger weight.
weightsArray = (total_count - class_counts) / total_count
print(weightsArray)
np.savetxt('shortWeights', weightsArray)