In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import torchvision.transforms._transforms_video as vidTranforms
import torchvision.io as io
import time
import os
import copy
from torch.utils.data import Dataset, DataLoader
import skvideo
skvideo.setFFmpegPath("/usr/bin/")
import skvideo.io  
import GPUtil
import apex.amp as amp
import apex

In [2]:
def loader(path):
    """Decode the video file at ``path`` and return only its frame tensor.

    ``io.read_video`` yields a 3-tuple; the first element (the video frames)
    is returned and the remaining two elements are discarded.
    NOTE(review): per the torchvision docs the frames should be laid out as
    (T, H, W, C) -- confirm against the installed version.
    """
    frames = io.read_video(path, pts_unit='sec')[0]
    return frames

In [3]:
# Per-channel normalisation statistics, shared by both splits so they cannot
# drift apart.
# NOTE(review): these look like the statistics published for torchvision's
# pretrained video models -- confirm.
_NORM_MEAN = [0.43216, 0.394666, 0.37645]
_NORM_STD = [0.22803, 0.22145, 0.216989]

data_transforms = {
    # Training split: random horizontal flip is used as data augmentation.
    'train': transforms.Compose([
        vidTranforms.ToTensorVideo(),
        vidTranforms.RandomHorizontalFlipVideo(),
        vidTranforms.NormalizeVideo(mean=_NORM_MEAN, std=_NORM_STD)
    ]),
    # Validation split: no random augmentation, so the reported validation
    # loss/accuracy are deterministic and comparable between epochs.
    'val': transforms.Compose([
        vidTranforms.ToTensorVideo(),
        vidTranforms.NormalizeVideo(mean=_NORM_MEAN, std=_NORM_STD)
    ]),
}

# Root folder expected to contain 'train' and 'val' sub-folders, each holding
# one directory per class with .mp4 files (DatasetFolder layout).
data_dir = 'MS-ASL100'

video_datasets = {
    x: datasets.DatasetFolder(
        os.path.join(data_dir, x),
        loader=loader,
        extensions='.mp4',
        transform=data_transforms[x],
    )
    for x in ['train', 'val']
}

# Only the training split is shuffled; sample order does not influence the
# aggregated validation metrics.
# NOTE(review): batch_size=1 presumably because clips differ in length and
# cannot be stacked by the default collate function -- confirm.
dataloaders = {
    x: torch.utils.data.DataLoader(
        video_datasets[x],
        batch_size=1,
        shuffle=(x == 'train'),
        num_workers=4,
        pin_memory=False,
    )
    for x in ['train', 'val']
}

dataset_sizes = {x: len(video_datasets[x]) for x in ['train', 'val']}
class_names = video_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# `set_enabled_lms` is not present in every PyTorch build, so only call it
# when it exists instead of crashing the cell with AttributeError.
# NOTE(review): this appears to be the Large Model Support switch of a
# vendor-specific PyTorch build -- confirm.
if hasattr(torch.cuda, 'set_enabled_lms'):
    torch.cuda.set_enabled_lms(True)
else:
    print('torch.cuda.set_enabled_lms is not available in this PyTorch build; continuing without it')

In [4]:
def train_model(model, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` and return it with the best-validation weights loaded.

    Each epoch runs a 'train' phase followed by a 'val' phase. The function
    relies on the module-level ``dataloaders``, ``dataset_sizes`` and
    ``device`` objects and on apex ``amp`` for loss scaling, so the model and
    optimizer must already have been passed through ``amp.initialize``.

    Args:
        model: network to optimise; it is called as ``model(inputs)``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of train+val epochs to run.

    Returns:
        ``model`` with the weights of the epoch that achieved the highest
        validation accuracy. Those weights are also written to
        ``aslModel.pth.tar``.
    """
    checkpoint_path = "aslModel.pth.tar"
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)
        torch.cuda.empty_cache()
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # kept as a plain Python int throughout
            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()
                model.zero_grad()

                # track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if is_train:
                        # amp scales the loss before backward for mixed precision
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                        optimizer.step()

                # statistics: accumulate as Python numbers so the epoch
                # metrics have one consistent type (also for an empty loader)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

                if i % 100 == 0:
                    print('Batch: {}'.format(i))
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep a copy of the best weights and persist them right away so
            # that an interrupted run still leaves a usable checkpoint.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, checkpoint_path)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), checkpoint_path)
    return model

In [5]:
# Start from the pretrained R(2+1)D-18 video network.
model_ft = torchvision.models.video.r2plus1d_18(pretrained=True)

# Freeze the first NUM_FROZEN_PARAMS parameter tensors (in the order given by
# model.parameters()). The previous loop tested the counter only after
# freezing, so it froze one tensor more than the number it printed.
NUM_FROZEN_PARAMS = 10
counter = 0
for param in list(model_ft.parameters())[:NUM_FROZEN_PARAMS]:
    param.requires_grad = False
    counter += 1
print(counter)

# Replace the classification head so it outputs one logit per class.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, len(class_names))
model_ft = model_ft.to(device)

# Per-class loss weights, read from the text file written by the
# np.savetxt('shortWeights', ...) cell of this notebook.
weightTensor = torch.from_numpy(np.loadtxt('shortWeights')).float().to(device)
if weightTensor.numel() != len(class_names):
    # Fail early with a clear message rather than inside the loss computation.
    raise ValueError(
        "'shortWeights' holds {} weights but the dataset has {} classes".format(
            weightTensor.numel(), len(class_names)))
loss = nn.CrossEntropyLoss(weight=weightTensor)
del weightTensor

# Fused SGD from apex; the StepLR schedule multiplies the learning rate by
# 0.01 every 5 scheduler steps.
optimizer_ft = apex.optimizers.FusedSGD(model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.01)

# Enable apex mixed precision at optimisation level O1.
model_ft, optimizer_ft = amp.initialize(model_ft, optimizer_ft, opt_level="O1")

10
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [None]:
# Release cached, currently unused GPU memory before the long training run.
torch.cuda.empty_cache()
# Fine-tune for 100 epochs. train_model returns the model with the weights of
# the best validation epoch loaded and also saves them to 'aslModel.pth.tar'.
model_ft = train_model(model_ft, loss, optimizer_ft, exp_lr_scheduler, num_epochs=100)

Epoch 1/100
----------
Batch: 0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Batch: 100
Batch: 200
Batch: 300
Batch: 400
train Loss: 2.6523 Acc: 0.1152




Batch: 0
Batch: 100
val Loss: 2.5008 Acc: 0.0526

Epoch 2/100
----------
Batch: 0
Batch: 100
Batch: 200
Batch: 300
Batch: 400
train Loss: 2.3680 Acc: 0.0991
Batch: 0
Batch: 100
val Loss: 2.3714 Acc: 0.0526

Epoch 3/100
----------
Batch: 0
Batch: 100
Batch: 200
Batch: 300
Batch: 400
train Loss: 2.3424 Acc: 0.1106
Batch: 0
Batch: 100
val Loss: 2.3842 Acc: 0.0965

Epoch 4/100
----------
Batch: 0
Batch: 100
Batch: 200
Batch: 300
Batch: 400
train Loss: 2.3314 Acc: 0.1060
Batch: 0
Batch: 100
val Loss: 2.3757 Acc: 0.0526

Epoch 5/100
----------
Batch: 0
Batch: 100
Batch: 200
Batch: 300
Batch: 400
train Loss: 2.3156 Acc: 0.1106
Batch: 0
Batch: 100
val Loss: 2.3636 Acc: 0.0877

Epoch 6/100
----------


In [7]:
# Per-class loss weights: weight_c = (N - n_c) / N, where n_c is the number of
# training samples of class c and N is the total number of training samples.
# The result is consumed by the np.savetxt('shortWeights', ...) cell below,
# whose file is in turn read by the model-setup cell, so this cell has to run
# (once) before 'shortWeights' can be loaded.
#
# The labels are taken from the dataset's stored target list, which avoids
# decoding every video just to read its class index.
class_counts = np.bincount(
    video_datasets['train'].targets, minlength=len(class_names)
).astype(np.float64)
total_samples = class_counts.sum()
weightsArray = np.true_divide(total_samples - class_counts, total_samples)
print(weightsArray)

0
100
200
300
400
[0.94009217 0.88018433 0.90322581 0.88018433 0.89861751 0.92165899
 0.90092166 0.89400922 0.88479263 0.89631336]


In [8]:
# Persist the class weights to the plain-text file 'shortWeights'; the
# model-setup cell reads this file back with np.loadtxt('shortWeights').
# `weightsArray` must already exist in the kernel (it is the array printed by
# the class-weight cell directly above).
np.savetxt('shortWeights', weightsArray)