In [0]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import torch
import torchvision
from torch import nn
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.autograd import Variable
from torchvision import datasets, transforms, models
from torch.utils.data.sampler import SubsetRandomSampler

import numpy as np
import copy
import cv2
from PIL import Image
from matplotlib import pyplot as plt

In [0]:
img_rows, img_cols = 200, 200
#number of output classes
nb_classes = 5
# Number of epochs to train 
nb_epoch = 15  #25
# Total number of convolutional filters to use
nb_filters = 32
# Max pooling
nb_pool = 2
# Size of convolution kernel
nb_conv = 3
#batch size in training the model
batch_size = 50

# **CNN model pytorch version**

In [0]:
# #pytorch version of keras CNN model  .
#do not run this
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(nb_conv, nb_conv))
        self.conv2 = nn.Conv2d(32, 32 , kernel_size=(nb_conv, nb_conv))

        self.drop1 = nn.Dropout(0.5)
        self.drop2 = nn.Dropout(0.5)
        self.pool = nn.MaxPool2d(2)
        self.dense1 = nn.Linear(32*98*98, 128)  # equivalent to Dense in keras
        self.dense2 = nn.Linear(128, 5)  # equivalent to Dense in keras
        self.softmax = nn.LogSoftmax(dim=1)    
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.drop1(x)
        x = x.view(-1, 32*98*98)
        x = self.dense1(x)
        x = self.drop2(x)
        x = self.dense2(x)
        x = self.softmax(x)

        return x

In [0]:
input_shape=(1, 200, 200)
model = NeuralNet()
summary(model, input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             320
            Conv2d-2         [-1, 32, 196, 196]           9,248
         MaxPool2d-3           [-1, 32, 98, 98]               0
           Dropout-4           [-1, 32, 98, 98]               0
            Linear-5                  [-1, 128]      39,338,112
           Dropout-6                  [-1, 128]               0
            Linear-7                    [-1, 5]             645
Total params: 39,348,325
Trainable params: 39,348,325
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.15
Forward/backward pass size (MB): 23.64
Params size (MB): 150.10
Estimated Total Size (MB): 173.90
----------------------------------------------------------------


In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
#Over here we want to only update the parameters of the classifier so
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=0.0003)
torch.optim.Adagrad(params, lr=0.001, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
model.to(device)

# **Defining the train/test loader with it's transforms**

In [0]:
img_dir = r'C:/Users/Meeta Malviya/Videos/data2/train'
test_dir = r'C:/Users/Meeta Malviya/Videos/data2/test'

train_transforms = transforms.Compose([transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
                                      transforms.RandomRotation(degrees=10),
                                      transforms.ColorJitter(),
                                      transforms.Grayscale(num_output_channels=3),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.CenterCrop(size=224),  # Image net standards
                                      transforms.ToTensor()
                                      ])

test_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.Grayscale(num_output_channels=3),
                                      transforms.ToTensor()
                                     ])

validation_transforms = transforms.Compose([transforms.Resize(256),
                                            transforms.CenterCrop(224),
                                            transforms.Grayscale(num_output_channels=3),
                                            transforms.ToTensor()
                                           ])


In [0]:
#Loading in the dataset

train_data = datasets.ImageFolder(img_dir, transform=train_transforms)
test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

In [0]:
# obtain training indices that will be used for validation
valid_size = 0.2

num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# load training data in batches
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=batch_size,
                                           sampler=train_sampler,
                                           num_workers=0)

# load validation data in batches
valid_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=batch_size,
                                           sampler=valid_sampler,
                                           num_workers=0)

# load test data in batches
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=batch_size,
                                          num_workers=0)

## **Loading the Resnet-50 model**

In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#pretrained=True will download a pretrained network for us
model = models.resnet50(pretrained=True)

#Freezing model parameters and defining the fully connected network to be attached to the model, loss function and the optimizer.
#then we add a sequential model and loss criterion and optimizer
for param in model.parameters():
    param.require_grad = False
 
model.fc = nn.Sequential(
    nn.Linear(2048, 5),
    nn.LogSoftmax(dim=1)    
)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.001, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [0]:
#defining check point for the model and saving weights of the best 
def save_checkpoint(state, is_best, filename=r'C:/Users/Meeta Malviya/Videos/Vgg/restnet-vedio'):
    """Save checkpoint if a new best is achieved"""

    if is_best:
        print ("=> Saving a new best")
        torch.save(state, filename)  # save checkpoint
    else:
        print ("=> Validation Accuracy did not improve")

In [0]:
#definig early stopping class
import numpy as np
import torch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

# **Training the model**

In [0]:
def train_model(model, n_epochs):
    
    # to track the training loss as the model trains
    train_losses = []
    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = [] 
    
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=40, verbose=1)
    
    for epoch in range(1, n_epochs + 1):

        ###################
        # train the model #
        ###################
        model.train() # prep model for training
        for batch, (data, target) in enumerate(train_loader, 1):
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record training loss
            train_losses.append(loss.item())

        ######################    
        # validate the model #
        ######################
        model.eval() # prep model for evaluation
        for data, target in valid_loader:
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the loss
            loss = criterion(output, target)
            # record validation loss
            valid_losses.append(loss.item())

        # print training/validation statistics 
        # calculate average loss over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)
        
        epoch_len = len(str(n_epochs))
        
        print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f}')
        
        print(print_msg)
        
        # clear lists to track next epoch
        train_losses = []
        valid_losses = []
        
        # early_stopping needs the validation loss to check if it has decresed, 
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break
        
    # load the last checkpoint with the best model
    model.load_state_dict(torch.load('checkpoint.pt'))

    return  model, avg_train_losses, avg_valid_losses

In [0]:
# batch_size = 50
n_epochs = 35

# train_loader, test_loader, valid_loader = create_datasets(batch_size)

# early stopping patience; how long to wait after last time validation loss improved.
patience = 10

model, train_loss, valid_loss = train_model(model, n_epochs)

[ 1/35] train_loss: 1.19047 valid_loss: 1.32220
Validation loss decreased (inf --> 1.322195).  Saving model ...
[ 2/35] train_loss: 0.48581 valid_loss: 1.21489
Validation loss decreased (1.322195 --> 1.214891).  Saving model ...
[ 3/35] train_loss: 0.18483 valid_loss: 1.35713
EarlyStopping counter: 1 out of 40
[ 4/35] train_loss: 0.12383 valid_loss: 0.34189
Validation loss decreased (1.214891 --> 0.341893).  Saving model ...
[ 5/35] train_loss: 0.07302 valid_loss: 0.29995
Validation loss decreased (0.341893 --> 0.299948).  Saving model ...
[ 6/35] train_loss: 0.06208 valid_loss: 0.52369
EarlyStopping counter: 1 out of 40
[ 7/35] train_loss: 0.03999 valid_loss: 0.59391
EarlyStopping counter: 2 out of 40
[ 8/35] train_loss: 0.03044 valid_loss: 0.09090
Validation loss decreased (0.299948 --> 0.090902).  Saving model ...
[ 9/35] train_loss: 0.01568 valid_loss: 0.11672
EarlyStopping counter: 1 out of 40
[10/35] train_loss: 0.02005 valid_loss: 0.23502
EarlyStopping counter: 2 out of 40
[11/3

KeyboardInterrupt: 

In [0]:
#saving the model
PATH=r'C:/Users/Meeta Malviya/Videos/Vgg/resnet_weights'
torch.save(model.state_dict(), PATH)

In [0]:
PATH=r'C:/Users/Meeta Malviya/Videos/Vgg/restnet_weights'

model.load_state_dict(torch.load(PATH))
model.eval()

#**Testing the model**

In [0]:
def test(model, criterion):
# monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.
    i = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        # move to GPU
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
        print("batch:", i, "/ 5")
        i = i + 1
            
    print('Test Loss: {:.6f}\n'.format(test_loss))
    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (100. * correct / total, correct, total))
test(model_test, criterion)

batch: 0 / 5
batch: 1 / 5
batch: 2 / 5
batch: 3 / 5
Test Loss: 0.110705


Test Accuracy: 95% (191/200)


# Defining functions for real time prediction

In [0]:
def image_process(image):
    predict_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.Grayscale(num_output_channels=3),
                                      transforms.ToTensor()
                                            ])
    image = predict_transforms(image)
    return image 


def predict(img, model):
    
    img = Image.fromarray(img)
    img = image_process(img)

    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img)
    
    model.eval()
    target = model(img)
    
    out = F.softmax(target,dim=1)
    topk = out.cpu().topk(5)
    
    return (e.data.numpy().squeeze().tolist() for e in topk)


def plotting(img, prob, classes, class_names):
    
    fig,ax = plt.subplots()
    y_pos = np.arange(len(probs))
    plt.bar(y_pos,probs)
    ax.set_xticks(y_pos)
    ax.set_xticklabels(classs)
    plt.xticks(rotation=45)
    plt.grid(True, which='both')
    plt.show()


In [0]:
#capturing vedio
cap = cv2.VideoCapture(0)
cv2.namedWindow('Original', cv2.WINDOW_NORMAL)
frame = 0

#a rectangle will be displayed gesture ought to me created there
while(cap.isOpened()):
    ret, img = cap.read()
    cv2.rectangle(img,(300,300),(100,100),(0,255,0),0)
    cv2.imshow('frame',img)
    crop_img = img[100:300, 100:300]
    cv2.imshow('cropped image',crop_img)   

    while( frame % 100 == 1 ):
    #so that the we don't feed in random transition image 
        prob, classes = predict(crop_img, model.to(device))
        class_names = [test_loader.dataset.classes[e] for e in classes]
        
        print(prob)
        print(class_names)
        plotting(prob,class_names)
        print(frame)
        frame = frame+1

    if frame == 45:
        plt.close('all')
           
    k = cv2.waitKey(100)
    frame = frame+1
    if k & 0xFF == ord('q'):
            break 