**Processing Video Pipeline Answer**


Since we already have a working ML model that classifies an image as containing a carrot or not, one way to use it on a video is to split the video into individual frames, treat each frame as an image, and run the model on every frame to detect whether it contains a carrot. One problem with this approach is that if the detection model is slow, processing a long video frame by frame will take a very long time.



**Toy Implementation**

In [None]:
from torch.nn.modules.loss import CrossEntropyLoss
import torch
import torchvision
from torchvision import transforms, datasets
import numpy as np
import timeit
from collections import OrderedDict
from pprint import pformat
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import random_split

# Use PyTorch's 'file_system' strategy when tensors are shared between processes.
torch.multiprocessing.set_sharing_strategy("file_system")

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Functional cross-entropy, used as the loss by the training/evaluation code.
CrossELoss = F.cross_entropy

# L1-penalty weight and optimizer step size used by logistic_regression().
l1_lambda_1 = 0.00003
learning_rate = 0.001

def compute_score(acc, min_thres, max_thres):
    """Map an accuracy onto a 0-100 score that is linear between two thresholds.

    Accuracies at or below ``min_thres`` score 0.0, accuracies at or above
    ``max_thres`` score 100.0, and anything in between is interpolated
    linearly.
    """
    if acc <= min_thres:
        return 0.0
    if acc >= max_thres:
        return 100.0
    return float(acc - min_thres) / (max_thres - min_thres) * 100


def run(algorithm, dataset_name, filename):
    """Run ``algorithm`` on ``dataset_name``, save its predictions and score them.

    Args:
        algorithm: Callable taking the dataset name and returning
            ``(predicted_test_labels, gt_labels)``. Either element may be
            ``None`` to signal that nothing was produced.
        dataset_name: Passed through to ``algorithm`` unchanged.
        filename: Path the predictions are written to with ``np.savetxt``.

    Returns:
        ``(correct, accuracy, run_time)`` where ``correct`` is the number of
        matching labels, ``accuracy`` is ``correct`` divided by the number of
        labels (0.0 when there are no labels), and ``run_time`` is the
        wall-clock time spent inside ``algorithm``. ``(0, 0, 0)`` is returned
        when the algorithm yields ``None``.
    """
    start = timeit.default_timer()
    predicted_test_labels, gt_labels = algorithm(dataset_name)
    run_time = timeit.default_timer() - start
    if predicted_test_labels is None or gt_labels is None:
        return (0, 0, 0)

    np.savetxt(filename, np.asarray(predicted_test_labels))

    # Labels and predictions are compared batch by batch: iterating walks the
    # leading dimension of each container.
    correct = 0
    total = 0
    for label, prediction in zip(gt_labels, predicted_test_labels):
        total += label.size(0)
        correct += (prediction.cpu().numpy() == label.cpu().numpy()).sum().item()

    # An empty prediction set would otherwise divide by zero.
    if total:
        accuracy = float(correct) / total
        percent = 100 * correct / total
    else:
        accuracy = 0.0
        percent = 0.0

    print('Accuracy of the network on the 10000 test images: %d %%' % percent)
    return (correct, accuracy, run_time)



# Logistic regression
class LogisticRegression(nn.Module):
    """Multinomial logistic regression: one linear layer over flattened inputs.

    ``forward`` returns the raw output of the linear layer; no softmax is
    applied inside the module.
    """

    def __init__(self, input, output):
        super().__init__()
        self.fc = nn.Linear(in_features=input, out_features=output)

    def forward(self, x):
        # Collapse everything after the batch dimension into one feature axis.
        batch_size = x.shape[0]
        flattened = x.view(batch_size, -1)
        return self.fc(flattened)

class One_Hot(nn.Module):
    """Turn integer class indices into one-hot rows of width ``depth``.

    The lookup table is a ``depth x depth`` identity matrix; row ``i`` is the
    one-hot encoding of class ``i``.
    """

    def __init__(self, depth):
        super(One_Hot, self).__init__()
        self.depth = depth
        # Registered as a non-persistent buffer so that Module.to()/cuda()/cpu()
        # move it with the module while state_dict() stays unchanged.
        self.register_buffer("ones", torch.eye(depth), persistent=False)

    def forward(self, X_in):
        """Return one row of the identity table per index in the 1-D ``X_in``."""
        indices = X_in.long()
        # Follow the input's device; this is a no-op when they already match.
        table = self.ones.to(indices.device)
        return table.index_select(0, indices)

    def __repr__(self):
        return self.__class__.__name__ + "({})".format(self.depth)

# Shared encoder for ten class labels, placed on the active device.
one_hot = One_Hot(depth=10).to(device)


def train(epoch, train_loader, model, optimizer, lam):
    """Run one pass over ``train_loader``, minimising cross-entropy plus L1.

    ``epoch`` is accepted from the caller but is not used inside the function.
    ``lam`` scales the sum of absolute values of every model parameter, which
    is added to the cross-entropy loss before back-propagation.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        logits = model(inputs)

        # L1 penalty over all parameters returned by model.parameters().
        penalty = sum(param.abs().sum() for param in model.parameters())
        objective = CrossELoss(logits, one_hot(labels)) + lam * penalty

        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

def validation(validation_loader, model):
    """Evaluate ``model`` on ``validation_loader`` without updating it.

    Args:
        validation_loader: Loader yielding ``(data, target)`` batches; its
            ``dataset`` attribute supplies the sample count.
        model: Module to evaluate; it is switched to eval mode.

    Returns:
        ``(mean_loss, accuracy)``: the per-sample average cross-entropy and
        the fraction of correctly classified samples. Both are 0.0 when the
        dataset is empty.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in validation_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            pred = output.max(1)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
            # Sum (not mean) per batch so that the single division below
            # produces a true per-sample average.
            total_loss += CrossELoss(output, one_hot(target), reduction="sum").item()

    # Normalise once, after the loop; dividing inside the loop would rescale
    # the running total on every batch.
    n_samples = len(validation_loader.dataset)
    if n_samples == 0:
        return 0.0, 0.0
    return total_loss / n_samples, correct / n_samples

def test(test_loader, model):
    """Predict a label for every sample in ``test_loader``.

    Args:
        test_loader: Loader yielding ``(data, target)`` batches.
        model: Module to evaluate; it is switched to eval mode.

    Returns:
        ``(p_labels, gt_labels)``: predicted and ground-truth labels, each
        stacked along a new leading batch dimension, i.e. shaped
        ``(num_batches, batch_size)``.
    """
    model.eval()
    p_labels, gt_labels = [], []
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            p_labels.append(output.max(1)[1])
            # The targets already are the class indices; no one-hot/argmax
            # round trip is needed to recover them.
            gt_labels.append(target.long())

    # torch.stack keeps one row per batch and therefore needs every batch to
    # have the same size.
    p_labels = torch.stack(p_labels, dim=0)
    gt_labels = torch.stack(gt_labels, dim=0)
    return p_labels, gt_labels

def logistic_regression(dataset_name):


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_epochs = 10


    p_labels, gt_labels = [], []


    if dataset_name == 'MNIST':
      MNIST_training = torchvision.datasets.MNIST(root = './data', train=True, download=True,
                              transform=transforms.ToTensor())

      MNIST_test_set = torchvision.datasets.MNIST(root = './data', train=False, download=True,
                              transform=transforms.ToTensor())


      MNIST_training_set, MNIST_validation_set = torch.utils.data.random_split(MNIST_training, [48000, 12000], generator = torch.Generator().manual_seed(42))


      train_loader = torch.utils.data.DataLoader(MNIST_training_set,batch_size=75, shuffle=True)

      validation_loader = torch.utils.data.DataLoader(MNIST_validation_set,batch_size=75, shuffle=True)

      test_loader = torch.utils.data.DataLoader(MNIST_test_set,batch_size=1000, shuffle=True)

      model = LogisticRegression(28*28,10).to(device)
      optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

      validation(validation_loader,model)
      for epoch in (1, n_epochs+1):

        train(epoch, train_loader, model,optimizer,l1_lambda_1)
        validation(validation_loader,model)
      p_labels,  gt_labels  = test(test_loader, model)


    elif dataset_name == "CIFAR10":
      CIFAR10_training = torchvision.datasets.CIFAR10(root = './data', train=True, download=True,
                              transform=torchvision.transforms.Compose([
                                 torchvision.transforms.ToTensor(),
                                 torchvision.transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))]))

      CIFAR10_test_set = torchvision.datasets.CIFAR10(root = './data', train=False, download=True,
                              transform=torchvision.transforms.Compose([
                                 torchvision.transforms.ToTensor(),
                                 torchvision.transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))]))

      CIFAR10_training_set, CIFAR10_validation_set = random_split(CIFAR10_training, [38000, 12000],generator = torch.Generator().manual_seed(42))


      train_loader = torch.utils.data.DataLoader(CIFAR10_training_set,batch_size=70, shuffle=True)

      validation_loader = torch.utils.data.DataLoader(CIFAR10_validation_set,batch_size=70, shuffle=True)

      test_loader = torch.utils.data.DataLoader(CIFAR10_test_set,batch_size=1000, shuffle=True)

      model = LogisticRegression(32*32*3,10).to(device)
      optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

      validation(validation_loader,model)
      for epoch in (1, n_epochs+1):

        train(epoch, train_loader, model,optimizer,l1_lambda_1)
        validation(validation_loader,model)
      p_labels,  gt_labels  = test(test_loader, model)


    return p_labels.cpu(), gt_labels.cpu()

def tune_hyper_parameter():
    """Search optimizer and L1-weight settings for CIFAR10 logistic regression.

    Every combination of four optimizer settings (SGD and Adam, each with
    learning rates 0.001 and 0.00001) and two L1 weights is trained for
    ``n_epochs`` epochs on the training split and scored on the validation
    split.

    Returns:
        ``(run_time, tuned_parameters, best_acc)`` where ``run_time`` is the
        wall-clock duration of the search, ``tuned_parameters`` is
        ``[tuned_lambda, tuned_optimizer]`` for the best configuration (the
        optimizer is the instance that trained the best model, or ``None`` if
        no configuration scored above zero), and ``best_acc`` is the best
        validation accuracy in percent.
    """
    n_epochs = 7
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    CIFAR10_training = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform)

    # Only the training and validation splits are used for tuning; the test
    # split is not loaded here because nothing in the search reads it.
    CIFAR10_training_set, CIFAR10_validation_set = random_split(
        CIFAR10_training, [38000, 12000])

    train_loader = torch.utils.data.DataLoader(
        CIFAR10_training_set, batch_size=128, shuffle=True, num_workers=2)
    validation_loader = torch.utils.data.DataLoader(
        CIFAR10_validation_set, batch_size=128, shuffle=False, num_workers=2)

    # Factories rather than instances: each configuration trains a fresh
    # model, and an optimizer only updates the parameters it was built with.
    optimizer_factories = [
        lambda params: torch.optim.SGD(params, lr=0.001),
        lambda params: torch.optim.SGD(params, lr=0.00001),
        lambda params: torch.optim.Adam(params, lr=0.001),
        lambda params: torch.optim.Adam(params, lr=0.00001),
    ]
    l1_lambda = [0.00045, 0.00003]

    best_acc = 0
    tuned_lambda = 0
    tuned_optimizer = None
    n_validation = len(validation_loader.dataset)

    start = timeit.default_timer()
    for make_optimizer in optimizer_factories:
        for lambda_1 in l1_lambda:
            model = LogisticRegression(32 * 32 * 3, 10).to(device)
            optimizer = make_optimizer(model.parameters())

            for epoch in range(1, n_epochs + 1):
                train(epoch, train_loader, model, optimizer, lambda_1)

            # The hit counter is reset per configuration so accuracies are
            # not accumulated across configurations.
            model.eval()
            correct = 0
            with torch.no_grad():
                for data, target in validation_loader:
                    data = data.to(device)
                    target = target.to(device)
                    output = model(data)
                    pred = output.max(1)[1]
                    correct += pred.eq(target.view_as(pred)).sum().item()
            accuracy = (correct * 100.0) / n_validation

            if accuracy > best_acc:
                best_acc = accuracy
                tuned_optimizer = optimizer
                tuned_lambda = lambda_1
    run_time = timeit.default_timer() - start

    tuned_parameters = [tuned_lambda, tuned_optimizer]
    print("-------------------")
    print("CIFAR10 tuned")

    return run_time, tuned_parameters, best_acc


"""Main loop. Run time and total score will be shown below."""

def run_on_dataset(dataset_name, filename):
    """Run logistic regression on one dataset and turn its accuracy into a score.

    Args:
        dataset_name: 'MNIST' or 'CIFAR10'.
        filename: Path passed to ``run`` for saving the predictions.

    Returns:
        ``(result, score)`` where ``result`` is an ``OrderedDict`` with the
        keys ``correct_predict``, ``accuracy``, ``score`` and ``run_time``.

    Raises:
        ValueError: If ``dataset_name`` has no scoring thresholds. This is
            checked before any training starts.
    """
    # (min_thres, max_thres) accuracy bounds used for scoring each dataset.
    thresholds = {
        "MNIST": (0.82, 0.92),
        "CIFAR10": (0.28, 0.38),
    }
    if dataset_name not in thresholds:
        raise ValueError(
            "Unsupported dataset %r; expected one of %s"
            % (dataset_name, sorted(thresholds)))
    min_thres, max_thres = thresholds[dataset_name]

    correct_predict, accuracy, run_time = run(logistic_regression, dataset_name, filename)

    score = compute_score(accuracy, min_thres, max_thres)
    result = OrderedDict(correct_predict=correct_predict,
                         accuracy=accuracy, score=score,
                         run_time=run_time)
    return result, score


def main():
    """Score logistic regression on MNIST and CIFAR10 and report the results.

    Each dataset is run through ``run_on_dataset``; the per-dataset scores are
    combined into a weighted total, and the collected results are printed.

    Returns:
        An ``OrderedDict`` with one entry per dataset plus ``'total_score'``.
    """
    filenames = {"MNIST": "predictions_mnist.txt", "CIFAR10": "predictions_cifar10.txt"}
    # Weights are keyed by dataset so they cannot drift out of step with the
    # dataset list.
    score_weights = {"MNIST": 0.5, "CIFAR10": 0.5}

    result_all = OrderedDict()
    total_score = 0.0
    for dataset_name, filename in filenames.items():
        result_all[dataset_name], this_score = run_on_dataset(dataset_name, filename)
        total_score += this_score * score_weights[dataset_name]
    result_all['total_score'] = total_score

    # Without this report the collected results would be silently discarded.
    print("\nResult:\n", pformat(result_all, indent=4))
    return result_all


# Entry point: score both datasets, then run the CIFAR10 hyper-parameter search.
main()
tune_hyper_parameter()


Accuracy of the network on the 10000 test images: 91 %
Files already downloaded and verified
Files already downloaded and verified
Accuracy of the network on the 10000 test images: 38 %

Result:
 OrderedDict([   (   'MNIST',
                    OrderedDict([   ('correct_predict', 9141),
                                    ('accuracy', 0.9141),
                                    ('score', 94.09999999999998),
                                    ('run_time', 16.66946894800003)])),
                (   'CIFAR10',
                    OrderedDict([   ('correct_predict', 3843),
                                    ('accuracy', 0.3843),
                                    ('score', 100.0),
                                    ('run_time', 33.64073344899998)])),
                ('total_score', 97.04999999999998)])
Files already downloaded and verified
Files already downloaded and verified
