# HP_BOSCH_T17

## Code Tested on Colab Pro Plus subscription. 



## Refer to [readme.pdf](https://drive.google.com/file/d/1vbNsY0Olfs9IShPD0dxdACSXCzhT1MU9/view?usp=sharing) for detailed instructions on setting up the environment before running this file.

## Refer to the anonymised drive folder [Evaluation_All](https://drive.google.com/drive/folders/19BM3f4UeywAuhPi1xk7iJIXP0q5pi2kB?usp=sharing) for the entire setup and directory structure.

# Black Box P1

In [None]:
!pip install av

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#Enter path to google drive folder
%cd /content/drive/MyDrive/HP_BS_T17/Evaluation_All

In [None]:
# Label map: one class name per line; a class's row index is used as its label (see class_to_label).
PATH_labelmap = "./label_map_k400.txt"
# Pickled model object that initialize_model() loads with torch.load.
PATH_model =  './checkpoint/Blackbox_P1.pt'  #Add desired model checkpoint 
# Root of the validation clips: one sub-directory per class (see sample_class_paths).
PATH_files = './k400_val/'

#Imports
import torch
import glob
import pandas as pd
import os
import re
from torchvision.io import read_video as rv
import numpy as np
from sklearn.metrics import top_k_accuracy_score as tkscore


import warnings
warnings.filterwarnings("ignore")

"""# Load files"""

def class_to_label(class_, labelmap=None):
    """Return the integer label (row index in the label map) of a class name.

    The query and the label-map entries are normalised before they are
    compared: every character that is neither a word character nor whitespace
    is dropped and spaces are removed. Underscores are additionally removed
    from the query only.

    Args:
        class_: Class name to look up (for example a class directory name).
        labelmap: Optional path or file-like object readable by
            ``pandas.read_csv`` with one class name per line. Defaults to the
            module-level ``PATH_labelmap``.

    Returns:
        Zero-based row index of the first label-map entry that matches.

    Raises:
        IndexError: If no label-map entry matches ``class_``.
    """
    if labelmap is None:
        labelmap = PATH_labelmap

    query = re.sub(r'[^\w\s]', '', class_)
    query = query.replace('_', '').replace(' ', '')

    df = pd.read_csv(labelmap, names=['class'])
    # regex= is spelled out on purpose: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the punctuation in place
    # and make punctuated class names impossible to match.
    normalised = (
        df['class']
        .str.replace(' ', '', regex=False)
        .str.replace(r'[^\w\s]', '', regex=True)
    )

    matches = df.index[normalised == query]
    if len(matches) == 0:
        raise IndexError(f"class {class_!r} not found in label map")
    return matches[0]

def sample_class_paths(PATH, nvids):
    """Pick up to ``nvids`` file names from every sub-directory of ``PATH``.

    Only the immediate sub-directories of ``PATH`` and the files directly
    inside each of them are considered, in the order ``os.walk`` reports them.

    Returns:
        dict mapping each sub-directory name to a ONE-element list whose only
        item is the list of at most ``nvids`` file names.
    """
    top, class_dirs, _ = next(os.walk(PATH))
    top = str(top)
    # next(os.walk(...))[2] is the list of plain files in that directory.
    return {
        str(name): [next(os.walk(top + '/' + str(name)))[2][:nvids]]
        for name in class_dirs
    }

def initialize_model(checkpoint):
  """Load the object stored at ``checkpoint`` and return the result of its ``eval()`` call.

  NOTE(review): ``torch.load`` unpickles the file, so only trusted
  checkpoints should be passed in.
  """
  return torch.load(checkpoint).eval()

def parse_video(PATH):
    """Read the video at ``PATH`` and return its frames as a float tensor.

    The first element returned by the video reader is cast to
    ``torch.FloatTensor`` and its last axis is moved to the front
    (``permute(3, 0, 1, 2)``); the caller documents the result as
    (channels, frames, height, width).
    """
    frames = rv(PATH)[0]
    clip = frames.type(torch.FloatTensor).permute(3, 0, 1, 2)
    # On out-of-memory errors, crop here: clip = clip[:, :, :128, :128]
    return clip

def eval(num_videos = 8, num_classes = 400, checkpoint = None):
  """Evaluate a pickled video classifier and report its top-5 accuracy.

  Up to ``num_videos`` clips are taken from every class directory under
  ``PATH_files``. Each clip is decoded, passed through the model as a batch
  of one, and its softmax scores are compared with the label returned by
  ``class_to_label`` for the directory name.

  Args:
    num_videos: Maximum number of clips evaluated per class directory.
    num_classes: Number of columns of the score matrix (model output size).
    checkpoint: Path of the pickled model; nothing is evaluated when ``None``.

  Returns:
    The top-5 accuracy as a fraction, or ``False`` when no checkpoint is given.
  """
  if checkpoint is None:
    print("Please provide model checkpont")
    return False

  with torch.no_grad():
    model = initialize_model(checkpoint)
    dict_paths = sample_class_paths(PATH_files, num_videos)
    true_labels = []
    pred_probs = []
    for class_name, file_lists in dict_paths.items():
      # sample_class_paths wraps each file list in an outer one-element list.
      file_names = file_lists[0]
      if not file_names:
        continue
      # One label-map lookup per class instead of one per clip.
      label = class_to_label(class_name)
      for file_name in file_names:
        video_path = PATH_files + f'/{class_name}/{file_name}'
        try:
          # Decoding is inside the try so an unreadable clip is handled as well.
          vid_tensor = parse_video(video_path)  # Size(channels, frames, height, width)
          # Batch axis goes in front. On OOM errors feed fewer frames,
          # e.g. vid_tensor[:, :32].
          model_logits = model(torch.unsqueeze(vid_tensor.cuda(), dim=0))
        except Exception as err:
          # Best effort: a failing clip still counts, with uninformative scores.
          print(f"Could not score {video_path} ({err!r}); using all-zero logits")
          model_logits = torch.zeros(1, num_classes)
        model_probabs = torch.nn.functional.softmax(model_logits, dim=1)
        pred_probs.append(model_probabs.cpu())
        true_labels.append(label)
        print(f"Video number = {len(true_labels)}")

    true_label_arr = np.array(true_labels)
    score_matrix = torch.zeros(len(pred_probs), num_classes)
    for row, probs in enumerate(pred_probs):
      score_matrix[row] = probs
    pred_probs_arr = score_matrix.numpy()

    # labels= tells sklearn the full class set, so the score also works when
    # some class contributed no clip to true_label_arr.
    accuracy_top5 = tkscore(true_label_arr, pred_probs_arr, k=5,
                            labels=np.arange(num_classes))
    print(f"Top 5 accuracy is {accuracy_top5*100} %")
    return accuracy_top5

In [None]:
num_classes = 400 #Kinetics 400 dataset
num_videos = 24 #Number of videos from each class to pick for evaluation

# Run the evaluation: eval() prints per-video progress and the final top-5
# accuracy, and returns that accuracy. eval() also enters torch.no_grad()
# itself, so the outer context only repeats it.
with torch.no_grad():
  acc = eval(checkpoint = PATH_model, num_videos = num_videos, num_classes = num_classes)

# Black Box P2

In [None]:
# Label map: one class name per line; a class's row index is used as its label (see class_to_label).
PATH_labelmap = "./label_map_k600.txt"
# Pickled model object that initialize_model() loads with torch.load.
PATH_model =  './checkpoint/Blackbox_P2.pt'  #Add desired model checkpoint 
# Root of the validation clips: one sub-directory per class (see sample_class_paths).
PATH_files = './k600_val/'

#Imports
import torch
import glob
import pandas as pd
import os
import re
from torchvision.io import read_video as rv
import numpy as np
from sklearn.metrics import top_k_accuracy_score as tkscore


import warnings
warnings.filterwarnings("ignore")

"""# Load files"""

def class_to_label(class_, labelmap=None):
    """Return the integer label (row index in the label map) of a class name.

    The query and the label-map entries are normalised before they are
    compared: every character that is neither a word character nor whitespace
    is dropped and spaces are removed. Underscores are additionally removed
    from the query only.

    Args:
        class_: Class name to look up (for example a class directory name).
        labelmap: Optional path or file-like object readable by
            ``pandas.read_csv`` with one class name per line. Defaults to the
            module-level ``PATH_labelmap``.

    Returns:
        Zero-based row index of the first label-map entry that matches.

    Raises:
        IndexError: If no label-map entry matches ``class_``.
    """
    if labelmap is None:
        labelmap = PATH_labelmap

    query = re.sub(r'[^\w\s]', '', class_)
    query = query.replace('_', '').replace(' ', '')

    df = pd.read_csv(labelmap, names=['class'])
    # regex= is spelled out on purpose: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the punctuation in place
    # and make punctuated class names impossible to match.
    normalised = (
        df['class']
        .str.replace(' ', '', regex=False)
        .str.replace(r'[^\w\s]', '', regex=True)
    )

    matches = df.index[normalised == query]
    if len(matches) == 0:
        raise IndexError(f"class {class_!r} not found in label map")
    return matches[0]

def sample_class_paths(PATH, nvids):
    """Pick up to ``nvids`` file names from every sub-directory of ``PATH``.

    Only the immediate sub-directories of ``PATH`` and the files directly
    inside each of them are considered, in the order ``os.walk`` reports them.

    Returns:
        dict mapping each sub-directory name to a ONE-element list whose only
        item is the list of at most ``nvids`` file names.
    """
    top, class_dirs, _ = next(os.walk(PATH))
    top = str(top)
    # next(os.walk(...))[2] is the list of plain files in that directory.
    return {
        str(name): [next(os.walk(top + '/' + str(name)))[2][:nvids]]
        for name in class_dirs
    }

def initialize_model(checkpoint):
  """Load the object stored at ``checkpoint`` and return the result of its ``eval()`` call.

  NOTE(review): ``torch.load`` unpickles the file, so only trusted
  checkpoints should be passed in.
  """
  return torch.load(checkpoint).eval()

def parse_video(PATH):
    """Read the video at ``PATH`` and return its frames as a float tensor.

    The first element returned by the video reader is cast to
    ``torch.FloatTensor`` and its last axis is moved to the front
    (``permute(3, 0, 1, 2)``); the caller documents the result as
    (channels, frames, height, width).
    """
    frames = rv(PATH)[0]
    clip = frames.type(torch.FloatTensor).permute(3, 0, 1, 2)
    # On out-of-memory errors, crop here: clip = clip[:, :, :128, :128]
    return clip

def eval(num_videos = 8, num_classes = 400, checkpoint = None):
  """Evaluate a pickled video classifier and report its top-5 accuracy.

  Up to ``num_videos`` clips are taken from every class directory under
  ``PATH_files``. Each clip is decoded, passed through the model as a batch
  of one, and its softmax scores are compared with the label returned by
  ``class_to_label`` for the directory name.

  Args:
    num_videos: Maximum number of clips evaluated per class directory.
    num_classes: Number of columns of the score matrix (model output size).
    checkpoint: Path of the pickled model; nothing is evaluated when ``None``.

  Returns:
    The top-5 accuracy as a fraction, or ``False`` when no checkpoint is given.
  """
  if checkpoint is None:
    print("Please provide model checkpont")
    return False

  with torch.no_grad():
    model = initialize_model(checkpoint)
    dict_paths = sample_class_paths(PATH_files, num_videos)
    true_labels = []
    pred_probs = []
    for class_name, file_lists in dict_paths.items():
      # sample_class_paths wraps each file list in an outer one-element list.
      file_names = file_lists[0]
      if not file_names:
        continue
      # One label-map lookup per class instead of one per clip.
      label = class_to_label(class_name)
      for file_name in file_names:
        video_path = PATH_files + f'/{class_name}/{file_name}'
        try:
          # Decoding is inside the try so an unreadable clip is handled as well.
          vid_tensor = parse_video(video_path)  # Size(channels, frames, height, width)
          # The batch axis must be the leading one (dim=0), not inserted after
          # the channel axis. On OOM errors feed fewer frames,
          # e.g. vid_tensor[:, :32].
          model_logits = model(torch.unsqueeze(vid_tensor.cuda(), dim=0))
        except Exception as err:
          # Best effort: a failing clip still counts, with uninformative scores.
          print(f"Could not score {video_path} ({err!r}); using all-zero logits")
          model_logits = torch.zeros(1, num_classes)
        # Normalise over the class axis. dim=0 would run over the singleton
        # batch axis of a (1, num_classes) tensor and turn every score into 1.0.
        # NOTE(review): assumes the model returns (1, num_classes) logits,
        # like the fallback above -- confirm against the checkpoint.
        model_probabs = torch.nn.functional.softmax(model_logits, dim=1)
        pred_probs.append(model_probabs.cpu())
        true_labels.append(label)
        print(f"Video number = {len(true_labels)}")

    true_label_arr = np.array(true_labels)
    score_matrix = torch.zeros(len(pred_probs), num_classes)
    for row, probs in enumerate(pred_probs):
      score_matrix[row] = probs
    pred_probs_arr = score_matrix.numpy()

    # labels= tells sklearn the full class set, so the score also works when
    # some class contributed no clip to true_label_arr.
    accuracy_top5 = tkscore(true_label_arr, pred_probs_arr, k=5,
                            labels=np.arange(num_classes))
    print(f"Top 5 accuracy is {accuracy_top5*100} %")
    return accuracy_top5

In [None]:
num_classes = 600 #Kinetics 600 dataset
num_videos = 24 #Number of videos from each class to pick for evaluation

# Run the evaluation: eval() prints per-video progress and the final top-5
# accuracy, and returns that accuracy. eval() also enters torch.no_grad()
# itself, so the outer context only repeats it.
with torch.no_grad():
  acc = eval(checkpoint = PATH_model, num_videos = num_videos, num_classes = num_classes)

# Grey Box P1

In [None]:
!pip install timm

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor
from torch.utils.data import random_split
import torch.nn as nn
import torch.nn.functional as F
import timm
import glob
import pandas as pd
import os
import re
from torchvision.io import read_video as rv
import numpy as np
from sklearn.metrics import top_k_accuracy_score as tkscore

In [None]:
class ImageClassificationBase(nn.Module):
    """Base module bundling loss and metric helpers for image classifiers.

    The helpers call ``self(images)``, so subclasses provide ``forward``.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch."""
        inputs, targets = batch
        return F.cross_entropy(self(inputs), targets)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one ``(images, labels)`` batch."""
        inputs, targets = batch
        logits = self(inputs)
        batch_loss = F.cross_entropy(logits, targets)
        batch_acc = accuracy(logits, targets)
        return {'val_loss': batch_loss.detach(), 'val_acc': batch_acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch ``val_loss`` / ``val_acc`` tensors into Python floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch's train loss, val loss and val accuracy."""
        summary = "Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc'])
        print(summary)


def accuracy(outputs, labels):
    """Return, as a 0-dim tensor, the fraction of rows whose arg-max along dim 1 equals the label."""
    predicted = torch.max(outputs, dim=1)[1]
    num_correct = torch.sum(predicted == labels).item()
    return torch.tensor(num_correct / len(predicted))


class ResNet2DFramewise(ImageClassificationBase):
    """Classifier wrapping timm's ``res2net101_26w_4s``, created without pretrained weights."""

    def __init__(self, num_classes=400):
        super().__init__()
        backbone = timm.create_model(
            'res2net101_26w_4s',
            pretrained=False,
            num_classes=num_classes,
        )
        self.network = backbone

    def forward(self, xb):
        """Pass ``xb`` through the wrapped network and return its output."""
        logits = self.network(xb)
        return logits

In [None]:
# Label map: one class name per line; a class's row index is used as its label (see class_to_label).
PATH_labelmap = "./label_map_k400.txt"
# Checkpoint whose "model" entry holds the state dict loaded in eval().
PATH_model =  './checkpoint/Greybox_P1.pt'  #Add desired model checkpoint 
# Root of the validation clips: one sub-directory per class (see sample_class_paths).
PATH_files = './k400_val/'


# Per-frame preprocessing applied in eval(): normalise each of the three
# channels with mean 0.5 / std 0.5, then resize to 64x64.
# (A bare Normalize used to be assigned to `transform` first and was
# immediately overwritten by this pipeline; that dead assignment is removed.)
transform = transforms.Compose([transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
     transforms.Resize((64, 64))
     ])

import warnings
warnings.filterwarnings("ignore")


def class_to_label(class_, labelmap=None):
    """Return the integer label (row index in the label map) of a class name.

    The query and the label-map entries are normalised before they are
    compared: every character that is neither a word character nor whitespace
    is dropped and spaces are removed. Underscores are additionally removed
    from the query only.

    Args:
        class_: Class name to look up (for example a class directory name).
        labelmap: Optional path or file-like object readable by
            ``pandas.read_csv`` with one class name per line. Defaults to the
            module-level ``PATH_labelmap``.

    Returns:
        Zero-based row index of the first label-map entry that matches.

    Raises:
        IndexError: If no label-map entry matches ``class_``.
    """
    if labelmap is None:
        labelmap = PATH_labelmap

    query = re.sub(r'[^\w\s]', '', class_)
    query = query.replace('_', '').replace(' ', '')

    df = pd.read_csv(labelmap, names=['class'])
    # regex= is spelled out on purpose: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the punctuation in place
    # and make punctuated class names impossible to match.
    normalised = (
        df['class']
        .str.replace(' ', '', regex=False)
        .str.replace(r'[^\w\s]', '', regex=True)
    )

    matches = df.index[normalised == query]
    if len(matches) == 0:
        raise IndexError(f"class {class_!r} not found in label map")
    return matches[0]

def sample_class_paths(PATH, nvids):
    """Pick up to ``nvids`` file names from every sub-directory of ``PATH``.

    Only the immediate sub-directories of ``PATH`` and the files directly
    inside each of them are considered, in the order ``os.walk`` reports them.

    Returns:
        dict mapping each sub-directory name to a ONE-element list whose only
        item is the list of at most ``nvids`` file names.
    """
    top, class_dirs, _ = next(os.walk(PATH))
    top = str(top)
    # next(os.walk(...))[2] is the list of plain files in that directory.
    return {
        str(name): [next(os.walk(top + '/' + str(name)))[2][:nvids]]
        for name in class_dirs
    }

def initialize_model(checkpoint):
  """Load the object stored at ``checkpoint`` and return the result of its ``eval()`` call.

  NOTE(review): ``torch.load`` unpickles the file, so only trusted
  checkpoints should be passed in.
  """
  return torch.load(checkpoint).eval()

def parse_video(PATH):
    """Read the video at ``PATH`` and return its frames as a float tensor.

    The first element returned by the video reader is cast to
    ``torch.FloatTensor`` and its last axis is moved to the front
    (``permute(3, 0, 1, 2)``); the caller treats axis 1 of the result as the
    frame axis.
    """
    frames = rv(PATH)[0]
    clip = frames.type(torch.FloatTensor).permute(3, 0, 1, 2)
    return clip

def eval(num_videos = 8, num_classes = 400, checkpoint = None):
  """Evaluate the frame-wise classifier on the middle frame of each clip.

  Up to ``num_videos`` clips are taken from every class directory under
  ``PATH_files``. The middle frame of each clip goes through the module-level
  ``transform`` and the model, and the softmax scores are compared with the
  label returned by ``class_to_label`` for the directory name.

  Args:
    num_videos: Maximum number of clips evaluated per class directory.
    num_classes: Size of the model's output layer and of the score matrix.
    checkpoint: Path of a checkpoint whose ``"model"`` entry is the state
      dict; nothing is evaluated when ``None``.

  Returns:
    The top-5 accuracy as a fraction, or ``False`` when no checkpoint is given.
  """
  if checkpoint is None:
    print("Please provide checkpoint path")
    return False

  with torch.no_grad():
    # Build the network with the requested head size and load the weights
    # from the checkpoint that was actually passed in.
    model = ResNet2DFramewise(num_classes=num_classes).cuda()
    state_dict = torch.load(checkpoint)
    model.load_state_dict(state_dict["model"])
    # Inference mode: without this, normalisation/dropout layers would keep
    # their training-time behaviour.
    model.eval()

    dict_paths = sample_class_paths(PATH_files, num_videos)
    true_labels = []
    pred_probs = []
    for class_name, file_lists in dict_paths.items():
      # sample_class_paths wraps each file list in an outer one-element list.
      file_names = file_lists[0]
      if not file_names:
        continue
      # One label-map lookup per class instead of one per clip.
      label = class_to_label(class_name)
      for file_name in file_names:
        video_path = PATH_files + f'/{class_name}/{file_name}'
        try:
          vid_tensor = parse_video(video_path)
          number_of_frames = vid_tensor.shape[1]
          desired_frame_number = number_of_frames // 2
          # Middle frame with a leading batch axis of size one.
          desired_frame = torch.unsqueeze(vid_tensor[:, desired_frame_number, :, :].cuda(), dim=0)
          # NOTE(review): pixel values are not rescaled before Normalize --
          # confirm this matches the preprocessing used in training.
          desired_frame = transform(desired_frame)
          model_logits = model(desired_frame)
        except Exception as err:
          # Best effort, as in the other evaluation sections of this file:
          # a failing clip still counts, with uninformative scores.
          print(f"Could not score {video_path} ({err!r}); using all-zero logits")
          model_logits = torch.zeros(1, num_classes)
        model_probabs = torch.nn.functional.softmax(model_logits, dim=1)
        pred_probs.append(model_probabs.cpu())
        true_labels.append(label)
        print(f"Video number = {len(true_labels)}")

    true_label_arr = np.array(true_labels)
    score_matrix = torch.zeros(len(pred_probs), num_classes)
    for row, probs in enumerate(pred_probs):
      score_matrix[row] = probs
    pred_probs_arr = score_matrix.numpy()

    # labels= tells sklearn the full class set, so the score also works when
    # some class contributed no clip to true_label_arr.
    accuracy_top5 = tkscore(true_label_arr, pred_probs_arr, k=5,
                            labels=np.arange(num_classes))
    print(f"Top 5 accuracy is {accuracy_top5*100} %")
    return accuracy_top5

num_videos = 24  # Number of videos from each class to pick for evaluation

# num_videos must be passed by keyword: a positional argument after a keyword
# argument is a SyntaxError. num_classes is left at eval()'s default of 400.
with torch.no_grad():
  acc = eval(checkpoint = PATH_model, num_videos = num_videos)

# Grey Box P2

In [None]:
# Label map: one class name per line; a class's row index is used as its label (see class_to_label).
PATH_labelmap = "./label_map_k600.txt"
# Pickled model object that initialize_model() loads with torch.load.
PATH_model =  './checkpoint/Greybox_P2.pt'  #Add desired model checkpoint 
# Root of the validation clips: one sub-directory per class (see sample_class_paths).
PATH_files = './k600_val/'

#Imports
import torch
import glob
import pandas as pd
import os
import re
from torchvision.io import read_video as rv
import numpy as np
from sklearn.metrics import top_k_accuracy_score as tkscore


import warnings
warnings.filterwarnings("ignore")

"""# Load files"""

def class_to_label(class_, labelmap=None):
    """Return the integer label (row index in the label map) of a class name.

    The query and the label-map entries are normalised before they are
    compared: every character that is neither a word character nor whitespace
    is dropped and spaces are removed. Underscores are additionally removed
    from the query only.

    Args:
        class_: Class name to look up (for example a class directory name).
        labelmap: Optional path or file-like object readable by
            ``pandas.read_csv`` with one class name per line. Defaults to the
            module-level ``PATH_labelmap``.

    Returns:
        Zero-based row index of the first label-map entry that matches.

    Raises:
        IndexError: If no label-map entry matches ``class_``.
    """
    if labelmap is None:
        labelmap = PATH_labelmap

    query = re.sub(r'[^\w\s]', '', class_)
    query = query.replace('_', '').replace(' ', '')

    df = pd.read_csv(labelmap, names=['class'])
    # regex= is spelled out on purpose: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the punctuation in place
    # and make punctuated class names impossible to match.
    normalised = (
        df['class']
        .str.replace(' ', '', regex=False)
        .str.replace(r'[^\w\s]', '', regex=True)
    )

    matches = df.index[normalised == query]
    if len(matches) == 0:
        raise IndexError(f"class {class_!r} not found in label map")
    return matches[0]

def sample_class_paths(PATH, nvids):
    """Pick up to ``nvids`` file names from every sub-directory of ``PATH``.

    Only the immediate sub-directories of ``PATH`` and the files directly
    inside each of them are considered, in the order ``os.walk`` reports them.

    Returns:
        dict mapping each sub-directory name to a ONE-element list whose only
        item is the list of at most ``nvids`` file names.
    """
    top, class_dirs, _ = next(os.walk(PATH))
    top = str(top)
    # next(os.walk(...))[2] is the list of plain files in that directory.
    return {
        str(name): [next(os.walk(top + '/' + str(name)))[2][:nvids]]
        for name in class_dirs
    }

def initialize_model(checkpoint):
  """Load the object stored at ``checkpoint`` and return the result of its ``eval()`` call.

  NOTE(review): ``torch.load`` unpickles the file, so only trusted
  checkpoints should be passed in.
  """
  return torch.load(checkpoint).eval()

def parse_video(PATH):
    """Read the video at ``PATH`` and return its frames as a float tensor.

    The first element returned by the video reader is cast to
    ``torch.FloatTensor`` and its last axis is moved to the front
    (``permute(3, 0, 1, 2)``); the caller documents the result as
    (channels, frames, height, width).
    """
    frames = rv(PATH)[0]
    clip = frames.type(torch.FloatTensor).permute(3, 0, 1, 2)
    # On out-of-memory errors, crop here: clip = clip[:, :, :128, :128]
    return clip

def eval(num_videos = 8, num_classes = 400, checkpoint = None):
  """Evaluate a pickled video classifier and report its top-5 accuracy.

  Up to ``num_videos`` clips are taken from every class directory under
  ``PATH_files``. Each clip is decoded, passed through the model as a batch
  of one, and its softmax scores are compared with the label returned by
  ``class_to_label`` for the directory name.

  Args:
    num_videos: Maximum number of clips evaluated per class directory.
    num_classes: Number of columns of the score matrix (model output size).
    checkpoint: Path of the pickled model; nothing is evaluated when ``None``.

  Returns:
    The top-5 accuracy as a fraction, or ``False`` when no checkpoint is given.
  """
  if checkpoint is None:
    print("Please provide model checkpont")
    return False

  with torch.no_grad():
    model = initialize_model(checkpoint)
    dict_paths = sample_class_paths(PATH_files, num_videos)
    true_labels = []
    pred_probs = []
    for class_name, file_lists in dict_paths.items():
      # sample_class_paths wraps each file list in an outer one-element list.
      file_names = file_lists[0]
      if not file_names:
        continue
      # One label-map lookup per class instead of one per clip.
      label = class_to_label(class_name)
      for file_name in file_names:
        video_path = PATH_files + f'/{class_name}/{file_name}'
        try:
          # Decoding is inside the try so an unreadable clip is handled as well.
          vid_tensor = parse_video(video_path)  # Size(channels, frames, height, width)
          # The batch axis must be the leading one (dim=0), not inserted after
          # the channel axis. On OOM errors feed fewer frames,
          # e.g. vid_tensor[:, :32].
          model_logits = model(torch.unsqueeze(vid_tensor.cuda(), dim=0))
        except Exception as err:
          # Best effort: a failing clip still counts, with uninformative scores.
          print(f"Could not score {video_path} ({err!r}); using all-zero logits")
          model_logits = torch.zeros(1, num_classes)
        # Normalise over the class axis. dim=0 would run over the singleton
        # batch axis of a (1, num_classes) tensor and turn every score into 1.0.
        # NOTE(review): assumes the model returns (1, num_classes) logits,
        # like the fallback above -- confirm against the checkpoint.
        model_probabs = torch.nn.functional.softmax(model_logits, dim=1)
        pred_probs.append(model_probabs.cpu())
        true_labels.append(label)
        print(f"Video number = {len(true_labels)}")

    true_label_arr = np.array(true_labels)
    score_matrix = torch.zeros(len(pred_probs), num_classes)
    for row, probs in enumerate(pred_probs):
      score_matrix[row] = probs
    pred_probs_arr = score_matrix.numpy()

    # labels= tells sklearn the full class set, so the score also works when
    # some class contributed no clip to true_label_arr.
    accuracy_top5 = tkscore(true_label_arr, pred_probs_arr, k=5,
                            labels=np.arange(num_classes))
    print(f"Top 5 accuracy is {accuracy_top5*100} %")
    return accuracy_top5

In [None]:
num_classes = 600 #Kinetics 600 dataset
num_videos = 24 #Number of videos from each class to pick for evaluation

# Run the evaluation: eval() prints per-video progress and the final top-5
# accuracy, and returns that accuracy. eval() also enters torch.no_grad()
# itself, so the outer context only repeats it.
with torch.no_grad():
  acc = eval(checkpoint = PATH_model, num_videos = num_videos, num_classes = num_classes)