# CS3264 Assignment 2
This is my implementation for Assignment 2. My approach is to use ResNet to extract features from the dataset's training images, then train a one-vs-all logistic classifier for each class. The code is available on GitHub [here](https://github.com/Charles1026/CS3264).

## Setup Code

In [1]:
import numpy as np
import os
from tqdm import tqdm
from typing import List, Tuple

import torch
import torch.nn as nn
import torchvision.io as io
from torchvision.models.resnet import resnet18, ResNet18_Weights, ResNet
import torchvision.transforms as tx

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Object class names listed in class-id order: the position of a name in this
# tuple is the integer id assigned to it in CLASS_NAME_TO_ID_MAP.
_CLASS_NAMES_IN_ID_ORDER = (
  "002_master_chef_can",
  "003_cracker_box",
  "004_sugar_box",
  "005_tomato_soup_can",
  "006_mustard_bottle",
  "007_tuna_fish_can",
  "008_pudding_box",
  "009_gelatin_box",
  "010_potted_meat_can",
  "011_banana",
  "019_pitcher_base",
  "021_bleach_cleanser",
  "024_bowl",
  "025_mug",
  "035_power_drill",
  "036_wood_block",
  "037_scissors",
  "040_large_marker",
  "051_large_clamp",
  "052_extra_large_clamp",
  "061_foam_brick",
)

# Maps every object class name to its zero-based class id (21 classes in total).
CLASS_NAME_TO_ID_MAP = {
  className: classId for classId, className in enumerate(_CLASS_NAMES_IN_ID_ORDER)
}

# Root of the Kaggle input dataset; every input path below is derived from it.
INPUT_DIR = "/kaggle/input/cs3264-assignment-2-ay2425s1"

# Image folders used for training and for the final predictions.
_YCB_DATASET_DIR = os.path.join(INPUT_DIR, "ycb_dataset")
TRAIN_DATSET_DIR = os.path.join(_YCB_DATASET_DIR, "train_data")
TEST_DATSET_DIR = os.path.join(_YCB_DATASET_DIR, "test_data")

# Locations for cached features / weights (not referenced by the code in this notebook).
_FEATURES_DIR = os.path.join(INPUT_DIR, "features")
FEATURES_OUTPUT_FILE = os.path.join(_FEATURES_DIR, "combined_features.npz")
TRAIN_DATA_FILE = os.path.join(_FEATURES_DIR, "train_features.npz")
TEST_DATA_FILE = os.path.join(_FEATURES_DIR, "test_features.npz")
MODEL_WEIGHTS_FILE = os.path.join(_FEATURES_DIR, "model_weights.npz")

# CSV file that receives the predictions for the test images.
TEST_OUTPUT_FILE = "/kaggle/working/submission.csv"


# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  DEVICE = "cuda"
else:
  DEVICE = "cpu"

# Per-channel (R, G, B) mean and standard deviation used to normalise images
# before they are fed to the pretrained ResNet.
RESNET_MEAN = torch.tensor([0.485, 0.456, 0.406])
RESNET_STD = torch.tensor([0.229, 0.224, 0.225])

# Resize transform to 224x224; defined here but not applied by the visible code.
RESNET_RESIZER = tx.Resize(size = (224, 224))

def createResNetNoFCLayer():
  """Create a pretrained ResNet-18 to be used as a feature extractor.

  The network is loaded with ``ResNet18_Weights.DEFAULT`` and its final ``fc``
  layer is swapped for ``nn.Identity`` so a forward pass returns the input of
  that layer instead of class scores.

  Returns:
    The modified network, moved to ``DEVICE`` and switched to eval mode.
  """
  backbone = resnet18(weights = ResNet18_Weights.DEFAULT)
  # Drop the classification head: the identity passes the features straight through.
  backbone.fc = nn.Identity()

  backbone = backbone.to(DEVICE)
  backbone.eval()
  return backbone

class MultiLabelPredictor(nn.Module):
  """Linear layer followed by a sigmoid, giving one independent score per class.

  The layer maps 512 input features to ``len(CLASS_NAME_TO_ID_MAP)`` outputs.
  It can be built with freshly initialised weights (no arguments) or loaded
  with externally fitted intercepts and coefficients.
  """

  def __init__(self, intercepts: "torch.Tensor | None" = None, coefs: "torch.Tensor | None" = None) -> None:
    """Build the predictor, optionally loading pre-fitted weights.

    Python keeps only the last definition of a method, so the previous pair of
    ``__init__`` definitions left the no-argument form unusable. A single
    constructor with optional arguments supports both call styles.

    Args:
      intercepts: Bias vector of shape ``(numClasses,)``, or ``None``.
      coefs: Weight matrix of shape ``(numClasses, 512)``, or ``None``.

    Raises:
      ValueError: If only one of the two tensors is given, or if a tensor does
        not have the shape of the parameter it is meant to fill.
    """
    super().__init__()
    self.fc = nn.Linear(512, len(CLASS_NAME_TO_ID_MAP), bias=True)
    self.sigmoid = torch.nn.Sigmoid()

    if intercepts is None and coefs is None:
      # Keep nn.Linear's default initialisation.
      return
    if intercepts is None or coefs is None:
      raise ValueError("intercepts and coefs must be provided together")

    if tuple(intercepts.shape) != tuple(self.fc.bias.shape):
      raise ValueError(f"intercepts must have shape {tuple(self.fc.bias.shape)}, got {tuple(intercepts.shape)}")
    if tuple(coefs.shape) != tuple(self.fc.weight.shape):
      raise ValueError(f"coefs must have shape {tuple(self.fc.weight.shape)}, got {tuple(coefs.shape)}")

    # Copy in place rather than rebinding ``.data`` so the parameters keep their
    # own storage and dtype regardless of what the caller passes in.
    with torch.no_grad():
      self.fc.bias.copy_(intercepts)
      self.fc.weight.copy_(coefs)

  def forward(self, x):
    """Return the per-class sigmoid scores for the feature batch ``x``."""
    x = self.fc(x)
    return self.sigmoid(x)

def createResNetCustomFCLayer(intercepts: torch.Tensor, coefs: torch.Tensor):
  """Create a pretrained ResNet-18 whose ``fc`` layer is a ``MultiLabelPredictor``.

  Args:
    intercepts: Bias values handed to ``MultiLabelPredictor``.
    coefs: Weight values handed to ``MultiLabelPredictor``.

  Returns:
    The assembled network, moved to ``DEVICE`` and switched to eval mode.
  """
  classifierHead = MultiLabelPredictor(intercepts, coefs)

  network = resnet18(weights = ResNet18_Weights.DEFAULT)
  # Replace the stock classification layer with the fitted multi-label head.
  network.fc = classifierHead

  return network.to(DEVICE).eval()

#TODO: Test if resize helps with feature extraction 
def prepImageforResnet(img: torch.Tensor):
  """Turn a single image tensor into a normalised one-image batch on ``DEVICE``.

  The image is converted to float, scaled by 1/255, normalised per channel with
  ``RESNET_MEAN`` / ``RESNET_STD`` and given a leading batch dimension.

  Args:
    img: Channel-first image tensor with 3 channels (the callers in this file
      pass the RGB output of ``torchvision.io.read_image``).

  Returns:
    The normalised tensor with an extra leading dimension of size 1.
  """
  # Reshape the (3,) statistics to (3, 1, 1) so they broadcast over height and width.
  channelMean = RESNET_MEAN.view(-1, 1, 1)
  channelStd = RESNET_STD.view(-1, 1, 1)

  scaled = img.float().div(255)
  normalised = scaled.sub(channelMean).div(channelStd)
  return normalised.unsqueeze(0).to(DEVICE)
  

## Feature Extraction Code

In [2]:
@torch.no_grad()
def loadAndExtractFeaturesAndLabels(resnet:ResNet, rootDir: str, loadLabels: bool = False):
  """Walk ``rootDir`` and run every ``.png`` image through ``resnet``.

  When ``loadLabels`` is set, the file ``<prefix>-box.txt`` next to each image
  is read as well, where ``<prefix>`` is the part of the image file name before
  the first ``-``. The first whitespace-separated token of each line is taken
  as a class name and the matching entry of the image's label vector is set to 1.

  Args:
    resnet: Network applied to each prepared image; its output is stored as the
      image's feature row.
    rootDir: Directory that is searched recursively for ``.png`` files.
    loadLabels: Whether to also build the multi-hot label vectors.

  Returns:
    ``features`` as a NumPy array with one row per image, or the tuple
    ``(features, labels)`` when ``loadLabels`` is true, where ``labels`` has
    one row per image and ``len(CLASS_NAME_TO_ID_MAP)`` columns.

  Raises:
    RuntimeError: If no ``.png`` image is found under ``rootDir``.
    KeyError: If a label file names a class missing from ``CLASS_NAME_TO_ID_MAP``.
  """
  featuresList: List = []
  labelsList: List = []
  numClasses = len(CLASS_NAME_TO_ID_MAP)

  for root, _subdirs, files in tqdm(os.walk(rootDir), desc = "Extracing Features"):
    for file in files:
      fileName, fileExt = os.path.splitext(file)
      if fileExt != ".png":
        continue

      img = io.read_image(os.path.join(root, file), io.ImageReadMode.RGB)
      featuresList.append(resnet(prepImageforResnet(img)))

      if not loadLabels:
        continue

      fileNum = fileName.split("-")[0]
      imgLabels = torch.zeros(numClasses)
      with open(os.path.join(root, f"{fileNum}-box.txt"), "r") as labelFile:
        for line in labelFile:
          fields = line.split()
          if not fields:
            # Blank (e.g. trailing) lines carry no object and must not crash the run.
            continue
          # Only the class name matters here; the bounding box values are ignored.
          imgLabels[CLASS_NAME_TO_ID_MAP[fields[0]]] = 1

      labelsList.append(imgLabels.unsqueeze(0))

  if not featuresList:
    # torch.cat on an empty list fails with an unhelpful message; say what is wrong.
    raise RuntimeError(f"No .png images found under {rootDir!r}")

  features = torch.cat(featuresList).cpu().numpy()
  if loadLabels:
    return features, torch.cat(labelsList).numpy()
  return features

## Split Data

In [3]:
def splitData(features: np.ndarray, labels: np.ndarray, trainRatio: float = 0.8) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
  """Randomly split the samples into a training part and a held-out part.

  Args:
    features: Feature rows, one per sample.
    labels: Label rows aligned with ``features``.
    trainRatio: Passed to ``train_test_split`` as ``train_size``.

  Returns:
    The result of ``train_test_split``: training features, held-out features,
    training labels, held-out labels (in that order).
  """
  # No random_state is given, so the split differs from run to run.
  splitParts = train_test_split(features, labels, train_size = trainRatio)
  return splitParts

## Train Model

In [4]:
def trainModel(features: np.ndarray, labels: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
  """Fit one binary logistic regression per label column (one-vs-all).

  Args:
    features: Feature rows, one per sample.
    labels: Label matrix with one row per sample and one column per class.

  Returns:
    ``(intercepts, coefs)``: the fitted ``intercept_`` and ``coef_`` arrays of
    every per-class model, each stacked along a new leading class axis.
  """
  # Transposing makes the iteration yield one column (= one class) at a time.
  fittedModels = [
    LogisticRegression(solver="liblinear").fit(features, classColumn)
    for classColumn in tqdm(labels.T, desc="Training Models")
  ]

  intercepts = np.stack([fitted.intercept_ for fitted in fittedModels])
  coefs = np.stack([fitted.coef_ for fitted in fittedModels])
  return intercepts, coefs

## Validate Model

In [5]:
def validateModel(model: "MultiLabelPredictor", features: torch.Tensor, labels: torch.Tensor):
  """Print the number of wrong label entries and return the macro-averaged F1 score.

  Predictions are the model outputs rounded to 0 or 1. F1 is computed per class
  (label column) and averaged over all classes. A class whose precision, recall
  or F1 is undefined (no predicted positives, no true positives, or both zero)
  contributes an F1 of 0 instead of raising ``ZeroDivisionError``.

  Args:
    model: Module mapping a feature batch to per-class scores in [0, 1].
    features: Feature rows, one per sample.
    labels: 0/1 label matrix on the CPU with one row per sample and one column
      per class.

  Returns:
    The macro F1 score as a Python float (0.0 when there are no label columns).
  """
  # Run the forward pass on the device the model lives on; a model without
  # parameters is evaluated wherever the features already are.
  firstParam = next(model.parameters(), None)
  device = firstParam.device if firstParam is not None else features.device

  # Pure evaluation: no autograd graph is needed.
  with torch.no_grad():
    predY = model(features.to(device)).cpu()
  predLabels = torch.round(predY)

  print(f"{torch.sum(torch.abs(labels - predLabels)).int()} / {labels.shape[0] * labels.shape[1]} incorrect matches")

  numClasses = labels.shape[1]
  if numClasses == 0:
    return 0.0

  # Count the confusion-matrix cells for every class at once (sum over samples).
  predictedPositive = predLabels == 1
  predictedNegative = predLabels == 0
  actualPositive = labels == 1
  actualNegative = labels == 0
  tpCounts = (predictedPositive & actualPositive).sum(dim=0).tolist()
  fpCounts = (predictedPositive & actualNegative).sum(dim=0).tolist()
  fnCounts = (predictedNegative & actualPositive).sum(dim=0).tolist()

  totalF1Score = 0.0
  for tp, fp, fn in zip(tpCounts, fpCounts, fnCounts):
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if precision + recall > 0:
      totalF1Score += 2 * ((precision * recall) / (precision + recall))
    # else: F1 is undefined for this class and counts as 0.

  return totalF1Score / numClasses

## Predict

In [6]:
@torch.no_grad()
def predict(predDir: str, model: ResNet):
  """Predict the labels of every image stored in the sub-folders of ``predDir``.

  Each sub-directory of ``predDir`` is treated as one video and each file in it
  as one image. Folders and files are visited in sorted order so the returned
  list (and the CSV written from it) has the same row order on every run;
  ``os.listdir`` alone gives no ordering guarantee.

  Args:
    predDir: Directory holding one sub-directory per video.
    model: Network mapping a prepared image batch to per-class scores.

  Returns:
    A list of ``(imageId, labels)`` tuples, where ``imageId`` is
    ``"<videoId>_<file name up to the first '-'>"`` and ``labels`` is the
    NumPy integer array of the rounded scores.
  """
  predList: List = []
  for videoId in tqdm(sorted(os.listdir(predDir)), desc = "Predicting"):
    videoDir = os.path.join(predDir, videoId)
    if not os.path.isdir(videoDir):
      continue

    for imgId in sorted(os.listdir(videoDir)):
      imgFile = os.path.join(videoDir, imgId)
      if not os.path.isfile(imgFile):
        # Nested folders cannot be decoded as images; skip them.
        continue

      img = io.read_image(imgFile, io.ImageReadMode.RGB)
      yPred = model(prepImageforResnet(img))
      predLabels = torch.round(yPred).int()

      imgName = imgId.split("-")[0]
      predList.append((f"{videoId}_{imgName}", predLabels.squeeze().cpu().numpy()))

  return predList

def savePredictions(outFile: str, predictions: List[Tuple[str, np.ndarray]]):
  """Write the predictions to ``outFile`` as CSV, one row per image.

  The first line is a fixed header (``img_id`` followed by ``class_0`` to
  ``class_20``). Every further line holds the image id followed by the
  ``str()`` of each entry of its label array, separated by commas.

  Args:
    outFile: Path of the file to create or overwrite.
    predictions: ``(imageId, labels)`` pairs as returned by ``predict``.
  """
  headerLine = "img_id,class_0,class_1,class_2,class_3,class_4,class_5,class_6,class_7,class_8,class_9,class_10,class_11,class_12,class_13,class_14,class_15,class_16,class_17,class_18,class_19,class_20\n"

  with open(outFile, "w") as csvFile:
    csvFile.write(headerLine)

    for imgName, imgLabels in predictions:
      csvFile.write(f"{imgName},")
      # join() puts commas only between values, so no trailing comma is written.
      csvFile.write(",".join(str(value) for value in imgLabels))
      csvFile.write("\n")

## Run Workflow

In [7]:
def _toSqueezedFloatTensor(array):
  """Convert a NumPy array to a float32 tensor with all size-1 dimensions removed."""
  return torch.tensor(array, dtype = torch.float32).squeeze()

# Feature Extraction: run every training image through the headless ResNet.
print("Loading ResNet")
resNetNoFC = createResNetNoFCLayer()

features, labels = loadAndExtractFeaturesAndLabels(resNetNoFC, TRAIN_DATSET_DIR, loadLabels = True)
print(f"Extracted the features and labels for {features.shape[0]} images")

# Split Data: keep 90% for training and the rest for validation.
print("Splitting Data")
X_train, X_test, y_train, y_test = splitData(features, labels, 0.9)

# Train Model: one logistic regression per class.
intercepts, coefs = trainModel(X_train, y_train)

# Validate Model: load the fitted weights into a linear + sigmoid head and score it.
logClassifier = MultiLabelPredictor(
  _toSqueezedFloatTensor(intercepts),
  _toSqueezedFloatTensor(coefs),
).to(DEVICE)
macroF1Metric = validateModel(logClassifier, torch.tensor(X_test), torch.tensor(y_test))
print(f"Model has macro F1 metric of {macroF1Metric} on validation data.")

# Predict: attach the fitted head to a full ResNet, label the test images, write the CSV.
resnet = createResNetCustomFCLayer(
  _toSqueezedFloatTensor(intercepts),
  _toSqueezedFloatTensor(coefs),
)
predictions = predict(TEST_DATSET_DIR, resnet)
savePredictions(TEST_OUTPUT_FILE, predictions)

Loading ResNet


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 193MB/s]
Extracing Features: 53it [01:11,  1.36s/it]


Extracted the features and labels for 2000 images
Splitting Data


Training Models: 100%|██████████| 21/21 [00:07<00:00,  2.95it/s]


35 / 4200 incorrect matches
Model has macro F1 metric of 0.9808469678302297 on validation data.


Predicting: 100%|██████████| 14/14 [00:14<00:00,  1.04s/it]
