Copyright &copy; University of Strasbourg. All Rights Reserved.

<div>
<a href="https://cholectriplet2021.grand-challenge.org/">
<img src="https://raw.githubusercontent.com/DpkApt/evis_at/master/pictures/header.png" align="left"/>
</a>
</div>

<h1><center>Getting Started</center></h1>


# Introduction

In this notebook, we provide sample code to help you familiarize yourself with the challenge, the dataset, and the metrics. These are minimal examples that illustrate a simple deep learning pipeline applied to a small subset of the Action Triplet dataset, **CholecT50**.

# Imports

In [None]:
# Tensorflow contains functions needed to build and train neural networks
# Import relevant libraries

from PIL import Image

import csv
import matplotlib.pyplot as plt
import numpy as np
import os
import pprint
import random
import tensorflow as tf


print("Libraries successfully imported!")

# Data Loading and Visualization

In [None]:
# Download the dataset archive from an online repository and extract it.

DATA_URL = (
    'https://seafile.unistra.fr/f/8c8a776b74ac4eac9274/?dl=1'
)
ARCHIVE_SUFFIX = '.zip'

archive_path = tf.keras.utils.get_file('CholecT50-sample.zip', DATA_URL, extract=True)

# Derive the dataset directory from the archive path by dropping the '.zip'
# suffix. Note: str.strip('.zip') must NOT be used here -- it removes any of the
# characters '.', 'z', 'i', 'p' from BOTH ends of the string rather than the
# suffix, and only gives the right answer by accident for some file names.
dataset_path = archive_path
if dataset_path.endswith(ARCHIVE_SUFFIX):
  dataset_path = dataset_path[:-len(ARCHIVE_SUFFIX)]

# Sub-directories read by the cells below.
data_path = os.path.join(dataset_path, 'data')
triplet_path = os.path.join(dataset_path, 'triplet')
dict_path = os.path.join(dataset_path, 'dict')

# os.listdir returns entries in arbitrary order; sort so that re-running the
# notebook always walks the videos in the same order.
video_names = sorted(os.listdir(data_path))

print("Dataset successfully extracted!")

In [None]:
# Create a dictionary mapping integer triplet ids to their readable labels.
# Each non-empty line of triplet.txt is parsed as "<id>:<label>".
with open(os.path.join(dict_path, 'triplet.txt'), 'r') as f:
  triplet_info = f.readlines()

triplet_dict = {}
for entry in triplet_info:
  entry = entry.strip()
  if not entry:
    # Tolerate blank lines (e.g. a trailing newline at the end of the file)
    # instead of failing on the tuple unpacking below.
    continue
  # Split on the first ':' only, so a label that itself contains ':' is kept whole.
  triplet_id, triplet_label = entry.split(':', 1)
  triplet_dict[int(triplet_id)] = triplet_label

print('Random triplet id and its human readable label\n')
random_triplet_id = np.random.choice(list(triplet_dict.keys()))
print('Triplet id: ', random_triplet_id, '\nReadable label: ', triplet_dict[random_triplet_id])

In [None]:
def generator(data_path, triplet_path, video_names, shuffle=False):
  """Endlessly yield (image, triplet_label) pairs, one per annotated frame.

  Args:
    data_path: Directory with one sub-directory of frames per video. A frame is
      read from `<data_path>/<video_name>/<frame_id zero-padded to 6 digits>.png`.
    triplet_path: Directory with one `<video_name>.txt` CSV file per video.
      Every non-empty row is read as `frame_id, label_0, label_1, ...`.
    video_names: Iterable of video names to cycle through. It is copied, so the
      caller's sequence is never reordered.
    shuffle: If True, the video order is reshuffled before every pass.

  Yields:
    image: float32 array of the frame with pixel values divided by 255.
    triplet_label: uint8 array holding the label columns of the row.
  """
  # Work on a private copy: shuffling must not reorder the caller's list.
  video_names = list(video_names)
  if not video_names:
    # Nothing to iterate over; without this guard the loop below would spin
    # forever without ever yielding.
    return

  while True:
    if shuffle:
      # np.random.shuffle shuffles in place and returns None, so its result
      # must not be assigned back to video_names.
      np.random.shuffle(video_names)

    for video_name in video_names:
      with open(os.path.join(triplet_path, video_name + '.txt'), mode='r') as infile:
        for row in csv.reader(infile):
          if not row:
            continue  # csv.reader yields [] for blank lines
          # Parse the frame id as a plain int: squeezing it into uint8 (as the
          # label columns are) would overflow for frame ids above 255.
          frame_id = int(row[0])
          triplet_label = np.array([int(value) for value in row[1:]], dtype=np.uint8)
          image_path = os.path.join(data_path, video_name, "%06d.png" % frame_id)
          # Context manager releases the underlying file handle promptly.
          with Image.open(image_path) as frame:
            image = np.array(frame, np.float32) / 255.0
          yield image, triplet_label

In [None]:
# Pull a single sample from the generator, display the frame, and list the
# action triplets annotated for it.
gen = generator(data_path, triplet_path, video_names)
first_sample = next(gen, None)
if first_sample is not None:
  image, triplet_label = first_sample

  print('Visualizing image...\n')
  plt.imshow(image)
  plt.show()

  print('\nEncoding showing which of the 100 considered action triplets are represented in the image\n')
  print(triplet_label)

  print('\nReadable labels\n')
  # Indices of the non-zero entries are the ids of the triplets present.
  active_triplets = np.nonzero(triplet_label)[0]
  for triplet in active_triplets:
    print(triplet_dict[triplet])

#  Building and running models 

Defining a shallow neural network using tf.keras 

In [None]:
# Neural network architecture: five 3x3 convolutions with stride 2 and ReLU,
# followed by two ReLU dense layers and a 100-unit sigmoid output layer.
conv_kwargs = dict(kernel_size=3, strides=2, activation="relu")

model = tf.keras.Sequential([
    # The first layer also declares the expected input shape.
    tf.keras.layers.Conv2D(filters=16, input_shape=(480, 854, 3), **conv_kwargs),
    *[tf.keras.layers.Conv2D(filters=32, **conv_kwargs) for _ in range(4)],
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=4096, activation="relu"),
    tf.keras.layers.Dense(units=2048, activation="relu"),
    tf.keras.layers.Dense(units=100, activation="sigmoid"),
])

print("Neural network architecture successfully defined!")

In [None]:
# Build the model for a batch of one 480x854 3-channel image, then print a
# per-layer overview of the architecture.
model.build(input_shape=[1, 480, 854, 3])
model.summary()

In [None]:
# Prepend a batch axis so the single image matches the 4-D input the model expects.
input_4d = image[np.newaxis, ...]

print('Performing a simple forward pass on our untrained network for a test image')
plt.imshow(image)
plt.show()

# predict returns one row per batch element; keep the row for our only image.
prediction = model.predict(input_4d)[0]
print('\nPrediction\n')
print(prediction)

print('\nLabel\n')
print(triplet_label)

#  Metrics and evaluation 