In [1]:
import os
from pathlib import Path

# Move the working directory two levels up from where the notebook started.
# The starting directory is recorded only once per kernel session, so
# re-running this cell lands in the same place instead of climbing two
# more levels on every execution (which a bare relative chdir would do).
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = Path.cwd()
os.chdir(NOTEBOOK_DIR.parent.parent)
os.getcwd()

'/home/javiermunoz/TFM_DSBD'

In [2]:
# Environment and library setup for the rest of the notebook.
# NOTE: `os` is not imported in this cell; it comes from the first cell.

# Set the TensorFlow native log level before TensorFlow is imported below.
# NOTE(review): presumably the variable is read when the library loads, so
# keep this line ahead of the import - confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Switch off TensorFlow's Python deprecation warnings through a module flag.
# NOTE(review): `_PRINT_DEPRECATION_WARNINGS` is a private attribute and may
# change between TensorFlow releases.
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

import tensorflow as tf
# Enable memory growth on every GPU that TensorFlow reports.
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

import matplotlib
import matplotlib.pyplot as plt
# Default figure size applied to all matplotlib figures.
matplotlib.rc("figure", figsize=(15, 5))

import numpy as np
import pandas as pd

In [3]:
# Load the model leaderboard. The first CSV column is the row index that was
# written when the file was saved; reading it as the index avoids carrying it
# around as a spurious "Unnamed: 0" data column.
leaderboard = pd.read_csv("data/leaderboard.csv", index_col=0)
leaderboard

Unnamed: 0,model_type,model_name,train_acc,val_acc,test_acc,train_loss,val_loss,test_loss,epoch,path_to_model,subset_size
0,mediapipe,mp,0.95,0.92,0.93,0.22,0.28,0.28,77.0,src/mediapipe/bestmodels/best_mp_77_0.92_10_mo...,10.0
1,convlstm,simple,0.96,0.93,0.93,0.15,0.2,0.19,39.0,src/convlstm/bestmodels/best_simple_39_0.93_10...,10.0
2,convlstm,stateless,0.93,0.95,0.91,0.22,0.19,0.19,22.0,src/convlstm/bestmodels/best_stateless_22_0.95...,10.0
3,3DCNN,3dcnn,0.92,0.9,0.87,0.35,0.39,0.42,63.0,src/3DCNN/bestmodels/best_3dcnn_63_0.90_10_mod...,10.0


In [4]:
# Saved-model paths of the two ConvLSTM entries, looked up by leaderboard
# row label (1 -> "simple", 2 -> "stateless").
path_to_simple = leaderboard.loc[1, 'path_to_model']
path_to_stateless = leaderboard.loc[2, 'path_to_model']

# Restore the "stateless" variant and print its layer summary.
model_to_test = tf.keras.models.load_model(path_to_stateless)
model_to_test.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 224,   0           []                               
                                224, 3)]                                                          
                                                                                                  
 rescaling (Rescaling)          (None, None, 224, 2  0           ['input_1[0][0]']                
                                24, 3)                                                            
                                                                                                  
 conv_lstm2d_8 (ConvLSTM2D)     (None, None, 222, 2  3200        ['rescaling[0][0]']              
                                22, 8)                                                        

In [5]:
from src.utils import load_dict

# Size of the dataset subset; it selects which data folder is read.
subset = 10

# Mapping from sign name to class index, as stored alongside the subset.
labels_map_path = f'data/subset_{subset}_lsa_64/pickl_files/labels_map.pkl'
labels = load_dict(labels_map_path)
labels

{'Barbecue': 0,
 'Birthday': 1,
 'Buy': 2,
 'Chewing-gum': 3,
 'Coin': 4,
 'Milk': 5,
 'Mock': 6,
 'Realize': 7,
 'Sweet milk': 8,
 'To land': 9}

## Inference on a test video

In [6]:
import cv2
from src.load_dataset_aux import format_frames
from numpy.random import default_rng

rng = default_rng()
threshold = 0.7           # minimum model score required to report a prediction
output_size = (224,224)   # frame size passed to format_frames (matches the model input)
n_frames = 10             # number of frames sampled from the clip

# Relative to the project root that the first cell switches into.
video_path = 'data/subset_10_lsa_64/test/Mock/029_001_004.mp4'

cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Read the whole clip. The read that signals the end of the stream yields
    # no image, so it must not be stored: a stored None could later be picked
    # by the random sampling and break the preprocessing step.
    captured_frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        captured_frames.append(frame)
finally:
    # Release the capture handle even if opening or reading failed.
    cap.release()
    cv2.destroyAllWindows()

if len(captured_frames) < n_frames:
    raise ValueError(
        f"Video has only {len(captured_frames)} frames; {n_frames} are required"
    )

# Draw n_frames distinct frame positions at random and keep them in temporal order.
frames2extract_idx = np.sort(rng.choice(len(captured_frames), size=n_frames, replace=False))
frames_to_predict = [captured_frames[i] for i in frames2extract_idx]

result = [format_frames(frame, output_size) for frame in frames_to_predict]

# Reverse the order of the three colour channels
# (presumably BGR -> RGB, since the frames come from OpenCV - confirm).
result = np.array(result)[..., [2,1,0]]

# Add a leading batch axis so the model receives a single clip.
logits = model_to_test.predict(np.expand_dims(result, axis=0))
if (logits > threshold).any():
    pred = np.argmax(logits)
    pred_label = [k for k, v in labels.items() if v == pred]
    print(pred_label)

# Drop the raw frames so they do not stay in kernel memory.
captured_frames = []


['Mock']


## Inference on a live camera feed

In [None]:
import cv2
from src.load_dataset_aux import format_frames
from numpy.random import default_rng

rng = default_rng()
threshold = 0.7           # minimum model score required to report a prediction
output_size = (224,224)   # frame size passed to format_frames (matches the model input)
window_size = 80          # camera frames buffered between two predictions
n_frames = 10             # frames sampled from each buffered window

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open camera 0")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

captured_frames = []
count = 1
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No image was delivered; stop instead of buffering/showing None.
            break
        captured_frames.append(frame)

        if count % window_size == 0:
            # Draw n_frames distinct positions from the window, in temporal order.
            frames2extract_idx = np.sort(
                rng.choice(len(captured_frames), size=n_frames, replace=False)
            )
            frames_to_predict = [captured_frames[i] for i in frames2extract_idx]

            # A separate loop name keeps `frame` bound to the live image, so
            # the preview below never shows an older sampled frame.
            result = [format_frames(sampled, output_size) for sampled in frames_to_predict]

            # Reverse the order of the three colour channels
            # (presumably BGR -> RGB, since the frames come from OpenCV - confirm).
            result = np.array(result)[..., [2,1,0]]

            # Add a leading batch axis so the model receives a single clip.
            logits = model_to_test.predict(np.expand_dims(result, axis=0))
            print(logits)
            if (logits > threshold).any():
                pred = np.argmax(logits)
                pred_label = [k for k, v in labels.items() if v == pred]
                print(pred_label)

            # Start a fresh window.
            captured_frames = []

        count += 1
        # Show to screen
        cv2.imshow('OpenCV Feed', frame)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, including when
    # the loop is interrupted or prediction raises.
    cap.release()
    cv2.destroyAllWindows()
