# Dataset pre-processing

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from collections import OrderedDict
from tqdm import tqdm, trange
import wp8.pre_processing.utils as utils
# from pre_processing.template_match import TemplateMatch
from wp8.pre_processing.generate_labels import LabelsGenerator
from wp8.pre_processing.process_dataset import ProcessDataset

## Timestamps Extraction from video frames
Extract the timestamps and datalogger times from every frame of each video.

In [None]:
# TemplateMatch is used below, but its import is commented out in the imports
# cell, so on a fresh kernel this cell fails with NameError. Import it here.
# NOTE(review): module path assumed to mirror the other wp8.pre_processing
# imports in this notebook - confirm it matches the package layout.
from wp8.pre_processing.template_match import TemplateMatch

# Video to process and the region of interest handed to the matcher.
# NOTE(review): the meaning/order of the four ROI values is defined by
# TemplateMatch, not visible here - confirm before changing them.
curr_video = './data/videos/Actor_1_Bed_PH CAM 8.mp4'
timestamp_roi = (47, 85, 811, 150)
tm = TemplateMatch(
    video_path=curr_video,
    element_type='timestamp',
    timestamp_roi=timestamp_roi,
)


In [None]:
# Grab only the first frame of the video. The capture is released even if the
# read itself raises, so the file handle is never leaked.
cap = cv2.VideoCapture(curr_video)
try:
    ok, first_frame = cap.read()
finally:
    cap.release()

# cap.read() reports failure through its boolean flag (and returns no frame)
# when the file is missing or cannot be decoded; fail here with a clear message
# instead of passing None on to the matcher.
if not ok:
    raise RuntimeError(f"Could not read the first frame of {curr_video!r}")

# Run the matcher on that single frame.
# NOTE(review): test=True presumably enables a visual/debug mode - confirm in TemplateMatch.
tm.template_match(first_frame, test=True)

In [None]:
# Run the extraction on the matcher configured above and keep the result for
# the preview and Excel export cells that follow.
timestamps = tm.extract_timestamps()

In [None]:
# Preview the first ten extracted entries as a quick sanity check.
timestamps[:10]

In [None]:
# Wrap the extracted timestamps in a Series and write them to their own sheet
# of an Excel workbook under outputs/.
timestamps_serie = pd.Series(timestamps)
timestamps_serie.to_excel(
    'outputs/Actor_1_Bed_PH_timestamps.xlsx',
    sheet_name='Actor_1_Bed_PH',
)

## Labels file generation

In [2]:
# Point the labels generator at the directory of JSON annotation files and run
# it. In the recorded run below it found 50 JSON files and took about 6.5 minutes.
gl = LabelsGenerator(json_dir="../data/labels_json/")
gl.generate_labels()

[INFO] Found 50 JSON files


100%|██████████| 50/50 [06:26<00:00,  7.74s/it]


## Features Extraction and Dataset excel creation

In [None]:
# ImageNet-pretrained InceptionV3 used as a feature extractor: the
# classification head is dropped and average pooling is applied to the output.
feature_extractor = tf.keras.applications.InceptionV3(
    include_top=False,
    weights="imagenet",
    pooling="avg",
    input_shape=(224, 224, 3),
)

# Root folder containing the videos (external drive, machine-specific path).
videos_folder = '/Volumes/HDD ESTERNO Andrea/DATASET WP8'

# Hand the folder, the model and the matching InceptionV3 preprocessing
# function to the dataset processor, then start the processing run.
ds = ProcessDataset(
    videos_folder=videos_folder,
    feature_extractor=feature_extractor,
    preprocess_input=tf.keras.applications.inception_v3.preprocess_input,
)
ds.extract_frames()


In [None]:
# Load one sheet of the generated labels workbook; the first spreadsheet
# column becomes the DataFrame index.
df = pd.read_excel(
    "outputs/labels/labels.xlsx",
    sheet_name="actor_1_bed_full_ph",
    index_col=0,
)

In [None]:
# Stack 8 copies of the label rows.
# NOTE(review): presumably one copy per camera - confirm the factor of 8.
# ignore_index=True renumbers the rows 0..N-1; without it every index label
# appears 8 times, which makes any later index-aligned assignment ambiguous.
df1 = pd.concat([df] * 8, ignore_index=True)

# Only the last expression of a cell is displayed, so show both shapes together
# (the original bare `df.shape` line produced no output).
df.shape, df1.shape

In [None]:
# Placeholder frame names: 4920 copies of the same dummy string.
# NOTE(review): 4920 is hard-coded; presumably it should equal the number of
# rows of df1 - confirm.
names = ["test" for _ in range(4920)]

In [None]:
# Assign the names by position. Wrapping the list in a default-indexed Series
# made pandas align it by index label against df1, whose labels are not
# guaranteed to be 0..N-1 (df1 is built by concatenating copies of df), so
# names could be repeated or replaced by NaN without any error. A plain list is
# matched row by row and raises ValueError if the lengths differ.
df1["frame_name"] = names


In [9]:
# Load the pre-computed feature matrix and the dataset table.
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable base directory before sharing the notebook.
features = np.load("/Users/andrea/Documents/Github/WP8_refactoring/wp8/outputs/dataset/features/Actor_1_Bed.npy")
dataset = pd.read_csv("/Users/andrea/Documents/Github/WP8_refactoring/wp8/outputs/dataset/dataset/full_dataset.csv")

print(f"dataset shape: {dataset.shape}\nfeatures shape: {features.shape}")

# Attach one feature vector (one row of `features`) to each dataset row, by
# position. Binding the Series to dataset.index makes a row-count mismatch
# raise ValueError instead of silently leaving NaN cells or dropping vectors,
# which is what label alignment against a fresh 0..N-1 index would do.
dataset["features"] = pd.Series(list(features), index=dataset.index)
dataset.head()


dataset shape: (8640, 5)
features shape: (8640, 2048)


Unnamed: 0.1,Unnamed: 0,micro_labels,macro_labels,ar_labels,frame_name,features
0,0,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0000,"[0.0, 1.3650033, 0.19758935, 0.46625867, 0.284..."
1,1,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0001,"[0.0, 1.3973647, 0.21190469, 0.39370242, 0.277..."
2,2,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0002,"[0.0, 1.5013084, 0.21771781, 0.40439475, 0.304..."
3,3,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0003,"[0.0, 1.4786197, 0.20376159, 0.2727906, 0.2843..."
4,4,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0004,"[0.0, 1.3080854, 0.15197416, 0.32774755, 0.305..."
