In [None]:
!pip install -r requirements.txt

# Classification Tasks with Kinematic Time Series from Head-Mounted Displays

## **Load and preprocess datasets**

In [31]:
# Add files to sys.path
from pathlib import Path
import sys,os
# Resolve the folder that contains this notebook/script so that the local
# "kinemats" package folder can be added to the import path.
try:
    # Works when the code is executed as a regular Python script
    this_path = str(os.path.dirname(__file__))
except NameError:
    # `__file__` is not defined inside a Jupyter kernel: fall back to the
    # current working directory. Only NameError is expected here, so any
    # other error is allowed to propagate instead of being silently hidden.
    this_path = str(Path().absolute())+"/"
print("File Path:", this_path)
sys.path.append(os.path.join(this_path, "kinemats"))

# Enable debugger in IPython with command set_trace()
#from IPython.core.debugger import set_trace

# Import classes
import utils  # Utils for generation of files and paths

from data_loader import dataset_Tsinghua

# Import data science libs
import numpy as np
import pandas as pd

import matplotlib
#matplotlib.rcParams['text.usetex'] = True
#%matplotlib inline
import matplotlib.pyplot as plt

File Path: C:\Users\darksoul\Downloads\head-motion-classification-AIVR21-/


---
# SETUP

In [32]:
# CONSTANTS
import experiment_config
from experiment_config import Datasets


---
# UTILITY FUNCTIONS

Generate paths to write output files

In [33]:
# Name of the active dataset followed by "/". The path helpers defined in this
# cell prepend it to the `subfolders` they pass to utils.generate_complete_path.
STR_DATASET = str(experiment_config.DATASET_MAIN)+"/"
print(STR_DATASET)
def gen_path_plot(filename, subfolders=None):
    """Generate the full path of a PLOT file just by specifying a name.

    Args:
        filename: Name of the plot file, forwarded to
            `utils.generate_complete_path`.
        subfolders: Optional subfolder(s) appended after the dataset folder
            (`STR_DATASET`). When omitted, the module-level
            `NOTEBOOK_SUBFOLDER_NAME` is used if it exists; otherwise the plot
            is placed directly in the dataset folder.

    Returns:
        Whatever `utils.generate_complete_path` returns for the plot folder,
        image format and export flag configured in `experiment_config`.
    """
    if subfolders is None:
        # NOTEBOOK_SUBFOLDER_NAME is not defined anywhere in this notebook, so
        # referencing it directly raised NameError on every call. Look it up
        # lazily and fall back to no extra subfolder.
        subfolders = globals().get("NOTEBOOK_SUBFOLDER_NAME", "")
    return utils.generate_complete_path(
        filename,
        main_folder=experiment_config.PLOT_FOLDER,
        subfolders=STR_DATASET + subfolders,
        file_extension=experiment_config.IMG_FORMAT,
        save_files=experiment_config.EXPORT_PLOTS,
    )

def gen_path_temp(filename, subfolders="", extension=experiment_config.TEMP_FORMAT):
    """Generate the full path of a TEMP file just by specifying a name.

    Args:
        filename: Name of the temporary file.
        subfolders: Optional subfolder(s) appended after the dataset folder
            (`STR_DATASET`).
        extension: File extension; defaults to `experiment_config.TEMP_FORMAT`.

    Returns:
        The value returned by `utils.generate_complete_path`.
    """
    path_options = dict(
        main_folder=experiment_config.TEMP_FOLDER,
        subfolders=STR_DATASET + subfolders,
        file_extension=extension,
    )
    return utils.generate_complete_path(filename, **path_options)

def gen_path_dataset(filename, subfolders="", extension=""):
    """Generate the full path of a RESULTS file (like pandas dataframes).

    Args:
        filename: Name of the results file.
        subfolders: Optional subfolder(s) appended after the dataset folder
            (`STR_DATASET`).
        extension: File extension; empty by default.

    Returns:
        The value returned by `utils.generate_complete_path`.
    """
    path_options = dict(
        main_folder=experiment_config.DATASET_FOLDER,
        subfolders=STR_DATASET + subfolders,
        file_extension=extension,
    )
    return utils.generate_complete_path(filename, **path_options)

Tsinghua/


# Load and preprocess datasets

# Tsinghua

In [34]:
# Build the dataset wrapper object (`data`) used by all following cells.
# NOTE(review): `data` is only created when the Tsinghua dataset is selected,
# yet later cells use it unconditionally — confirm no other dataset is expected.
if experiment_config.DATASET_MAIN == Datasets.Tsinghua:

    # Original compressed dataset
    dataset_path = experiment_config.DATASET_TSINGHUA_ZIP
    # Path of JSON dictionary used to store the data per user
    # (placed in the dataset's temp folder via gen_path_temp)
    dict_json_name = gen_path_temp('files_index_per_user', extension=".json")

    # Wrapper around the compressed dataset and its JSON index
    data = dataset_Tsinghua.DatasetHeadMovTsinghua(dataset_path, dict_json_name)

In [35]:
# Number of attempts for every "load, or create and reload" loop.
# Defined outside the dataset guard because the unconditional cells further
# down also iterate over range(RELOAD_TRIES); keeping it inside the `if` would
# leave it undefined whenever another dataset is selected.
RELOAD_TRIES = experiment_config.RELOAD_TRIES

if experiment_config.DATASET_MAIN == Datasets.Tsinghua:
    # Cached files: demographics table (CSV) and HMD movements data (pickle)
    demographics_data_filename = experiment_config.DATASET_DEMOGRAPHICS
    original_data_filename = gen_path_temp("hmd_movements", extension=".pickle")

    ### INPUTS / OUTPUTS (edit custom filenames here)
    input_files = [demographics_data_filename, original_data_filename]

    # Try to load the files at most RELOAD_TRIES times. When a file is missing
    # both files are regenerated and the loop starts over to load them.
    for tries in range(RELOAD_TRIES):
        try:
            ### LOAD FILE
            print(f"Trying {tries+1}/{RELOAD_TRIES} to load files: {input_files}")

            ### CUSTOM SECTION TO READ FILES
            data.demographics = pd.read_csv(input_files[0]) # data.demographics is a pd.DataFrame
            print(f"File {input_files[0]} was successfully loaded")
            # NOTE: unpickling can execute arbitrary code; only load pickle
            # files that were created by this notebook.
            data.original_data = utils.load_pickle(input_files[1])
            print(f"File {input_files[1]} was successfully loaded")

        except FileNotFoundError as e:
            ### CREATE FILE
            print(f"File not found. Creating again! {e}")

            ### CUSTOM SECTION TO CREATE FILES
            # Create JSON with dictionary of structured data
            data.generate_file_index()
            # Transform the paths in the compressed file into bytes
            data.uncompress_data(#debug_users = 15,                      # Load just this users for test purposes
                                 #list_unprocessed_users = skip_users_indices     # Users ID with empty data
                                )

            # # Delete head-movement data of specific video keys
            # data.delete_data_from_videos(videos_to_delete)
            # print("Removing data from specific video keys... Done!")

            # Save files
            data.demographics.to_csv(input_files[0], index=False)
            utils.create_pickle(data.original_data, input_files[1])

            ### ---- CONTROL RETRIES
            # Retry loading the freshly created files; on the last attempt
            # re-raise the original FileNotFoundError.
            if tries+1 < RELOAD_TRIES:
                continue
            else:
                raise
        # Files were loaded successfully: stop retrying
        break

Trying 1/2 to load files: ['./dataset/Tsinghua/demographics.csv', './temp/Tsinghua/hmd_movements.pickle']
File ./dataset/Tsinghua/demographics.csv was successfully loaded
File ./temp/Tsinghua/hmd_movements.pickle was successfully loaded


---
## Data Synchronization with data interpolation
***Generate CSV file with summary of sampling frequency and duration***: The CSV file defines the criteria used to resample all time series to a common length.

In [36]:
# CSV file with the summary of the original sampling of the time series
sampling_stats_filename = experiment_config.DATASET_SUMMARY

### INPUTS / OUTPUTS (edit custom filenames here)
input_files = [sampling_stats_filename]

# Load the summary; when the file is missing, build it from `data`, write it
# to disk and attempt the load again (at most RELOAD_TRIES attempts).
for tries in range(RELOAD_TRIES):
    try:
        print(f"Trying {tries+1}/{RELOAD_TRIES} to load files: {input_files}")

        # Custom read section: the summary is loaded as a pd.DataFrame
        sampling_stats = pd.read_csv(input_files[0])
        print(f"File {input_files[0]} was successfully loaded")
    except FileNotFoundError as e:
        print(f"File not found. Creating again! {e}")

        # Custom write section: summary of original sampling frequencies
        sampling_stats = data.create_original_sampling_summary()
        sampling_stats.to_csv(input_files[0], index=False)

        # Out of attempts: propagate the original error
        if tries+1 >= RELOAD_TRIES:
            raise
    else:
        # Loaded without errors: no further attempts needed
        break

print(sampling_stats.head())

Trying 1/2 to load files: ['./dataset/Tsinghua/summary_timeseries.csv']
File ./dataset/Tsinghua/summary_timeseries.csv was successfully loaded
   experiment  user  video  startingTime  endTime      N   magQuat  \
0           0     1      0         1.247  164.203  14726  1.000005   
1           0     1      1         0.000  201.141  18180  0.999997   
2           0     1      2         0.021  293.239  26272  1.000006   
3           0     1      3         0.000  172.577  15478  0.999998   
4           0     1      4         0.021  205.708  18443  1.000005   

   avTsampling  avFsampling  
0     0.011066    90.367952  
1     0.011064    90.384357  
2     0.011161    89.598865  
3     0.011150    89.687502  
4     0.011153    89.665365  


## SLERP (Spherical Linear Interpolation)

Slerp is shorthand for spherical linear interpolation. It refers to constant-speed motion along a unit-radius great circle arc, given the ends and an interpolation parameter. "A major appeal is that interpolation is carried out as a rotation about a fixed axis at constant angular velocity" [REF,pg.18](http://web.cs.iastate.edu/~cs577/handouts/quaternion.pdf)

Let $p_{0}$ and $p_{1}$ be the first and last points of the arc, and let $t$ be the interpolation parameter, where $0 \le t \le 1$. Compute $\Omega$ as the angle subtended by the arc, so that $\cos \Omega = p_{0} \cdot p_{1}$.

$\mathrm{Slerp}(p_{0},p_{1};t) = \frac{\sin[(1-t)\Omega]}{\sin(\Omega)}\cdot p_{0} + \frac{\sin(t\Omega)}{\sin(\Omega)}\cdot p_{1}$



In [37]:
# Parameters forwarded to data.resample_movement() when the resampled data
# has to be (re)created.
if experiment_config.DATASET_MAIN == Datasets.Tsinghua:
    # Target sampling frequency of the resampled series (presumably in Hz — TODO confirm)
    SAMPLING_FREQUENCY = 30
    # Start and end of the time window kept from each recording, in seconds
    STARTING_TIME_SECS = 35
    ENDING_TIME_SECS = 155

    # Which experiment of the dataset to use:
    #   0: Experiment_1: No instructions to look at video ROI
    #   1: Experiment_2: Instruction to focus on video ROI
    # Check dataset paper for description
    EXPERIMENT_ID = 0

This interpolation is common for both

In [38]:
# Pickle file that caches the structure with the resampled time-series
movement_resampled_data_filename = gen_path_temp("hmd_movements_resampled", extension=".pickle")

### INPUTS / OUTPUTS (edit custom filenames here)
input_files = [movement_resampled_data_filename]

# Load the resampled data; when the cache is missing, resample, store the
# result and attempt the load again (at most RELOAD_TRIES attempts).
for tries in range(RELOAD_TRIES):
    try:
        print(f"Trying {tries+1}/{RELOAD_TRIES} to load files: {input_files}")

        # Custom read section
        data.processed = utils.load_pickle(input_files[0])
        print(f"File {input_files[0]} was successfully loaded")
    except FileNotFoundError as e:
        print(f"File not found. Creating again! {e}")

        # Custom write section: resampling is only defined for Tsinghua here
        if experiment_config.DATASET_MAIN == Datasets.Tsinghua:
            data.resample_movement(
                experiment_id=EXPERIMENT_ID,
                sampling_frequency=SAMPLING_FREQUENCY,
                starting_time=STARTING_TIME_SECS,
                end_time=ENDING_TIME_SECS,
            )

        # Create pickle file with resampled head-movement data
        utils.create_pickle(data.processed, input_files[0])

        # Out of attempts: propagate the original error
        if tries+1 >= RELOAD_TRIES:
            raise
    else:
        # Loaded without errors: no further attempts needed
        break

Trying 1/2 to load files: ['./temp/Tsinghua/hmd_movements_resampled.pickle']
File ./temp/Tsinghua/hmd_movements_resampled.pickle was successfully loaded


# Dataset Tsinghua


In [39]:
if experiment_config.DATASET_MAIN == Datasets.Tsinghua:
    # Summary of resampled head movement data.
    # data.processed is indexed as data.processed[user][video] -> 2D array.
    user_keys = list(data.processed.keys())
    num_users = len(user_keys)
    if num_users == 0:
        raise ValueError("data.processed is empty: there is no resampled data to summarize")

    # Use the first stored user/video as reference instead of hard-coded keys,
    # so the summary also works when user 1 or video 0 is not present.
    reference_user = user_keys[0]
    reference_video = next(iter(data.processed[reference_user].keys()))
    videos_per_user = len(data.processed[reference_user].keys())

    # Count the series that really exist rather than assuming that every user
    # has the same number of videos.
    total_trajectories = sum(len(data.processed[user].keys()) for user in user_keys)
    video_data_rows, video_data_cols = data.processed[reference_user][reference_video].shape

    print("Total number of users",num_users)
    print("Total number of videos per user",videos_per_user)
    print("Total number of time series", total_trajectories )
    print("Head movement per video has size:", (video_data_rows, video_data_cols))

Total number of users 48
Total number of videos per user 9
Total number of time series 432
Head movement per video has size: (3601, 5)


In [40]:
if experiment_config.DATASET_MAIN == Datasets.Tsinghua:
    # Files with the combined time series
    labels_filename = experiment_config.DATASET_LABELS # Index of each series: id, user, videoId
    timestamps_filename = experiment_config.DATASET_TIMESTAMPS # Timestamps
    dataset_filename = experiment_config.DATASET_DATA # Resampled data

    # Filled either by loading the files or by creating them from data.processed
    labels = None
    timestamps = None
    dataset = None

    ### INPUTS / OUTPUTS (edit custom filenames here)
    input_files = [labels_filename, timestamps_filename, dataset_filename]

    # Try to load the files at most RELOAD_TRIES times
    for tries in range(RELOAD_TRIES):
        try:
            ### LOAD FILE
            print(f"Trying {tries+1}/{RELOAD_TRIES} to load files: {input_files}")

            ### CUSTOM SECTION TO READ FILES
            labels = pd.read_csv(input_files[0])
            print(f"File {input_files[0]} was successfully loaded")
            timestamps = np.loadtxt(input_files[1])
            print(f"File {input_files[1]} was successfully loaded")
            dataset = utils.load_binaryfile_npy(input_files[2])
            print(f"File {input_files[2]} was successfully loaded")

        # NOTE(review): any load error (not only a missing file) triggers the
        # regeneration below; the printed message still says "File not found".
        except Exception as e:
            ### CREATE FILE
            print(f"File not found. Creating again! {e}")

            ### CUSTOM SECTION TO CREATE FILES
            ## Create DataFrame with labels
            labels_cols = ["id","user","videoId"]
            # Integer dtype: these are identifiers, so they must not be stored as floats
            labels = np.empty((total_trajectories, len(labels_cols)), dtype=int)

            # Contains all the trajectories in one array
            dataset = np.empty((total_trajectories, video_data_rows, video_data_cols - 1))  ## The timestamp is in a different array

            # Iterate over the users that actually exist instead of assuming
            # that their keys are the contiguous range 1..num_users.
            user_keys = sorted(data.processed.keys())

            # All time series are resampled with the same timestamps, just pick one!
            first_user = user_keys[0]
            first_video = next(iter(data.processed[first_user].keys()))
            timestamps = data.processed[first_user][first_video][:,0]

            # Time series index, used to map them back the original series with their respective user and index.
            ts_idx = 0
            # Put together all the structured time series in one numpy array to do distance calculations
            for user in user_keys:
                for video in data.processed[user].keys():
                    series = data.processed[user][video]

                    ## CHECK THAT ALL THE QUATERNIONS IN THE VIDEO HAVE MAGNITUDE 1. [Unit Quaternions]
                    # Column 0 is the timestamp; the remaining columns form the quaternion
                    magnitudes = np.linalg.norm(series[:,1:], axis=1)
                    out_of_range = np.flatnonzero((magnitudes > 1.01) | (magnitudes < 0.99))
                    for i in out_of_range:
                        print("Quaternion norm not equal 1+/-0.01", magnitudes[i], "user:", user, "video", video, "row", i)

                    # Index of which time series corresponded to which video and which user
                    labels[ts_idx] = [ts_idx, user, video]

                    # Copy the original structured data into the array with all the trajectories
                    dataset[ts_idx,:,:] = series[:,1:] ## SKIP FIRST COLUMN

                    # Time-series Index, combining the structure per user, per video.
                    ts_idx += 1

            # The buffers were allocated with np.empty: refuse to save them if
            # some rows were never filled.
            if ts_idx != total_trajectories:
                raise ValueError(f"Expected {total_trajectories} time series but found {ts_idx}")

            ## SAVE FILES
            # Create dataframe with time index
            labels = pd.DataFrame(data=labels, columns=labels_cols)
            labels.to_csv(input_files[0], index=False)
            print("Cluster index created at", input_files[0])

            # Save timestamps
            np.savetxt(input_files[1], timestamps, fmt='%f') # Supress scientific notation
            print("Timestamps created at",input_files[1])

            # Create binary .npy file with combined time-series for clustering
            utils.save_binaryfile_npy(dataset, input_files[2])
            print("Head movement resampled created at", input_files[2])


            ### ---- CONTROL RETRIES
            # Retry loading the freshly created files; on the last attempt
            # re-raise the original load error.
            if tries+1 < RELOAD_TRIES:
                continue
            else:
                raise

        # Finish iteration
        break


Trying 1/2 to load files: ['./dataset/Tsinghua/labels.csv', './dataset/Tsinghua/timestamps.csv', './dataset/Tsinghua/dataset.npy']
File ./dataset/Tsinghua/labels.csv was successfully loaded
File ./dataset/Tsinghua/timestamps.csv was successfully loaded
File ./dataset/Tsinghua/dataset.npy was successfully loaded


In [41]:
# Final marker: printed once execution reaches the last cell of the notebook
print(">> FINISHED WITHOUT ERRORS!!")

>> FINISHED WITHOUT ERRORS!!
