# Creation of Doppler datasets for CNN models from spectrogram images
Removes corrupt files from subject F (the last 20% of each recording).

In [1]:
import os
# Select the project root according to the directory the notebook was started in.
if os.getcwd() == '/content':
    # Running on Google Colab: mount Google Drive and work from the project folder.
    from google.colab import drive
    drive.mount('/content/gdrive')
    BASE_PATH = '/content/gdrive/My Drive/Level-4-Project/'
    os.chdir('gdrive/My Drive/Level-4-Project/')

elif os.getcwd() in (
    'D:\\Google Drive\\Level-4-Project\\notebooks',
    'D:\\Google Drive\\Level-4-Project\\src\\features',
):
    # Started from one of the two known directories of the Windows checkout.
    BASE_PATH = "D:/Google Drive/Level-4-Project/"

else:
    # Any other working directory falls back to this fixed location.
    BASE_PATH = "/export/home/2192793m/Level-4-Project/"

# Input spectrogram images and output directory for the pickled datasets.
INTERIM_PATH = BASE_PATH + 'data/interim/doppler_spectrograms/'
PROCESSED_PATH = BASE_PATH + 'data/processed/doppler_spectrograms_without_corrupt/'
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(PROCESSED_PATH, exist_ok=True)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
from PIL import Image
import glob

In [3]:
# Target size in pixels that process_image passes to Image.resize.
IMAGE_SIZE = (75, 75)

In [4]:
def process_image(path):
    """
    Open a spectrogram image, resize it to IMAGE_SIZE and convert it to greyscale.

    :param path: path to the image file
    :type path: str
    :return: the resized greyscale image
    :rtype: numpy.ndarray
    """
    # The context manager closes the underlying file once the pixels are copied.
    with Image.open(path) as original:
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # resampling filter under its current name.
        resized = original.resize(IMAGE_SIZE, Image.LANCZOS)
        greyscale = resized.convert('L')  # 'L' = single-channel 8-bit greyscale
        return np.array(greyscale)

In [5]:
# Action name -> class index. The indices are what gets stored in the label
# arrays, so existing entries must keep their numbers.
_LABEL_TO_INT = {
    "walking": 0,
    "pushing": 1,
    "sitting": 2,
    "pulling": 3,
    "circling": 4,
    "clapping": 5,
    "bending": 6,
}


def convert_label_to_int(label):
    """
    Return the integer that represents a given action.

    :param label: Action name (e.g. "walking")
    :type label: str
    :return: number associated with the action
    :rtype: int
    :raises ValueError: if the action name is not one of the known actions
    """
    try:
        return _LABEL_TO_INT[label]
    except KeyError:
        # Fail here with a clear message instead of returning None, which would
        # only surface later when the labels are converted to a uint8 array.
        raise ValueError("Unknown action label: {!r}".format(label)) from None

In [6]:
# Spectrogram window length in seconds, written as the sub-directory name used
# under INTERIM_PATH ("1_5" presumably stands for 1.5 seconds).
# Leave exactly one of the options below uncommented.
WINDOW_LENGTH = "3"
# WINDOW_LENGTH = "2"
# WINDOW_LENGTH = "1_5"
# WINDOW_LENGTH = "1"

# Split by User
* Datasets for 1, 1.5, 2 and 3 seconds
* 0 degrees
* All movements
* Train on all but one user, test on remaining user

In [7]:
# One pair of accumulators per subject (A-F): the processed images and, in the
# same order, their integer labels.
user_a_data, user_a_labels = [], []
user_b_data, user_b_labels = [], []
user_c_data, user_c_labels = [], []
user_d_data, user_d_labels = [], []
user_e_data, user_e_labels = [], []
user_f_data, user_f_labels = [], []

In [8]:
# Accumulators for each subject, keyed by the subject's directory name.
user_sets = {
    "A": (user_a_data, user_a_labels),
    "B": (user_b_data, user_b_labels),
    "C": (user_c_data, user_c_labels),
    "D": (user_d_data, user_d_labels),
    "E": (user_e_data, user_e_labels),
    "F": (user_f_data, user_f_labels),
}

# Subject F: the final 20% of each recording is treated as corrupt.
# 570 files per recording -> 570 * 0.2 = 114 -> 570 - 114 = 456,
# so files numbered 456 and above are dropped.
FIRST_CORRUPT_INDEX_F = 456

for path in glob.glob(INTERIM_PATH + WINDOW_LENGTH + "/*/0/*/*.png"):
    # Path layout: .../<window>/<user>/0/<action>/<name>_<number>.png
    # normpath + os.sep splits correctly on both Windows and POSIX; splitting
    # on "\\" alone yields a single element on Linux/Colab and breaks indexing.
    split_path = os.path.normpath(path).split(os.sep)
    user = split_path[-4]
    label = split_path[-2]

    if user not in user_sets:
        continue  # directories of unknown subjects are ignored

    if user == "F":
        file_num = split_path[-1].split('.')[-2]
        sub_num = int(file_num.split("_")[-1])
        if sub_num >= FIRST_CORRUPT_INDEX_F:
            continue  # corrupt tail of the recording

    data, labels = user_sets[user]
    labels.append(convert_label_to_int(label))
    data.append(process_image(path))

In [9]:
def save_user_set(user_letter, user_data, user_labels, window_length):
    """
    Save the data and labels associated with a subject (user and subject are the same thing).

    Two pickle files are written to ``PROCESSED_PATH + window_length + '/'``:
    ``<user_letter>_data.pkl`` and ``<user_letter>_labels.pkl``. Both hold a
    numpy array of dtype uint8.

    :param user_letter: Letter for the subject
    :type user_letter: str
    :param user_data: all images belonging to the subject
    :type user_data: array of images represented as numpy arrays
    :param user_labels: all labels referring to the user_data
    :type user_labels: array of integers representing the labels in the same order as user_data
    :param window_length: Length of the spectrograms in seconds, in the form
        used for directory names (it is concatenated into the path, so it must
        be a string such as "3" or "1_5")
    :type window_length: str
    """
    save_path = PROCESSED_PATH + window_length + '/'
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(save_path, exist_ok=True)

    with open(save_path + user_letter + "_data.pkl", "wb") as data_file:
        pickle.dump(np.array(user_data, np.uint8), data_file)

    with open(save_path + user_letter + "_labels.pkl", "wb") as labels_file:
        pickle.dump(np.array(user_labels, np.uint8), labels_file)

## Save User Datasets

In [10]:
# Write subject A's images and labels for the selected window length.
save_user_set(
    user_letter='A',
    user_data=user_a_data,
    user_labels=user_a_labels,
    window_length=WINDOW_LENGTH,
)

In [11]:
# Write subject B's images and labels for the selected window length.
save_user_set(
    user_letter='B',
    user_data=user_b_data,
    user_labels=user_b_labels,
    window_length=WINDOW_LENGTH,
)

In [12]:
# Write subject C's images and labels for the selected window length.
save_user_set(
    user_letter='C',
    user_data=user_c_data,
    user_labels=user_c_labels,
    window_length=WINDOW_LENGTH,
)

In [13]:
# Write subject D's images and labels for the selected window length.
save_user_set(
    user_letter='D',
    user_data=user_d_data,
    user_labels=user_d_labels,
    window_length=WINDOW_LENGTH,
)

In [14]:
# Write subject E's images and labels for the selected window length.
save_user_set(
    user_letter='E',
    user_data=user_e_data,
    user_labels=user_e_labels,
    window_length=WINDOW_LENGTH,
)

In [15]:
# Write subject F's images and labels for the selected window length.
save_user_set(
    user_letter='F',
    user_data=user_f_data,
    user_labels=user_f_labels,
    window_length=WINDOW_LENGTH,
)