<a href="https://colab.research.google.com/github/SanjeevKV/LearningKeras/blob/master/cnn1d_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import keras
from scipy.io import wavfile as wav
import pandas as pd
import numpy as np
import os
import sys
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv1D, BatchNormalization, MaxPooling1D, Dense, Activation, Flatten, LSTM
from keras import optimizers
import argparse

In [0]:

"""
fps - Frames per second
num_steps - Number of frames in one sample (in seconds)
skip_step - Number of frames to skip before next sample starts (in seconds)
"""
class KerasCnnBatchGenerator(object):
    def __init__(self, audio_data_path, motion_data_path, num_steps, batch_size, fps = 8000, skip_step=0.5, out_size = 1):
        #List all the files in the path - Both Audio and Motion
        self.audio_data_path = audio_data_path
        self.motion_data_path = motion_data_path
        self.audio_data_files = sorted(os.listdir(self.audio_data_path))
        self.motion_data_files = sorted(os.listdir(self.motion_data_path))
        
        if len(self.audio_data_files) != len(self.motion_data_files):
            sys.exit("Audio and Motion data files should be equal in number")
            
        self.total_files = len(self.audio_data_files)
        self.current_file_idx = 0
        
        self.assign_audio_frame_for_current_file() # Current Audio File's data - Assigned to self.audio_data
        self.assign_motion_frame_for_current_file() # Current Motion File's data - Assigned to self.motion_data
        self.num_steps = int(num_steps * fps)
        self.batch_size = batch_size
        self.fps = fps
        # this will track the progress of the batches sequentially through the
        # data set - once the data reaches the end of the data set it will reset
        # back to zero
        self.current_idx = 0
        # skip_step is the number of words which will be skipped before the next
        # batch is skimmed from the data set
        self.skip_step = skip_step * fps
        self.out_size = out_size

        self.num_steps_sec = num_steps
        self.skip_step_sec = skip_step
        self.median_velocity = self.motion_data["velocity"].median()
        self.mean_velocity = self.motion_data["velocity"].mean()
        self.median_avg_velocities = self.get_mean_avg_velocities()
        
    def get_mean_avg_velocities(self):
        velocities = []
        for i in np.arange(0,max(self.motion_data["timestamp"]), self.skip_step_sec):
            relevant_y = self.motion_data[(self.motion_data["timestamp"] >= i) & 
                                              (self.motion_data["end_timestamp"] < i + self.num_steps_sec)]
            velocities.append(relevant_y["velocity"].values.mean())
        return np.median(velocities)

    def assign_audio_frame_for_current_file(self):
        fps, audio_clip = wav.read(os.path.join(self.audio_data_path, self.audio_data_files[self.current_file_idx]))
        audio_clip = audio_clip / max(np.abs(audio_clip))
        self.audio_data = pd.DataFrame(audio_clip, columns = ["audio"])
        
    def assign_motion_frame_for_current_file(self):
        coords = pd.read_csv(os.path.join(self.motion_data_path, self.motion_data_files[self.current_file_idx]))
        coords["end_timestamp"] = coords[" timestamp"].shift(-1)
        coords["timestamp"] = coords[" timestamp"]
        imp_coords = coords[["timestamp", "end_timestamp", " X_0", " Y_0", " Z_0"]]
        coords = pd.DataFrame(np.asarray(imp_coords), columns = ["timestamp", "end_timestamp", "X_0", "Y_0", "Z_0"])
        coords["velocity"] = (coords["X_0"] - coords["X_0"].shift(-1)) ** 2 + \
                                (coords["Y_0"] - coords["Y_0"].shift(-1)) ** 2 + \
                                (coords["Z_0"] - coords["Z_0"].shift(-1)) ** 2
        coords["velocity"] = np.sqrt(coords["velocity"])
        self.motion_data = coords        

    def assign_angular_frame_for_current_file(self):
        coords = pd.read_csv(os.path.join(self.motion_data_path, self.motion_data_files[self.current_file_idx]))
        coords["end_timestamp"] = coords[" timestamp"].shift(-1)
        coords["timestamp"] = coords[" timestamp"]
        imp_coords = coords[["timestamp", "end_timestamp", " p_rx"]]
        coords = pd.DataFrame(np.asarray(imp_coords), columns = ["timestamp", "end_timestamp", "p_rx"])
        coords["velocity"] = (coords["p_rx"] - coords["p_rx"].shift(-1)) 
        self.motion_data = coords  
        
    def generate(self):
        x = np.zeros((self.batch_size, self.num_steps, 1))
        y = np.zeros((self.batch_size, self.out_size))
        while True:
            for i in range(self.batch_size):
                if self.current_idx + self.num_steps >= len(self.audio_data):
                    # reset the index back to the start of the data set
                    self.current_idx = 0
                    self.current_file_idx = (self.current_file_idx + 1) % self.total_files
                    self.assign_audio_frame_for_current_file()
                    self.assign_motion_frame_for_current_file()
                    
                    self.median_velocity = self.motion_data["velocity"].median()
                    self.mean_velocity = self.motion_data["velocity"].mean()
                    self.median_avg_velocities = self.get_mean_avg_velocities()
                    
                x[i, :, :] = self.audio_data.loc[self.current_idx:self.current_idx + self.num_steps - 1]["audio"].values.reshape((-1,1))
                current_idx_sec = self.current_idx / self.fps
                relevant_y = self.motion_data[(self.motion_data["timestamp"] >= current_idx_sec) & 
                                              (self.motion_data["end_timestamp"] < current_idx_sec + self.num_steps_sec)]
                
                y[i, :] = to_categorical(int(relevant_y["velocity"].values.mean() > self.median_avg_velocities), num_classes = 2)
                self.current_idx += self.skip_step
            yield x, y
            
    def generate_angular_data(self):
        x = np.zeros((self.batch_size, self.num_steps, 1))
        y = np.zeros((self.batch_size, self.out_size))
        self.assign_angular_frame_for_current_file()
        while True:
            for i in range(self.batch_size):
                if self.current_idx + self.num_steps >= len(self.audio_data):
                    # reset the index back to the start of the data set
                    self.current_idx = 0
                    self.current_file_idx = (self.current_file_idx + 1) % self.total_files
                    self.assign_audio_frame_for_current_file()
                    self.assign_angular_frame_for_current_file()
                    
                    self.median_velocity = self.motion_data["velocity"].median()
                    self.mean_velocity = self.motion_data["velocity"].mean()
                    self.median_avg_velocities = self.get_mean_avg_velocities()
                    
                x[i, :, :] = self.audio_data.loc[self.current_idx:self.current_idx + self.num_steps - 1]["audio"].values.reshape((-1,1))
                current_idx_sec = self.current_idx / self.fps
                relevant_y = self.motion_data[(self.motion_data["timestamp"] >= current_idx_sec) & 
                                              (self.motion_data["end_timestamp"] < current_idx_sec + self.num_steps_sec)]
                
                y[i, :] = to_categorical(int(relevant_y["velocity"].values.mean() > self.median_avg_velocities), num_classes = 2)
                self.current_idx += self.skip_step
            yield x, y

In [50]:
! ls "/content/drive/My Drive/Data"

open_face_audio        test_face_coordinates   valid_face_audio
open_face_coordinates  train_face_audio        valid_face_coordinates
test_face_audio        train_face_coordinates


In [0]:
def cnn1d(input_shape, num_classes):
    CONV_1D_KERNEL_SIZE = 20
    model = Sequential(name='Emo1D')
    	
    # LFLB1
    model.add(Conv1D(filters = 64,kernel_size = (CONV_1D_KERNEL_SIZE),strides=1,padding='same',data_format='channels_last',input_shape=input_shape))	
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 4, strides = 4))
    
    #LFLB2
    model.add(Conv1D(filters=64, kernel_size = CONV_1D_KERNEL_SIZE, strides=1,padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 4, strides = 4))
    
    #LFLB3
    model.add(Conv1D(filters=128, kernel_size = CONV_1D_KERNEL_SIZE, strides=1,padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 4, strides = 4))
    
    #LFLB4
    model.add(Conv1D(filters=128, kernel_size = CONV_1D_KERNEL_SIZE, strides=1,padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 4, strides = 4))
    		
    #LSTM
    model.add(LSTM(units = 64))
    #FC
    #model.add(Flatten())
    #model.add(Dense(units = 8, activation = 'relu'))
    #model.add(Dense(units = 8, activation = 'relu'))
    model.add(Dense(units=num_classes,activation='softmax'))
    
    #Model compilation	
    opt = optimizers.SGD(lr = learning_rate, decay=decay, momentum=momentum, nesterov=True)
    model.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['categorical_accuracy'])
    	
    return model

In [46]:

#parser = argparse.ArgumentParser()
#args = parser.parse_args()

num_fc = 64
batch_size = 32
num_epochs = 1500 #best model will be saved before number of epochs reach this value
learning_rate = 0.0001
decay = 1e-6
momentum = 0.9

train_generator_obj = KerasCnnBatchGenerator('/content/drive/My Drive/Data/train_face_audio/', '/content/drive/My Drive/Data/train_face_coordinates/', 10, 30, fps = 8000, skip_step=1, out_size = 2)
valid_generator_obj = KerasCnnBatchGenerator('/content/drive/My Drive/Data/valid_face_audio/', '/content/drive/My Drive/Data/valid_face_coordinates/', 10, 1, fps = 8000, skip_step=10, out_size = 2)

model = cnn1d(input_shape=(train_generator_obj.num_steps, 1),num_classes=2)
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_33 (Conv1D)           (None, 80000, 64)         1344      
_________________________________________________________________
batch_normalization_33 (Batc (None, 80000, 64)         256       
_________________________________________________________________
activation_33 (Activation)   (None, 80000, 64)         0         
_________________________________________________________________
max_pooling1d_33 (MaxPooling (None, 20000, 64)         0         
_________________________________________________________________
conv1d_34 (Conv1D)           (None, 20000, 64)         81984     
_________________________________________________________________
batch_normalization_34 (Batc (None, 20000, 64)         256       
_________________________________________________________________
activation_34 (Activation)   (None, 20000, 64)         0         
__________

In [47]:
model.fit_generator(generator = train_generator_obj.generate_angular_data(), epochs = 10, verbose = 2, steps_per_epoch = 60,
                    validation_data = valid_generator_obj.generate_angular_data(), validation_steps = 60)

Epoch 1/10
 - 103s - loss: 0.6843 - categorical_accuracy: 0.5572 - val_loss: 0.6036 - val_categorical_accuracy: 0.7500
Epoch 2/10
 - 94s - loss: 0.6718 - categorical_accuracy: 0.5844 - val_loss: 0.6275 - val_categorical_accuracy: 0.7000
Epoch 3/10
 - 93s - loss: 0.6644 - categorical_accuracy: 0.6044 - val_loss: 0.6037 - val_categorical_accuracy: 0.7333
Epoch 4/10
 - 93s - loss: 0.6584 - categorical_accuracy: 0.6128 - val_loss: 0.5958 - val_categorical_accuracy: 0.7333
Epoch 5/10
 - 93s - loss: 0.6525 - categorical_accuracy: 0.6222 - val_loss: 0.6148 - val_categorical_accuracy: 0.6833
Epoch 6/10
 - 93s - loss: 0.6458 - categorical_accuracy: 0.6350 - val_loss: 0.5983 - val_categorical_accuracy: 0.7500
Epoch 7/10
 - 91s - loss: 0.6372 - categorical_accuracy: 0.6539 - val_loss: 0.5918 - val_categorical_accuracy: 0.7000
Epoch 8/10
 - 93s - loss: 0.6318 - categorical_accuracy: 0.6539 - val_loss: 0.5815 - val_categorical_accuracy: 0.7167
Epoch 9/10
 - 92s - loss: 0.6302 - categorical_accuracy

<keras.callbacks.History at 0x7fda1c7c9f98>

In [0]:
test_generator_obj = KerasCnnBatchGenerator('/content/drive/My Drive/Data/test_face_audio/', '/content/drive/My Drive/Data/test_face_coordinates/', 10, 1, fps = 8000, skip_step=10, out_size = 2)


In [52]:
model.evaluate_generator(test_generator_obj.generate_angular_data(), steps = 60)

[0.5800722390413284, 0.7]

In [7]:
i%tb

No traceback available to show.


In [0]:
generatr = train_generator_obj.generate_angular_data()

In [49]:
next(generatr)

(array([[[ 0.00545045],
         [ 0.00167706],
         [ 0.00099575],
         ...,
         [ 0.05702007],
         [ 0.05665322],
         [ 0.09412505]],
 
        [[-0.00414024],
         [-0.01273518],
         [-0.015408  ],
         ...,
         [-0.08615901],
         [-0.05644358],
         [-0.00497877]],
 
        [[ 0.00235837],
         [-0.00230596],
         [-0.00403543],
         ...,
         [ 0.13772863],
         [ 0.17504324],
         [ 0.14600912]],
 
        ...,
 
        [[-0.0713275 ],
         [-0.26162151],
         [-0.47088727],
         ...,
         [ 0.00917143],
         [ 0.00880457],
         [ 0.00214873]],
 
        [[-0.20591164],
         [-0.46166343],
         [-0.02777632],
         ...,
         [ 0.14286463],
         [ 0.02195902],
         [-0.06309942]],
 
        [[-0.05366595],
         [ 0.01509355],
         [-0.03417012],
         ...,
         [ 0.01037681],
         [ 0.00361616],
         [ 0.01084849]]]), array([[0., 1.],
  