In [None]:
import os
#import skvideo.io
import tqdm
import sys
import numpy as np
import csv
import cv2
import keras
from keras import backend as K
from pathlib import Path

In [None]:
# Mount Google Drive at /content/gdrive (needs the google.colab runtime).
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Clip and sampling configuration.
LENGTH = 10       # clip length, in seconds
FRAME_RATE = 24   # frame rate of the frame videos
SAMPLE_RATE = 2   # sampling rate for feature extraction
# Number of frames per clip after sampling.
NUM_FRAMES = int((LENGTH * FRAME_RATE) / SAMPLE_RATE)

In [None]:
# USER INPUT - data_dir and features_dir
# data_dir     - path to frame videos. Required only for feature extraction.
# features_dir - path to save or load VGG-Face features.

# Anchor on the Drive mount point instead of Path.cwd(): drive.mount() is
# called with '/content/gdrive', so a cwd-relative path only resolves while
# the working directory happens to be '/content'.
DIF_PATH = Path('/content/gdrive') / 'MyDrive' / 'DIF'
DIF_PATH_STR = str(DIF_PATH)
#data_dir=repo_path+'/DIFv2'+'/'+str(LENGTH)+'/frame_video' # USER INPUT Path to frame videos directory. Required only for feature extraction
features_dir = DIF_PATH / 'vgg_face7' # USER INPUT Path to save/load vgg face features
features_dir_str = str(features_dir)
#features_path=repo_path+'/features' # Folder containing all features audio and video

# Data generator

Reference: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

In [None]:
def train_test_split(csv_path):
    '''
    Read a split CSV and group its rows into train/val/test partitions.

    Each non-blank row is read as: column 0 = split name ('train', 'val' or
    'test'), column 1 = class name ('Drunk' or 'Sober'), column 2 = file name.
    The last four characters of the file name are dropped to form the
    sample ID. Blank lines are skipped.

    Input-  csv_path: path to the split CSV file.
    Output- partition dict with keys 'train', 'val' and 'test'. Each value is a
            dict holding 'list' (sample IDs in file order) and 'label'
            (sample ID -> 1 for Drunk, 0 for Sober).
            Returns None, after printing "Error in label", as soon as a row
            names a split other than train/val/test.
    Raises- KeyError if a row's class name is neither 'Drunk' nor 'Sober';
            IndexError if a non-blank row has fewer than three columns.
    '''
    label={'Drunk':1, 'Sober':0}
    partition = {name: {'list': [], 'label': {}} for name in ('train', 'val', 'test')}

    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line (e.g. a trailing
                # newline at the end of the file); there is nothing to record.
                continue
            split, class_name, filename = row[0], row[1], row[2]
            if split not in partition:
                print("Error in label")
                return None
            sample_id = filename[:-4]  # strip the 4-character extension
            partition[split]['label'][sample_id] = label[class_name]
            partition[split]['list'].append(sample_id)

    return partition
def count_classes(d):
    '''Return (number of values equal to 0, number of remaining values) for dict d.'''
    labels = list(d.values())
    n_zero = sum(1 for v in labels if v == 0)
    return (n_zero, len(labels) - n_zero)

class DataGenerator(keras.utils.Sequence):
    '''Keras Sequence that serves batches of features stored as .npy files.

    A sample ID is loaded from the file "<datapath>/<ID>.npy" and its class is
    looked up in the labels dict. Each batch is returned as (X, y) where X has
    shape (n, *dim) and y is the one-hot encoding of the n labels.
    '''
    def __init__(self, list_IDs, labels, datapath, batch_size=32, dim=(24,1000),n_classes=2, shuffle=True):
        '''Store the configuration and build the initial sample order.

        list_IDs   - list of sample IDs (file names without '.npy').
        labels     - dict mapping sample ID -> integer class.
        datapath   - directory containing the .npy feature files.
        batch_size - number of samples per batch.
        dim        - shape of a single sample.
        n_classes  - number of columns of the one-hot labels.
        shuffle    - reshuffle the sample order at the end of every epoch.
        '''
        # Newer Keras releases expect Sequence/PyDataset subclasses to call the
        # base initialiser; with a base class that defines no __init__ this is
        # a no-op.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        #self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.path = datapath
        # Build self.indexes last, once every attribute is in place.
        self.on_epoch_end()

    def __len__(self):
        '''Number of full batches per epoch.

        Samples left over after the last full batch are not served.
        '''
        return len(self.list_IDs) // self.batch_size


    def on_epoch_end(self):
        '''Reset the sample order, shuffling it when self.shuffle is set.'''
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        '''Return batch number `index` as a tuple (X, y).'''
        # Positions (into list_IDs) that make up this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Matching sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)
        # Imported locally so the class does not rely on an import made in a
        # later cell. NOTE(review): the per-batch collection presumably exists
        # to limit memory use - confirm it is still needed.
        import gc
        gc.collect()
        return X, y

    def __data_generation(self, list_IDs_temp):
        '''Load the samples named in list_IDs_temp.

        Returns X of shape (len(list_IDs_temp), *dim) and the one-hot labels.
        '''
        # Size the arrays by the number of IDs actually supplied, so a short
        # list never leaves uninitialised rows from np.empty in the output.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Features of one sample
            X[i,] = np.load(self.path+'/' + ID + '.npy')

            # Integer class of that sample
            y[i] = self.labels[ID]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

# 3 Training


## 3.1 Model creation and summary
Batch normalization

In [None]:
from keras.optimizers import Adam
from keras.models import Model
from keras.models import load_model
from keras.layers import Dense, Input, Dropout, LSTM, Activation,BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from time import time
import gc

In [None]:
def create_model(num_class,lstm_units,dropout,input_shape=(None,1000)):
    '''Build the BatchNormalization -> LSTM -> Dropout -> Dense classifier.

    num_class   - number of units in the sigmoid output layer.
    lstm_units  - number of units in the LSTM layer.
    dropout     - dropout rate applied to the LSTM output.
    input_shape - shape of one input sample, passed to Input(shape=...).

    Returns an uncompiled keras Model.
    '''
    inputs = Input(shape=input_shape)
    hidden = BatchNormalization()(inputs)
    hidden = LSTM(units=lstm_units)(hidden)
    hidden = Dropout(rate=dropout)(hidden)
    outputs = Dense(num_class, activation='sigmoid')(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [None]:
# Hyper-parameters
lstm_units = 128
dropout = .2
hp = 3  # not referenced by the other cells shown in this notebook

# USER INPUT, path to save/load model
model_path = f"{DIF_PATH_STR}/model"

# Two output units; every sample is NUM_FRAMES feature vectors of length 4096.
model = create_model(
    num_class=2,
    lstm_units=lstm_units,
    dropout=dropout,
    input_shape=(NUM_FRAMES, 4096),
)
model.summary()
print("Must check the csv path...........")

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 120, 4096)]       0         
                                                                 
 batch_normalization_4 (Bat  (None, 120, 4096)         16384     
 chNormalization)                                                
                                                                 
 lstm_4 (LSTM)               (None, 128)               2163200   
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_4 (Dense)             (None, 2)                 258       
                                                                 
Total params: 2179842 (8.32 MB)
Trainable params: 2171650 (8.28 MB)
Non-trainable params: 8192 (32.00 KB)
___________________

## 3.2 Fit the model

In [None]:
# USER INPUT: CSV file listing the train/val/test split.
#split_path = repo_path+'/DIFv2/'+str(LENGTH)+'/train_test_sets/1/split_4540_642_948.csv'# or enter path to the split.csv in the parent directory
split_path = f"{DIF_PATH_STR}/split_4550_642_940.csv"

In [None]:
def load_keras_model(path):
    '''Load the Keras model saved at path; return None if path is not an existing file.'''
    if not os.path.isfile(path):
        return None
    return load_model(path)
#Loading data filenames split

partition=train_test_split(split_path)
if partition is None:
    # train_test_split prints "Error in label" and returns None when a row
    # names an unknown split; stop with a clear message instead of failing
    # later on partition['train'].
    raise ValueError("Could not build the train/val/test split from " + str(split_path))
print("Number of training examples ")
print(len(partition['train']['list']))
print("Number of validation examples ")
print(len(partition['val']['list']))

# Keyword arguments shared by the training and validation DataGenerator.
# 'dim' is the shape of one sample: NUM_FRAMES feature vectors of length 4096.
params = {'datapath':features_dir_str ,
          'dim': (NUM_FRAMES,4096),
          'batch_size': 64,
          'n_classes': 2,
          'shuffle': False}

#weights for imbalance classes
count=count_classes(partition['train']['label'])
print("Class instances in training class.\n Sober:",count[0]," Drunk:",count[1])
if 0 in count:
    # The weights below divide by each class count.
    raise ValueError("Training split needs at least one Sober and one Drunk example to compute class weights")
# Each class is weighted by total/count, so the rarer class weighs more.
weight_0=float(count[0]+count[1])/float(count[0])
weight_1=float(count[0]+count[1])/float(count[1])
class_weight={0:weight_0, 1:weight_1}

#instances in val set
count=count_classes(partition['val']['label'])
print("Class instances in val class.\n Sober:",count[0]," Drunk:",count[1])

#instances in test set
count=count_classes(partition['test']['label'])
print("Class instances in test class.\n Sober:",count[0]," Drunk:",count[1])


Number of training examples 
4542
Number of validation examples 
640
Class instances in training class.
 Sober: 1000  Drunk: 3542
Class instances in val class.
 Sober: 319  Drunk: 321
Class instances in test class.
 Sober: 356  Drunk: 584


In [None]:

# The model ends in a 2-unit sigmoid layer and DataGenerator yields one-hot
# labels, so binary cross-entropy is evaluated per output unit.
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=["accuracy"])

# Checkpoint written every 10 epochs (save_best_only=False, so not only the
# best model). With metrics=["accuracy"] the validation metric is logged as
# 'val_accuracy', which is the key both `monitor` and the file-name template
# must use.
# NOTE(review): `period` is deprecated in tf.keras in favour of `save_freq` -
# revisit before enabling this callback on a newer Keras.
checkpoint = ModelCheckpoint(model_path+'/model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=False, mode='max',period=10)


#tensorboard
tensorboard = TensorBoard(log_dir=model_path)

train_generator=DataGenerator(partition['train']['list'],partition['train']['label'], **params)
val_generator=DataGenerator(partition['val']['list'],partition['val']['label'], **params)

print("generator created")

# Model.fit accepts a keras.utils.Sequence directly; fit_generator is
# deprecated and only forwards to it.
model.fit(x=train_generator,
          epochs=10,
          validation_data=val_generator,
          use_multiprocessing=True,
          workers=4,
          #callbacks=[checkpoint,tensorboard],
          class_weight=class_weight)



generator created


  model.fit_generator(generator=train_generator,


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7cd6203cfd90>

In [None]:
# Save the trained model under a fixed name. The '{epoch:03d}'/'{val_acc:03f}'
# placeholders are only filled in by ModelCheckpoint; passed to model.save()
# they would end up verbatim in the file name.
os.makedirs(model_path, exist_ok=True)  # the target directory may not exist yet
model.save(model_path+'/model-final.keras')