In [2]:
# Mount Google Drive inside the Colab VM so that files stored on Drive are
# reachable through the local path '/content/drive'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from keras.layers import Dense, Input, Flatten, Conv2D,Conv1D, MaxPooling2D,MaxPooling1D,GlobalMaxPooling1D,GlobalMaxPooling2D
from keras.layers import Reshape, Dropout, Concatenate, LSTM,Bidirectional,BatchNormalization
from keras.layers import Flatten,Activation,CuDNNGRU,CuDNNLSTM,LeakyReLU, Concatenate,concatenate
from keras.models import Model
from keras.models import Sequential
from keras import regularizers,optimizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize, MinMaxScaler
from sklearn.metrics import accuracy_score
import os
import json
from IPython.core.display import display, HTML
from tqdm import tqdm_notebook
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline


Using TensorFlow backend.


In [0]:
# Fixed size of the per-sample feature matrix fed to the network.
# NOTE: the names are swapped relative to how the arrays are laid out in this
# notebook. Arrays are allocated as (n_samples, PAD_COLS, PAD_ROWS), so
# PAD_COLS is the length of axis 1 (rows kept per sample, longer inputs are
# truncated) and PAD_ROWS is the length of axis 2 (values per row).
PAD_ROWS=26
PAD_COLS=600

In [5]:
# Show what is available in the dataset folder, then load the training set.
print(os.listdir('drive/My Drive/DL Dataset'))

# NOTE: `dir` shadows the Python built-in of the same name. The name is kept
# because the cell that loads 'dev.json' reads it.
dir = 'drive/My Drive/DL Dataset/'

# The context manager closes the file handle as soon as the JSON is parsed,
# instead of leaving it open for the lifetime of the kernel.
with open(os.path.join(dir, 'train.json'), encoding="utf8") as f:
    data = json.load(f)

['dev.json', 'train.json', 'valence_1000.h5', 'model_cnn_final.h5', 'best_model_cnn.h5', 'model_cnn.h5', 'NLU', 'res1.json']


In [6]:
# `data` is keyed by sample id, so the raw frame has one column per sample.
# Transposing gives one row per sample with the columns
# 'valence', 'activation' and 'features'.
df = pd.DataFrame(data).T
TOTAL_DATA = len(df)

# Convenience column holding the pair [valence, activation] for every row.
label_pairs = df[['valence', 'activation']].values.tolist()
df['labels'] = label_pairs
df.head()

Unnamed: 0,valence,activation,features,labels
0,0,1,"[[5.502810676891276, 5.389630715979907, 5.8907...","[0, 1]"
1,1,1,"[[5.059076172970736, 5.288492317702101, 4.2633...","[1, 1]"
2,0,1,"[[4.218546271669202, 4.961436495859291, 3.6650...","[0, 1]"
3,1,0,"[[4.650364321573866, 4.523905028353254, 5.0168...","[1, 0]"
4,0,1,"[[3.900221957277269, 2.7325726489808124, 2.565...","[0, 1]"


In [7]:
### LABEL ENCODING - collapse (valence, activation) into a single class code

# ------------------------------------------------ Get Label Code ----------------------------------------------------------
#   valence  activation  code
#      0         0        0
#      0         1        1
#      1         0        2
#      1         1        3
# which is simply  code = 2 * valence + activation.
#
# The codes are computed for the whole column at once. This avoids the removed
# `np.int` alias, chained assignment into a DataFrame (`frame[col][i] = ...`)
# and integer `[]` look-ups on a non-integer index.
valence = df['valence'].astype(int).values
activation = df['activation'].astype(int).values

# Fail loudly on anything that is not a binary flag; a value outside {0, 1}
# would otherwise end up with a meaningless class code.
if not (np.isin(valence, (0, 1)).all() and np.isin(activation, (0, 1)).all()):
    raise ValueError("valence and activation must both be 0 or 1")

# One integer class code (0..3) per training sample, in the same row order as `df`.
label_set = pd.DataFrame({'labels': 2 * valence + activation})



HBox(children=(IntProgress(value=0, max=7800), HTML(value='')))




In [8]:
# ------------------------------------------------ Get Features with Augmentation ----------------------------------------------------------
# Every sample's feature matrix is cropped to at most PAD_COLS rows and
# PAD_ROWS columns, min-max scaled column by column into [0.1, 1], and written
# into the top-left corner of a zero-filled (PAD_COLS, PAD_ROWS) slot. Padding
# therefore stays at exactly 0, below the smallest scaled value (0.1).
#
# NOTE(review): MinMaxScaler maps each column through (x - min) / (max - min),
# so a constant added to x before fitting cancels out. feat2, feat3 and feat4
# are consequently copies of feat1 up to float32 rounding, not new samples.
# Any train/validation split must keep all copies of a sample on the same side.

# Size from the frame itself rather than from TOTAL_DATA, which a later cell
# overwrites with the dev-set size.
n_samples = df.shape[0]
feature_shape = (n_samples, PAD_COLS, PAD_ROWS)

feat1 = np.zeros(feature_shape, dtype=np.float32)
feat2 = np.zeros(feature_shape, dtype=np.float32)
feat3 = np.zeros(feature_shape, dtype=np.float32)
feat4 = np.zeros(feature_shape, dtype=np.float32)

# The scaler is re-fitted on every matrix, i.e. scaling is per sample.
scaler = MinMaxScaler(feature_range=(0.1, 1))
train_features = df['features']

for i in tqdm_notebook(range(n_samples)):
    # .iloc: explicit positional access (the frame's index comes from JSON keys).
    # Cropping columns to PAD_ROWS mirrors the row cropping and keeps the
    # assignment below from failing on a wider-than-expected matrix.
    x = np.array(train_features.iloc[i])[:PAD_COLS, :PAD_ROWS]
    [r, c] = x.shape
    feat1[i, :r, :c] = scaler.fit_transform(np.float32(x))   # Scale to [0.1, 1] range
    feat2[i, :r, :c] = scaler.fit_transform(np.float32(x + 1.11111111))
    feat3[i, :r, :c] = scaler.fit_transform(np.float32(x + 3.33333333))
    feat4[i, :r, :c] = scaler.fit_transform(np.float32(x - 3.33333333))

HBox(children=(IntProgress(value=0, max=7800), HTML(value='')))




In [0]:
# Build the model inputs (X) and one-hot targets (Y), then hold out 15% of the
# ORIGINAL samples for validation.

NUM_CLASSES = 4   # class code = 2 * valence + activation, so codes are 0..3

# One-hot encode with a fixed number of columns. (pd.get_dummies would emit
# fewer columns if a class happened to be absent, and its dtype depends on the
# pandas version.)
codes = label_set['labels'].astype(int).values
Y1 = np.eye(NUM_CLASSES, dtype=np.float32)[codes]     # (n_samples, NUM_CLASSES)
labels = Y1.tolist()

# X stacks the feature copies one after another along axis 0, so copy k of
# sample i lives at row  k * n_samples + i.  Y repeats the labels to match.
X = np.concatenate((feat1,feat2,feat3,feat4), axis=0)
n_samples = Y1.shape[0]
n_copies = X.shape[0] // n_samples
assert X.shape[0] == n_copies * n_samples, "features and labels disagree on the number of samples"
Y = np.tile(Y1, (n_copies, 1))

# Split on the original sample ids and then expand each id to all of its
# copies. Splitting the stacked rows directly would scatter the copies of one
# sample across train and validation; since the copies are near-identical
# (a constant offset cancels under min-max scaling), validation accuracy would
# then mostly measure memorisation of training rows.
# NOTE: validation metrics will be lower (and honest) with this split, so the
# early-stopping threshold used during training may need to be revisited.
train_ids, test_ids = train_test_split(np.arange(n_samples), test_size=0.15, random_state=98)
copy_offsets = np.arange(n_copies) * n_samples
train_idx = (train_ids[None, :] + copy_offsets[:, None]).ravel()
test_idx = (test_ids[None, :] + copy_offsets[:, None]).ravel()

X_train, y_train = X[train_idx], Y[train_idx]
X_test, y_test = X[test_idx], Y[test_idx]

# Shape of a single sample, used as the model's input_shape.
input_dim=X[0].shape

In [22]:

# Sanity check: one one-hot row per stacked sample, 4 classes wide.
Y.shape

(31200, 4)

In [0]:
# Early-stopping criteria: all three must hold at the end of an epoch.
ACCURACY_THRESHOLD = 0.96     # validation categorical accuracy must exceed this
MIN_TRAIN_ACCURACY = 0.69     # training categorical accuracy must exceed this
MAX_VAL_LOSS = 2              # validation loss must be below this


class myCallback(tf.keras.callbacks.Callback):
    """Stop training once training accuracy, validation loss and validation
    accuracy have all passed their thresholds."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when all criteria are met.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metrics dict supplied by Keras; may be None or lack
                validation keys, in which case nothing happens.
        """
        # A fresh dict per call instead of a shared mutable default argument.
        logs = logs or {}
        train_acc = logs.get('categorical_accuracy')
        val_loss = logs.get('val_loss')
        val_acc = logs.get('val_categorical_accuracy')

        # Comparing None with a float raises TypeError, so skip the check when
        # any metric is missing (e.g. training without validation data).
        if train_acc is None or val_loss is None or val_acc is None:
            return

        if train_acc > MIN_TRAIN_ACCURACY and val_loss < MAX_VAL_LOSS and val_acc > ACCURACY_THRESHOLD:
            print("\nReached %2.2f%% accuracy. Stopping training!!" %(ACCURACY_THRESHOLD*100))
            self.model.stop_training = True


# Instantiate a callback object
callbacks = myCallback()


In [35]:
warnings.filterwarnings("ignore")


def _add_conv_block(net, filters, kernel_size, name, **conv_kwargs):
    """Append Conv1D('same' padding) -> LeakyReLU(0.3) -> BatchNormalization to `net`."""
    net.add(Conv1D(filters=filters, kernel_size=kernel_size, padding='same', name=name, **conv_kwargs))
    net.add(LeakyReLU(alpha=0.3))
    net.add(BatchNormalization())


def _add_pool_dropout(net, pool_size=3, rate=0.5):
    """Append MaxPooling1D(pool_size) -> Dropout(rate) to `net`."""
    net.add(MaxPooling1D(pool_size))
    net.add(Dropout(rate))


# 1D CNN over axis 1 of each sample:
#   two conv blocks with pooling + dropout, a final conv block reduced by
#   global max pooling, then a 4-way softmax over the class codes.
# Layer names Conv1 / Conv2 / Conv5 are kept as-is so saved weights keep
# matching by name (Conv3 / Conv4 were experiments that are no longer used).
model = Sequential()

_add_conv_block(model, 256, 9, 'Conv1', input_shape=input_dim)
_add_pool_dropout(model)

_add_conv_block(model, 128, 6, 'Conv2')
_add_pool_dropout(model)

_add_conv_block(model, 128, 3, 'Conv5')
model.add(GlobalMaxPooling1D())

model.add(Dense(4, activation='softmax'))

adadelta = optimizers.Adadelta()
model.compile(optimizer=adadelta,loss='categorical_crossentropy',metrics=['categorical_accuracy'])

# epochs=1000 is only an upper bound; the custom callback stops training as
# soon as its thresholds are met.
# `use_multiprocessing` is not passed: it applies to generator / Sequence
# inputs only and has no effect on in-memory arrays, and the other fit() calls
# in this notebook do not pass it either.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    callbacks=[callbacks],
    epochs=1000,
    shuffle=True,
    verbose=1,
)  # train the model


Train on 26520 samples, validate on 4680 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69

In [41]:
# Continue training the existing `model` (weights are not reset).
# initial_epoch=60 makes the epoch counter in the log start at 61.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    callbacks=[callbacks],
    initial_epoch=60,
    epochs=1000,
    shuffle=True,
    verbose=1,
)  # train the model

Train on 26520 samples, validate on 4680 samples
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000


<keras.callbacks.History at 0x7f879e780b00>

In [0]:
# Save the current `model` to an .h5 file in the dataset folder on Drive.
model_path = "drive/My Drive/DL Dataset/model_cnn_best1.h5"
model.save(model_path)

In [0]:
# Load the dev set from the dataset folder. The context manager closes the
# file handle as soon as the JSON is parsed instead of leaving it open.
with open(os.path.join(dir, 'dev.json'), encoding="utf8") as f:
    dev = json.load(f)

In [42]:
# Build the padded, scaled feature tensor for the dev set, using the same
# per-sample preprocessing as the training features.
df1 = pd.DataFrame(dev).T          # one row per dev sample

# NOTE: this overwrites TOTAL_DATA (previously the training-set size); the
# cells that decode the predictions rely on it being the dev-set size.
TOTAL_DATA=df1.shape[0]
tst = np.zeros((TOTAL_DATA,PAD_COLS, PAD_ROWS),dtype=np.float32)
scaler = MinMaxScaler(feature_range=(0.1, 1))

dev_features = df1['features']
for i in tqdm_notebook(range(TOTAL_DATA)):
    # .iloc: explicit positional access (the frame's index comes from JSON keys).
    # Crop to at most PAD_COLS rows and PAD_ROWS columns so the matrix always
    # fits its (PAD_COLS, PAD_ROWS) slot; the remainder stays zero-padded.
    x = np.array(dev_features.iloc[i])[:PAD_COLS, :PAD_ROWS]
    [r,c] = x.shape
    # Cast to float32 before scaling, exactly as done for the training data,
    # so train and dev inputs go through identical arithmetic.
    tst[i,:r,:c] = scaler.fit_transform(np.float32(x))   # Scale to [0.1, 1] range

print(tst.shape)

HBox(children=(IntProgress(value=0, max=3342), HTML(value='')))


(3342, 600, 26)


In [0]:
# Class code (0..3) with the highest predicted probability for each dev sample.
pred = model.predict(tst).argmax(axis=1)

# Class codes of the held-out split of the training data.
# NOTE(review): these rows are unrelated to `tst`, so `y_label` cannot be
# compared element-wise with `pred`.
y_label = y_test.argmax(axis=1)

In [44]:
## Prepare output file : Decipher predicted code to valence and activation values
#
#   code  valence  activation
#    0       0         0
#    1       0         1
#    2       1         0
#    3       1         1
#
# i.e. valence = code // 2 and activation = code % 2.
#
# Decoding the whole prediction vector at once avoids the removed `np.int`
# alias and chained assignment (`res[col][i] = ...`), which pandas does not
# guarantee to write through to the frame.
pred_codes = np.asarray(pred).astype(np.int64)

res = pd.DataFrame(
    {'valence': pred_codes // 2, 'activation': pred_codes % 2},
    columns=['valence', 'activation'],
)

HBox(children=(IntProgress(value=0, max=3342), HTML(value='')))




In [0]:
# Write the decoded predictions to Drive as JSON keyed by row index.
results_path = 'drive/My Drive/DL Dataset/NLU/res98.json'
res.to_json(results_path, orient='index')

In [0]:
# Continue training the existing `model` with the epoch counter starting at 381.
# NOTE(review): the last training log captured in this notebook ends at epoch
# 126, so initial_epoch=380 presumably comes from a run that is not shown here
# - confirm before re-running.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    callbacks=[callbacks],
    initial_epoch=380,
    epochs=1000,
    shuffle=True,
    verbose=1,
)  # train the model

In [0]:
# Print the layer-by-layer architecture and parameter counts of `model`.
model.summary()

In [0]:
# Join `feat` and `feat1` along axis 1.
# NOTE(review): `feat` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh "Restart & Run All". It looks like leftover
# scratch work - define `feat` explicitly or remove the cell.
feat_merged=np.concatenate((feat, feat1), axis=1)

In [0]:
# Inspect the shape of the merged feature array built in the previous cell.
feat_merged.shape