In [5]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense
import numpy as np

In [6]:
from keras.layers import Conv2D, Dense, MaxPool1D, Lambda, Concatenate, Input, Dropout, Input, LSTM
import keras.backend as K
from keras.optimizers import Adam

In [7]:
# Read the whole tab-separated corpus and split it into raw lines.
# The context manager closes the file handle as soon as the text is in memory
# (a bare open(...).read() leaves closing to the garbage collector).
with open('qndA.tsv', encoding='utf-8') as corpus_file:
    lines = corpus_file.read().split('\n')

In [8]:
# Containers that are filled while the corpus is parsed.
eng_sent, fra_sent = [], []            # input sentences / target sentences
eng_chars, fra_chars = set(), set()    # distinct characters seen on each side
nb_samples = 1000                      # number of corpus lines to process

In [9]:
# Split the first `nb_samples` corpus lines into (input, target) pairs and
# collect the set of characters used on each side.
for line_no in range(nb_samples):
    fields = lines[line_no].split('\t')
    eng_line = fields[0]

    # '\t' marks the start of the target sentence and '\n' marks its end.
    fra_line = '\t' + fields[1] + '\n'

    eng_sent.append(eng_line)
    fra_sent.append(fra_line)

    # set.update adds every character of the string; repeats are ignored.
    eng_chars.update(eng_line)
    fra_chars.update(fra_line)

In [10]:
# Turn both character sets into sorted lists so that each character has a
# stable position that can serve as its one-hot index.
eng_chars = sorted(eng_chars)
fra_chars = sorted(fra_chars)

In [11]:
# index -> English character (key is the index, value is the character)
eng_index_to_char_dict = dict(enumerate(eng_chars))

# English character -> index (key is the character, value is its index)
eng_char_to_index_dict = {ch: idx for idx, ch in eng_index_to_char_dict.items()}

In [12]:
# index -> French character (key is the index, value is the character)
fra_index_to_char_dict = dict(enumerate(fra_chars))

# French character -> index (key is the character, value is its index)
fra_char_to_index_dict = {ch: idx for idx, ch in fra_index_to_char_dict.items()}

In [13]:
# Longest sentence on each side, in characters; these set the time dimension
# of the one-hot tensors.
max_len_eng_sent = max(map(len, eng_sent))
max_len_fra_sent = max(map(len, fra_sent))

In [14]:
# Zero-initialised one-hot tensors indexed as (sample, timestep, character index).
eng_tensor_shape = (nb_samples, max_len_eng_sent, len(eng_chars))
fra_tensor_shape = (nb_samples, max_len_fra_sent, len(fra_chars))

tokenized_eng_sentences = np.zeros(eng_tensor_shape, dtype='float32')
tokenized_fra_sentences = np.zeros(fra_tensor_shape, dtype='float32')
# Same shape as the decoder input; filled with the decoder input shifted by one step.
target_data = np.zeros(fra_tensor_shape, dtype='float32')

In [15]:
# One-hot encode every input and target sentence.
for row in range(nb_samples):
    for pos, ch in enumerate(eng_sent[row]):
        tokenized_eng_sentences[row, pos, eng_char_to_index_dict[ch]] = 1

    for pos, ch in enumerate(fra_sent[row]):
        col = fra_char_to_index_dict[ch]
        tokenized_fra_sentences[row, pos, col] = 1

        # The target is the decoder input shifted one step to the left, so the
        # character at position 0 (the start marker) never appears in it.
        if pos > 0:
            target_data[row, pos - 1, col] = 1

In [16]:
# Encoder model
# Reads a one-hot input sequence of any length (time axis is None) whose last
# axis has one slot per entry of `eng_chars`.

encoder_input = Input(shape=(None,len(eng_chars)))
# 256-unit LSTM; return_state=True makes the layer return its final hidden
# and cell states in addition to its output.
encoder_LSTM = LSTM(256,return_state = True)
encoder_outputs, encoder_h, encoder_c = encoder_LSTM (encoder_input)
# We discard `encoder_outputs` and only keep the states; they become the
# decoder's initial state.
encoder_states = [encoder_h, encoder_c]







In [17]:
# Decoder model
# Set up the decoder, using `encoder_states` as initial state.
# The decoder input is a one-hot sequence of any length over `fra_chars`.
decoder_input = Input(shape=(None,len(fra_chars)))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_LSTM = LSTM(256,return_sequences=True, return_state = True)
decoder_out, _ , _ = decoder_LSTM(decoder_input, initial_state=encoder_states)
# Softmax over the target vocabulary, applied to the LSTM output sequence.
decoder_dense = Dense(len(fra_chars),activation='softmax')
decoder_out = decoder_dense (decoder_out)
# The training model built from these tensors maps
# [encoder_input, decoder_input] to `decoder_out`.

In [18]:
# Training model: takes the encoder and decoder inputs and outputs the
# per-timestep softmax over target characters.
model = Model(inputs=[encoder_input, decoder_input],outputs=[decoder_out])

# Training is currently disabled (commented out below). When enabled it
# compiles the model and fits it on the one-hot tensors, holding out 20% of
# the samples for validation.

# Run training
# model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# model.fit(x=[tokenized_eng_sentences,tokenized_fra_sentences], 
#           y=target_data,
#           batch_size=32,
#           epochs=50,
#           validation_split=0.2)

In [19]:
# model.save('s2s.h5')


In [32]:

# Load weights from "s2s.h5" into the existing `model`.
# NOTE(review): the file is presumably the one written by the commented-out
# `model.save('s2s.h5')` cell — confirm it matches the current vocabulary sizes.
model.load_weights("s2s.h5")

In [21]:
# Inference models for testing
# To decode a test sentence, we will repeatedly:
#
# 1) Encode the input sentence and retrieve the initial decoder state.
# 2) Run one step of the decoder with this initial state and a "start of
#    sequence" token as target. The output will be the next target character.
# 3) Append the predicted target character and repeat.

# Encoder inference model: input sequence -> [hidden state, cell state].
encoder_model_inf = Model(encoder_input, encoder_states)

# Decoder inference model
# The decoder state is fed in explicitly so it can be carried from one
# decoding step to the next; 256 matches the LSTM size used above.
decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_input_states = [decoder_state_input_h, decoder_state_input_c]

# Re-apply the same `decoder_LSTM` and `decoder_dense` layer objects, this
# time seeded from the state inputs.
# Note: this rebinds the name `decoder_out`, which previously referred to the
# training graph's output tensor.
decoder_out, decoder_h, decoder_c = decoder_LSTM(decoder_input, 
                                                 initial_state=decoder_input_states)

decoder_states = [decoder_h , decoder_c]

decoder_out = decoder_dense(decoder_out)

# Inputs: [decoder input, h, c]; outputs: [character distribution, new h, new c].
decoder_model_inf = Model(inputs=[decoder_input] + decoder_input_states,
                          outputs=[decoder_out] + decoder_states )

# `decode_seq` uses these two models to implement the loop described above.

In [22]:
def decode_seq(inp_seq):
    """Greedily decode one encoded input sequence into a target string.

    The encoder inference model turns `inp_seq` into the initial decoder
    state. Starting from the '\\t' start marker, the decoder inference model
    is run one step at a time; at each step the most probable character is
    appended to the result and fed back as the next decoder input.

    Args:
        inp_seq: input passed straight to `encoder_model_inf.predict`
            (assumed to be a one-hot array with a leading batch axis of
            size 1 — confirm against the caller).

    Returns:
        The decoded string. Decoding stops after a '\\n' has been produced
        (the '\\n' is part of the result) or once the result is longer than
        `max_len_fra_sent` characters.
    """
    vocab_size = len(fra_chars)

    # The encoder's final states seed the decoder.
    states_val = encoder_model_inf.predict(inp_seq)

    # First decoder input: the one-hot start-of-sequence marker.
    target_seq = np.zeros((1, 1, vocab_size))
    target_seq[0, 0, fra_char_to_index_dict['\t']] = 1

    translated_sent = ''
    while True:
        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq] + states_val)

        # Greedy choice: most probable character at the last timestep.
        best_index = np.argmax(decoder_out[0, -1, :])
        next_char = fra_index_to_char_dict[best_index]
        translated_sent += next_char

        if next_char == '\n' or len(translated_sent) > max_len_fra_sent:
            return translated_sent

        # Feed the chosen character and the updated states into the next step.
        target_seq = np.zeros((1, 1, vocab_size))
        target_seq[0, 0, best_index] = 1
        states_val = [decoder_h, decoder_c]



In [23]:
#Text CNN

def textCNN(emb_dim, inp_shape):
    """Build a two-branch convolutional feature extractor.

    Each branch applies a 128-filter Conv2D with a (1, emb_dim) kernel to the
    input, squeezes axis 2, max-pools over the whole of axis 1 and squeezes
    that axis as well. The two branch outputs are concatenated along axis 1.

    Args:
        emb_dim: width of the convolution kernel. The squeeze of axis 2
            presumably relies on the input's third axis having this size.
        inp_shape: shape passed to `Input` (without the batch axis).

    Returns:
        A Keras `Model` mapping the input to the concatenated branch features.
    """
    inp = Input(inp_shape)

    def pooled_branch():
        # Convolve, drop the collapsed axis, then pool over every remaining step.
        feat = Conv2D(128, (1, emb_dim), activation='relu')(inp)
        feat = Lambda(lambda t: K.squeeze(t, axis=2))(feat)
        steps = feat.get_shape().as_list()[1]
        feat = MaxPool1D(steps)(feat)
        return Lambda(lambda t: K.squeeze(t, axis=1))(feat)

    # Two structurally identical branches, each with its own weights.
    first = pooled_branch()
    second = pooled_branch()

    out = Concatenate(axis=1)([first, second])
    return Model(inp, out)


In [24]:
#Discriminator

def discriminator(query_cnn, response_cnn):
    """Build the discriminator that scores a (query, response) pair.

    Both inputs are passed through their own feature extractor, the two
    feature vectors are concatenated along axis 1 and a small dense stack
    ending in a single sigmoid unit produces the score.

    Reads the module-level `num_encoder_tokens` and `latent_dim` when called,
    so both must be defined before this function is invoked.

    Args:
        query_cnn: callable/model applied to the query input.
        response_cnn: callable/model applied to the response input.

    Returns:
        A Keras `Model` with inputs [query, response] and one output.
    """
    query = Input(shape=(None, num_encoder_tokens, 1))
    response = Input(shape=(None, latent_dim, 1))

    # Each extractor reduces its input to a flat feature vector.
    query_features = query_cnn(query)
    response_features = response_cnn(response)
    hidden = Concatenate(axis=1)([query_features, response_features])

    head = (
        Dense(128, activation='relu'),
        Dense(256, activation='relu'),
        Dropout(0.1),
        Dense(1, activation='sigmoid'),
    )
    for layer in head:
        hidden = layer(hidden)

    return Model([query, response], [hidden])

In [25]:
def combineModel(gen, dis):
    """Chain a generator and a frozen discriminator into one trainable model.

    The generator maps [query, decoder_inputs] to a response; the
    discriminator is then applied to [query, response]. `dis.trainable` is
    set to False so that only the generator is updated through this model.

    Reads the module-level `num_encoder_tokens` and `num_decoder_tokens`.

    Args:
        gen: generator model called as gen([query, decoder_inputs]).
        dis: discriminator model called as dis([query, response]); it must
            return two tensors (features, prediction).
            NOTE(review): the `discriminator` builder in this file returns a
            single output — confirm which discriminator is meant here.

    Returns:
        A Keras `Model` with inputs [query, decoder_inputs] and outputs
        [features, prediction].
    """
    query = Input(shape=(None, num_encoder_tokens))
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    response = gen([query, decoder_inputs])
    dis.trainable = False

    feat, out = dis([query, response])
    # `decoder_inputs` feeds the generator, so it has to be declared as a model
    # input as well; leaving it out makes Keras reject the graph as disconnected.
    return Model([query, decoder_inputs], [feat, out])


In [26]:
# Build one feature extractor for queries and one for responses.
# `latent_dim` is used both as the width of the CNN input and as the width of
# the convolution kernel inside textCNN.
# NOTE(review): 188 is hard-coded and equals `num_encoder_tokens` defined in a
# later cell; the response side presumably needs `num_decoder_tokens` — confirm.
latent_dim = 188
# (timesteps, width, channels); the fixed 8 timesteps sets the pooling window.
input_shape = (8, latent_dim, 1)
query_cnn = textCNN(latent_dim, input_shape)
response_cnn = textCNN(latent_dim, input_shape)




In [27]:
# Vocabulary sizes read by `discriminator` and `combineModel` when they are called.
# NOTE(review): hard-coded; presumably meant to equal len(eng_chars) and
# len(fra_chars) — confirm against the loaded corpus.
num_encoder_tokens, num_decoder_tokens = 188, 182

In [28]:
# Build the discriminator from the two CNN feature extractors and compile it
# as a binary classifier (Adam, learning rate 5e-4, accuracy metric).
dis = discriminator(query_cnn, response_cnn)
dis.compile(loss = 'binary_crossentropy', optimizer= Adam(5e-4), metrics=['accuracy'])
# Print the layer-by-layer summary of the discriminator.
dis.summary()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, None, 188, 1) 0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            (None, None, 188, 1) 0                                            
__________________________________________________________________________________________________
model_4 (Model)                 (None, 256)          48384       input_7[0][0]                    
________________________________________________________________________________

In [35]:
# combineModel's signature is (gen, dis): the seq2seq `model` is the generator
# and `dis` is the discriminator. The original call passed them the other way
# round, which made the discriminator act as the generator.
combined_model = combineModel(model, dis)
# One loss per output: 'mae' for the feature output, 'binary_crossentropy' for
# the real/fake prediction.
combined_model.compile(loss = ['mae', 'binary_crossentropy'], optimizer=Adam(5e-4))

In [30]:
# Adversarial training loop.
# NOTE(review): `batch_size`, `epoch`, `iterations`, `gen`, `loss_1`, `loss_2`,
# `encoder_input_data`, `decoder_input_data` and `decoder_target_data` are not
# defined in any cell shown in this notebook; they must exist before this cell
# runs. The tensors built earlier are named `tokenized_eng_sentences`,
# `tokenized_fra_sentences` and `target_data`, and the generator is `model`.

# Discriminator targets: 1 for real responses, 0 for generated ones.
ones = np.ones((batch_size, 1))
zeros = np.zeros((batch_size, 1))

for e in range(epoch):
    avgL1, avgL2, avgL3 = 0, 0, 0
    # enumerate() yields (index, item) pairs, so the zipped triple has to be
    # unpacked through a nested tuple target.
    # NOTE(review): zip() walks the first axis of each array, so each step
    # presumably sees a single sample while `ones`/`zeros` are sized for
    # `batch_size` rows — confirm how batches are meant to be formed.
    for i, (query, dinput_data, real_responses) in enumerate(
            zip(encoder_input_data, decoder_input_data, decoder_target_data)):

        # Generate fake responses from the generator. The generator takes two
        # inputs, which predict() expects as one list; a second positional
        # argument would be interpreted as the batch size.
        fake_responses = gen.predict([query, dinput_data])

        # Get the feature vector from the discriminator for both real and fake responses.
        # NOTE(review): these unpackings expect `dis` to return
        # (features, prediction), but `discriminator` builds a single-output
        # model; calling a model directly also expects tensors rather than
        # arrays — confirm the intended discriminator and use of predict().
        feat1, _ = dis([query, real_responses])
        feat2, _ = dis([query, fake_responses])

        # Train the discriminator on one real and one fake batch.
        dis_loss_1 = dis.train_on_batch([query, real_responses], ones)
        dis_loss_2 = dis.train_on_batch([query, fake_responses], zeros)

        # Train the generator through the combined model. The generator inside
        # it consumes both the query and the decoder input, so both are fed.
        cgan_loss = combined_model.train_on_batch([query, dinput_data], [feat1, ones])

        # Average of the two discriminator losses.
        dis_loss = 0.5 * np.add(dis_loss_1, dis_loss_2)
        loss_1.append(dis_loss)
        # avgL1 += dis_loss[0]

        # Combined-model (feature + adversarial) loss.
        loss_2.append(cgan_loss)
        # avgL2 += cgan_loss[1]

        if((i+1)%5 == 0):
            print("Epoch %d/%d   iteration %d/%d  D-Acc %3d%%  D-Loss: %f  cGAN_Dis-Loss: %f" % (e+1, epoch, i+1, iterations, 100*dis_loss[1], dis_loss[0], cgan_loss[0]))

        # Stop the epoch after `iterations` steps.
        if(i == iterations-1):
            break

    # loss_3.append([avgL1/iterations, avgL2/iterations])




In [33]:
# Decode one training sample (index 2) and show it next to its input sentence.
for seq_index in (2,):
    # Slice rather than index so the leading batch axis of size 1 is kept.
    inp_seq = tokenized_eng_sentences[seq_index:seq_index + 1]
    translated_sent = decode_seq(inp_seq)
    print('-\nInput sentence:', eng_sent[seq_index])
    print('Decoded sentence:', translated_sent)

-
Input sentence: anybody else seeing fb corruption during boot?
Decoded sentence: i have a lisk and whet is the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the problem in the install the 
