In [4]:
import configparser

import os

# Parser for the project's INI settings; allow_no_value accepts keys that
# have no "= value" part.
vAR_Config = configparser.ConfigParser(allow_no_value=True)

# The location of the INI file is supplied through the AQG environment variable.
vAR_INI_FILE_PATH = os.getenv('AQG')
if not vAR_INI_FILE_PATH:
    # os.getenv returns None when the variable is unset; fail here with a
    # clear message instead of an opaque TypeError inside ConfigParser.read().
    raise RuntimeError(
        "Environment variable 'AQG' is not set; it must hold the path to the INI file."
    )

# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed, so an empty result means nothing was loaded.
if not vAR_Config.read(vAR_INI_FILE_PATH):
    raise FileNotFoundError(
        "Could not read configuration file: {}".format(vAR_INI_FILE_PATH)
    )

# Names of all sections found in the INI file.
vAR_Data = vAR_Config.sections()

# Path of the text corpus, taken from the [FILE PATH] section.
vAR_uploaded = vAR_Config['FILE PATH']['TEST_DATA']
print(vAR_uploaded)


C:\AI\AUTOMATIC QUESTION GENERATION\ML\TEST DATA\data.txt


In [5]:
##################################################
#        Step-1:Importing the libraries          #
##################################################
import pandas as pd
import numpy as np

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
import numpy as np
import random
import sys
import io

In [7]:
###################################################
#          Step-2:Loading the data                #
###################################################

# Read the whole corpus and lower-case it. The context manager guarantees the
# file handle is closed as soon as the text has been read.
with open(vAR_uploaded, 'r') as vAR_file:
    vAR_text = vAR_file.read().lower()
print('text length', len(vAR_text))

text length 2163


In [8]:
# Show the first 300 characters of the lower-cased corpus as a sanity check.
print(vAR_text[0:300])

an apple is an edible fruit produced by an apple tree (malus domestica). apple trees are cultivated worldwide and are the most widely grown species in the genus malus. the tree originated in central asia, where its wild ancestor, malus sieversii, is still found today. apples have been grown for thou


In [9]:
######################################################################
#  Step-3:Mapping from Character to integer and Integer to Character # 
######################################################################
# Distinct characters of the corpus, in sorted order.
vAR_chars = sorted(set(vAR_text))
print('total chars: ', len(vAR_chars))

total chars:  39


In [10]:
# character -> integer position in vAR_chars
vAR_char_indices = {ch: idx for idx, ch in enumerate(vAR_chars)}
# integer position in vAR_chars -> character (inverse of the mapping above)
vAR_indices_char = dict(enumerate(vAR_chars))

In [11]:
###################################################################
# Step-4 Splitting up of data to a length for training the model. #
###################################################################

vAR_maxlen = 40  # characters per input window
vAR_step = 3     # offset between the starts of consecutive windows

# Start offsets of every window that still has one following character.
_window_starts = range(0, len(vAR_text) - vAR_maxlen, vAR_step)
# Each input is a vAR_maxlen-character slice of the corpus ...
vAR_sentences = [vAR_text[start: start + vAR_maxlen] for start in _window_starts]
# ... and its target is the single character that comes right after that slice.
vAR_next_chars = [vAR_text[start + vAR_maxlen] for start in _window_starts]
print('nb sequences:', len(vAR_sentences))

nb sequences: 708


In [12]:
# First three input windows, then (on the next line) their target characters.
print(vAR_sentences[:3], vAR_next_chars[:3], sep='\n')


['an apple is an edible fruit produced by ', 'apple is an edible fruit produced by an ', 'le is an edible fruit produced by an app']
['a', 'a', 'l']


In [13]:
###################################################################
#         Step-5 Transform data into Boolean Array                #
###################################################################
# One-hot encode the training data.
#   vAR_x[i, t, c] is True when character t of window i is vAR_chars[c].
#   vAR_y[i, c]    is True when the character following window i is vAR_chars[c].
# The builtin ``bool`` is used as dtype: the ``np.bool`` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
vAR_x = np.zeros((len(vAR_sentences), vAR_maxlen, len(vAR_chars)), dtype=bool)
vAR_y = np.zeros((len(vAR_sentences), len(vAR_chars)), dtype=bool)
for i, sentence in enumerate(vAR_sentences):
    for t, char in enumerate(sentence):
        vAR_x[i, t, vAR_char_indices[char]] = 1
    vAR_y[i, vAR_char_indices[vAR_next_chars[i]]] = 1

In [15]:
# First three encoded windows, then (on the next line) their encoded targets.
print(vAR_x[:3], vAR_y[:3], sep='\n')

[[[False False False ... False False False]
  [False False False ... False False False]
  [False  True False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False  True False]
  [False  True False ... False False False]]

 [[False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False False False]
  [False  True False ... False False False]]

 [[False False False ... False False False]
  [False False False ... False False False]
  [False  True False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]]]
[[False False False False False False False False False False False False
  False  True False False False False False False False False False False
  False False False False False Fals

In [16]:
##############################################################
#      Step-6: Creating a Recurrent Neural Network           #
##############################################################
# Layers are stacked in the order listed.
model = Sequential([
    # Recurrent layer with 128 units reading (vAR_maxlen, vocabulary-size) one-hot windows.
    LSTM(128, input_shape=(vAR_maxlen, len(vAR_chars))),
    # One output unit per distinct character.
    Dense(len(vAR_chars)),
    # Turn the outputs into a probability distribution over characters.
    Activation('softmax'),
])

In [17]:
# ``learning_rate`` is the supported argument name; the old ``lr`` spelling is
# deprecated and rejected by newer Keras releases.
optimizer = RMSprop(learning_rate=0.01)
# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [18]:
##############################################################
#                Step-7:Helper Functions                     #
##############################################################
def sample(preds, temperature=1.0):
    """Draw one index from a probability array after temperature re-weighting.

    Each probability ``p`` is re-weighted to ``p ** (1 / temperature)`` and the
    result is renormalised, so values below 1.0 sharpen the distribution and
    values above 1.0 flatten it. One index is then drawn at random from the
    re-weighted distribution.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Non-zero scaling factor applied to the log-probabilities.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    probs = np.asarray(preds).astype('float64')
    # log(0) == -inf is intended here (a zero-probability entry stays at zero),
    # so suppress NumPy's divide-by-zero warning for it.
    with np.errstate(divide='ignore'):
        scaled = np.log(probs) / temperature
    # Shift so the largest value is 0 before exponentiating. The shift cancels
    # out in the normalisation below, but without it a small temperature makes
    # every exp() underflow to 0 and the division yields NaN probabilities.
    scaled = scaled - np.max(scaled)
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)
    # A single multinomial trial produces a one-hot draw; argmax recovers its index.
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [19]:
def on_epoch_end(epoch, logs):
    """Print sample text produced by the model once an epoch has finished.

    A random ``vAR_maxlen``-character window of the corpus is used as the seed
    for every diversity level; for each level 400 characters are generated one
    at a time and streamed to stdout.

    Args:
        epoch: Epoch index, shown in the banner line.
        logs: Unused; accepted because the callback is invoked with it.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)
    # The seed position is drawn once, so all diversity levels share one seed.
    seed_start = random.randint(0, len(vAR_text) - vAR_maxlen - 1)
    seed_text = vAR_text[seed_start: seed_start + vAR_maxlen]
    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        window = seed_text
        generated = window
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(generated)
        for _ in range(400):
            # One-hot encode the current window as a batch of one.
            encoded = np.zeros((1, vAR_maxlen, len(vAR_chars)))
            for pos, ch in enumerate(window):
                encoded[0, pos, vAR_char_indices[ch]] = 1.
            probabilities = model.predict(encoded, verbose=0)[0]
            picked = vAR_indices_char[sample(probabilities, diversity)]
            generated += picked
            # Slide the window: drop the oldest character, append the new one.
            window = window[1:] + picked

            sys.stdout.write(picked)
            sys.stdout.flush()
        print()
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)


In [20]:
####################################################
#           Step-8:Callback Functions              #
####################################################

# will save our model each epoch the loss decreases.
from keras.callbacks import ModelCheckpoint

# File that receives the saved model.
filepath = "weights.hdf5"
# Watch the training loss and keep only the best (lowest) result, reporting each save.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [21]:
#will reduce the learning rate each time our learning plateaus.
from keras.callbacks import ReduceLROnPlateau
# Learning-rate schedule driven by the training loss.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,     # multiplier applied to the learning rate on a plateau
    patience=1,     # epochs without improvement before reducing
    min_lr=0.001,   # lower bound for the learning rate
)

In [22]:
# Callbacks handed to model.fit, in this order.
callbacks = [
    print_callback,  # prints generated text after each epoch
    checkpoint,      # saves the model to weights.hdf5
    reduce_lr,       # lowers the learning rate on a loss plateau
]

In [23]:
########################################################################
#       Step-9:Training the model and generating new text              #
########################################################################
# Train for 10 epochs in mini-batches of 128 windows. Left as a bare
# expression so the notebook still displays the returned History object.
model.fit(
    x=vAR_x,
    y=vAR_y,
    epochs=10,
    batch_size=128,
    callbacks=callbacks,
)

Epoch 1/10
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "ts wild ancestor, malus sieversii, is st"
ts wild ancestor, malus sieversii, is stnn nn en n  nnn n ln nn n nnnn nnn n nn n  e nnnn n n n  nnn nn nn n   n nninnn annnnnn  nnn nnn  nn n   nn nn n n nnntn n i nan   n n n nnnnn n  nnnnn nn n nn n  nnn n  n n n n n  nn   n nn nnnn  n nnnnn n  nn e nn  n  nnnn n n nnnnnn nn  n n n na n  n   n n nnn n   nnn nnnn enn l  n nn  nnn    nnn n n nnnn n nn n n nn n nn n  nn  n nn nn n nnn inn n en n n  nnnnnnn n n n  n nnnn e n n n n n n  n
----- diversity: 0.5
----- Generating with seed: "ts wild ancestor, malus sieversii, is st"
ts wild ancestor, malus sieversii, is st  nn     nean  nn  e  nn ne noinln n nn  iiln and nn hfnn  dnnan nohonodd n ai   gsuln in s lnv ngnnno nn nd nllnenh  n nennn   nennne  ennddlnn   n, e d iantsn lnn  neenmn  den  u  nnn- ann  n  ule es nn  nn dnin en n de ne  nan ,   nnnt n el  nn en innnn nnne n ntnr lunninanrr oen  nn

 -- cleanliness, contentment, austerity,     t s     e a  i   e ss   te r t  ts  r ittt set  c  oel  nttg vtt   s t o   tt  s  t te  tn t s tte t aroto  t    t ta t tet te t   t      ts tt  g t  t   ret   t    u   e c   h p t to t  ot   t t he eetr  tot. at  t  t  leo a etr itooste ih tth tt e r c  t tttof ee  ti d i at a     y s     a  t tat t   trta tt tu thts te t ao e   a t et td  ttte t  t  at  at a t to  tie ot tia e  w t   te  sp
----- diversity: 1.0
----- Generating with seed: " -- cleanliness, contentment, austerity,"
 -- cleanliness, contentment, austerity,lonow d tge  tdtsu -ott)u erleima ur ctneosiaee  s aort r ,htcotcycerwaewpe- , croo-ieaiwee e oe tofrgntleieoermiei si b t e  asn d tettbtc qd eattna t groe lfzithtupt   ayh i apr ytwweh yefa r tr. d o n t r roe vt f  ryttdf o- gtotapa fngslen ib,dce eh  fh a t vee rie cmnsugtn  ote   t a ,weetatrjde wi leftn tedcz ns ahff uoaw   enwonttteef 0itee ya thu lesursgaoga
tes et  innhn gdtuehseven 
  s 
----- diversity: 1.2
-----

l is submerged in saltwater, water molectlyit , cs qanbaaaef an db s --s vocearotc.lonhages byg th. acaaahe,abaris.nlmnsir hh fedgst nddbol wa trrbf waa 
   dh,-e cf s   ane vol th tpusashsarhaplr leus e neraotf wuanoon fuw,hb pa goaca ar ronoef fhnlanf lh fe ioe fhlvomue s-osb me uh a gpp cusoy at ouipoau fv ae- asol a rmfpauveos, ing lull
c aosem.ef ph gaiasslall aolfarhhdinfnas bgacd irl 
hra haf-ns at  op ntns0ts qapaa ialilddan sas

Epoch 00007: loss improved from 2.80168 to 2.68488, saving model to weights.hdf5
Epoch 8/10
----- Generating text after Epoch: 7
----- diversity: 0.2
----- Generating with seed: "de of rice and tamarind. 
priya writes p"
de of rice and tamarind. 
priya writes pon in  an  an  in  an  in  an  in  an  in  an  an  an  io te te ton te te tin  in  te te te te te te te te te te te te tu a  in  an  ar  io  an  an  an  an  an te ton  io  an  an  io te te te in  an  an  oo te te te te tin  an  an  io  in  an  an  an  an  te te te te te te ter an io  ae te te te 

<tensorflow.python.keras.callbacks.History at 0x2367a917670>

In [24]:
#############################################################################
#                  Step-10-Run model on Test Data                           #
#############################################################################

#Creating generate_text function similar to on_epoch_end function.

def generate_text(length, diversity):
    """Generate text with the trained model from a random corpus seed.

    Args:
        length: Number of characters to generate after the seed.
        diversity: Temperature passed to ``sample`` for every character.

    Returns:
        The ``vAR_maxlen``-character seed followed by the generated characters.
    """
    # Pick a random seed window from the corpus.
    seed_start = random.randint(0, len(vAR_text) - vAR_maxlen - 1)
    window = vAR_text[seed_start: seed_start + vAR_maxlen]
    pieces = [window]
    for _ in range(length):
        # One-hot encode the current window as a batch of one.
        encoded = np.zeros((1, vAR_maxlen, len(vAR_chars)))
        for pos, ch in enumerate(window):
            encoded[0, pos, vAR_char_indices[ch]] = 1.

        probabilities = model.predict(encoded, verbose=0)[0]
        picked = vAR_indices_char[sample(probabilities, diversity)]

        pieces.append(picked)
        # Slide the window: drop the oldest character, append the new one.
        window = window[1:] + picked
    return ''.join(pieces)

In [None]:
###################################################################
# Step-11: Review Model Outcome and write model outcome to a file #
###################################################################

# Generate once, so the text printed for review is exactly the text saved to
# disk (two separate generate_text calls would give two different samples).
vAR_textOutput = generate_text(500, 0.2)
print(vAR_textOutput)
# The context manager flushes and closes the file, so the output is fully
# written before the cell finishes.
with open("myOutFile.txt", "w") as vAR_outFile:
    vAR_outFile.write(vAR_textOutput)


ne to a number of fungal, bacterial and th ti t s ar  tin  th te tin  th tin in  th tin tin in  tos an  th ton in  tos ind th tin in  th tin  tol tin in ar  te tin in an ar an and th t s in i s and the tin  to te tin i s and th tis tin  to tin in t an t are tin  tol is ars th te te te te tin in  to ton in ar ar ar an  tos in ari tos ars tin ind the ter th te tin  to tin in  to te tin  te tin is ar  te tin in and ar  te tin  tos ind the tin  tos in tin in  to te te tin in  to tin  th te te ter te te te tin in  to tin in  tos in ar ar  
