# Parsing and Model Training using Pokemon Method

This script parses passwords and prepares them for use in a recurrent neural network.
It then trains the model and, after every 5th epoch, prints a list of 3 generated passwords.

In [1]:
import numpy as np

# Read every password line into memory. The context manager closes the file
# handle as soon as the lines have been read (the bare open() never closed it).
with open("RandPass.txt", 'r') as pass_file:
    passes = pass_file.readlines()

# Strip the line endings and append a single space, which marks the end of a password
full_list = [line.rstrip("\n\r") + " " for line in passes]

# Character -> index lookup for chr(32) (space) through chr(127).
# Note that the indices run from 1 to 96; index 0 is not assigned to any character.
char_to_index = dict((chr(i+31),i) for i in range(1,97))

# Index -> character lookup, the exact inverse of char_to_index
index_to_char =  dict((i, chr(i+31)) for i in range(1,97))

The data must be one-hot encoded before it can be used to train the model and generate outputs.

In [2]:
# Tensor dimensions: number of passwords, longest password (including its
# trailing end-of-password space), and number of entries in the character table.
m = len(full_list)
max_char = max(len(entry) for entry in full_list)
char_dim = len(char_to_index)

# X[n, t] is the one-hot vector of character t of password n.
# Y[n, t] is the one-hot vector of the character that follows it (t + 1).
# Positions past the end of a password stay all-zero in both arrays, and so
# does the final character's row in Y because nothing follows it.
# NOTE(review): char_to_index values start at 1, so column 0 is never set and a
# value equal to char_dim would index out of range -- confirm this is intended.
X = np.zeros((m, max_char, char_dim))
Y = np.zeros((m, max_char, char_dim))

for row, entry in enumerate(full_list):
    codes = [char_to_index[symbol] for symbol in entry]
    for pos, code in enumerate(codes):
        X[row, pos, code] = 1
    # The target sequence is the input sequence shifted left by one character
    for pos, code in enumerate(codes[1:]):
        Y[row, pos, code] = 1

In [3]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import LambdaCallback

# Character-level network: an LSTM with 256 units that returns its output for
# every position of the (max_char, char_dim) input, followed by a softmax
# Dense layer of width char_dim that scores each possible next character.
model = Sequential([
    LSTM(256, input_shape=(max_char, char_dim), return_sequences=True),
    Dense(char_dim, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy
model.compile(optimizer='adam', loss='categorical_crossentropy')

Using TensorFlow backend.


In [4]:
def make_name(model, min_length=5): # Create the name
    """Sample one string from `model`, one character at a time.

    Starting from an all-zero input of shape (1, max_char, char_dim), the
    prediction at position i is used as a probability distribution for the
    character at position i; the sampled character is then written into the
    input at position i + 1 before the next prediction.

    Parameters
    ----------
    model : object with a ``predict(x, verbose=0)`` method returning an array
        indexable as ``[0, i]`` to give ``char_dim`` scores for position i.
    min_length : int, optional
        The end-of-password character (' ') is not allowed at the first
        ``min_length`` positions. Defaults to 5.

    Returns
    -------
    str
        The generated characters, always ending with a single ' '.

    Raises
    ------
    ValueError
        If no index below ``char_dim`` has an entry in ``index_to_char``.
    """
    # Only indices that have an entry in index_to_char may be sampled. The table
    # starts at 1, so sampling over the whole range could pick index 0 and fail
    # with a KeyError.
    valid = np.array([k in index_to_char for k in range(char_dim)], dtype=bool)
    if not valid.any():
        raise ValueError("index_to_char has no entry below char_dim")
    # Indices whose character is the end-of-password marker
    is_end = np.array([index_to_char.get(k) == ' ' for k in range(char_dim)], dtype=bool)

    name = []
    x = np.zeros((1, max_char, char_dim))
    i = 0

    while True:
        probs = np.asarray(model.predict(x, verbose=0)[0, i], dtype=np.float64)
        probs = np.where(valid, probs, 0.0)

        if i < min_length:
            # Same distribution as resampling until a non-terminator is drawn,
            # but cannot loop forever when the terminator holds all the mass.
            without_end = np.where(is_end, 0.0, probs)
            if without_end.sum() > 0:
                probs = without_end

        total = probs.sum()
        if total > 0:
            probs = probs / total
        else:
            # No usable mass left: fall back to a uniform draw over valid indices
            probs = valid / valid.sum()

        index = np.random.choice(char_dim, p=probs) # Picks a random character based off of its probability

        # Force the terminator once the length limit is reached
        last_step = i >= max_char - 2
        character = ' ' if last_step else index_to_char[index]
        name.append(character)
        if last_step or character == ' ':
            break

        # Feed the sampled character back in as the input for the next position
        x[0, i + 1, index] = 1
        i += 1

    return ''.join(name)

In [5]:
def generate_name_loop(epoch, _):
    """Epoch-end hook: report progress and periodically sample from the model.

    Parameters
    ----------
    epoch : int
        Index of the epoch that just finished.
    _ : any
        Second argument supplied by the callback; ignored.

    On every call the epoch number is printed. When the epoch is a multiple
    of 5, three samples from make_name(model) are printed. At epochs 100 and
    499 make_name(model) is additionally called 1000 times.
    """
    def announce():
        print('Names generated after epoch %d:' % epoch)

    print("Epoch Completed: ", epoch)

    every_fifth = (epoch % 5 == 0)
    if every_fifth:
        announce()
        for sample in (str(make_name(model)) for _n in range(3)):
            print(sample)

    if epoch in (100, 499):
        announce()
        # NOTE(review): these 1000 samples are generated and then discarded --
        # nothing is printed or stored. Confirm whether they were meant to be kept.
        remaining = 1000
        while remaining > 0:
            make_name(model)
            remaining -= 1
        print()

In [None]:
# Hook the sampling routine into training so it is invoked at the end of every epoch
name_generator = LambdaCallback(on_epoch_end=generate_name_loop)

# Train for 500 epochs in batches of 512. verbose=0 turns off Keras' own
# progress output, so the callback's prints are the only training log.
model.fit(
    x=X,
    y=Y,
    batch_size=512,
    epochs=500,
    verbose=0,
    callbacks=[name_generator],
)

Epoch %d Completed 0
Names generated after epoch 0:
Biqoment6 
hpaomove 
yas2nak 
Epoch %d Completed 1
Epoch %d Completed 2
Epoch %d Completed 3
Epoch %d Completed 4
Epoch %d Completed 5
Names generated after epoch 5:
ponica 
abdbennaf 
wD7dflhesUQ 
Epoch %d Completed 6
Epoch %d Completed 7
Epoch %d Completed 8
Epoch %d Completed 9
Epoch %d Completed 10
Names generated after epoch 10:
cybbf 
alear 
perta66 
Epoch %d Completed 11
Epoch %d Completed 12
Epoch %d Completed 13
Epoch %d Completed 14
Epoch %d Completed 15
Names generated after epoch 15:
jomja 
93812594f 
07kh1 
Epoch %d Completed 16
Epoch %d Completed 17
Epoch %d Completed 18
Epoch %d Completed 19
Epoch %d Completed 20
Names generated after epoch 20:
eampruze 
4071995 
andrey2 
Epoch %d Completed 21
Epoch %d Completed 22
Epoch %d Completed 23
Epoch %d Completed 24
Epoch %d Completed 25
Names generated after epoch 25:
542277 
692964 
es07001983 
Epoch %d Completed 26


In [None]:
# Append one million generated passwords to NeuralGen2.txt and echo each one.
# Every string returned by make_name ends with a space, so the entries in the
# file are separated by spaces rather than newlines.
# The context manager guarantees the file is flushed and closed, even if
# generation is interrupted part-way (the bare open() never closed the handle).
with open("NeuralGen2.txt", "a+") as out_file:
    for n in range(1000000):
        generated = str(make_name(model))
        out_file.write(generated)
        print(generated)
        