In [23]:
import numpy as np
import itertools 
import operator
from datetime import datetime
import sys

# https://machinelearningmastery.com/difference-between-a-batch-and-an-epoch/
# https://stackoverflow.com/questions/4265988/generate-random-numbers-with-a-given-numerical-distribution
# https://superuser.com/questions/513496/how-can-i-run-a-command-from-the-terminal-without-blocking-it
# https://www.cyberciti.biz/faq/find-out-what-processes-are-running-in-the-background-on-linux/
# password possible char
# a-z    A-Z     1234567890      @%+\/'!#$^?:,.(){}[]~-_*   26+26+10+24

In [24]:
# credit from https://songhuiming.github.io/pages/2017/08/20/build-recurrent-neural-network-from-scratch/
def softmax(x):
    xt = np.exp(x - np.max(x))
    return xt / np.sum(xt)


'''
 - model: 
 - X_train:
 - y_train:
 - learning_rate:
 - nepoch:
 - evaluate loss_after:
'''
def train_with_sgd(model, X_train, y_train, learning_rate = 0.005, nepoch = 100, evaluate_loss_after = 5):
    # keep track of the losses so that we can plot them later
    losses = []
    num_examples_seen = 0
    for epoch in range(nepoch):
        # for each training example...
        print(str(epoch) + "th epoch:")
        count = 0
        for i in range(len(y_train)):
            # one sgd step
            model.sgd_step(X_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
            count = count + 1
            if(count % 1000000 == 0):
                np.save('U.npy', model.U)
                np.save('V.npy', model.V)
                np.save('W.npy', model.W)
                print("    " + str(count))
        
        np.save('U.npy', model.U)
        np.save('V.npy', model.V)
        np.save('W.npy', model.W)
        # optionally evaluate the loss
        if (epoch % evaluate_loss_after == 0):
            loss = model.calculate_loss(X_train, y_train)
            losses.append((num_examples_seen, loss))
            time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("%s: loss after num_examples_seen=%d epoch=%d: %f" %(time, num_examples_seen, epoch, loss))
            # adjust the learning rate if loss increases
            if (len(losses) > 1 and losses[-1][1] > losses[-2][1]):
                learning_rate = learning_rate * 0.5
                print("setting learning rate to %f" %(learning_rate))
            sys.stdout.flush()

In [72]:
class RNNNumpy():
    def __init__(self, word_dim, hidden_dim = 100, bptt_truncate = 4, continued = False):
        # assign instance variable
        self.word_dim = word_dim   # number of possible characters, in this case 86
        self.hidden_dim = hidden_dim # number of hidden units 
        self.bptt_truncate = bptt_truncate
        
        if(continued):
            self.U = np.load("U.npy")
            self.V = np.load("V.npy")
            self.W = np.load("W.npy")
        else:
            # random initiate the parameters
            self.U = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim), (hidden_dim, word_dim))
            self.V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (word_dim, hidden_dim))
            self.W = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
    
    def forward_progagation(self, x):
        # total time steps / length of the password
        T = len(x)

        #intialize s, with the initial state s set to zero, we thus have T+1 rows, each with size of hidden_dim
        s = np.zeros((T+1, self.hidden_dim))
        #intialize o, 
        o = np.zeros((T, self.word_dim))

        for t in np.arange(T):
            # since x is a one-hot encoder, index U with x[t] is the same thing as mutiply U with tons of 0s
            r = s[t-1]
            print(r.shape)
            s[t] = np.tanh(self.U[:, x[t]] + self.W.dot(s[t-1]))
            o[t] = softmax(self.V.dot(s[t])) 
        return [o, s]

    def predict(self, x):
        o, s = self.forward_progagation(x)
        return np.argmax(o, axis = 1)
    
    
    # in this case x and y are 2-d arrays
    def calculate_total_loss(self, x, y):
        L = 0
        # for each sentence ...
        for i in np.arange(len(y)):
            o, s = self.forward_progagation(x[i])
            # we only care about our prediction of the "correct" words
            correct_word_predictions = o[np.arange(len(y[i])), y[i]]
            # add to the loss based on how off we were
            L += -1 * np.sum(np.log(correct_word_predictions))
        return L

    # in this case x and y are 2-d arrays
    def calculate_loss(self, x, y):
        # divide the total loss by the number of training examples
        N = np.sum((len(y_i) for y_i in y))
        return self.calculate_total_loss(x, y)/N
    
    
    def bptt(self, x, y):
        T = len(y)
        # perform forward propagation
        o, s = self.forward_progagation(x)
        # we will accumulate the gradients in these variables
        dLdU = np.zeros(self.U.shape)
        dLdV = np.zeros(self.V.shape)
        dLdW = np.zeros(self.W.shape)
        delta_o = o
        delta_o[np.arange(len(y)), y] -= 1   # it is y_hat - y
        # for each output backwards ...
        for t in np.arange(T):
            dLdV += np.outer(delta_o[t], s[t].T)    # at time step t, shape is word_dim * hidden_dim
            # initial delta calculation
            delta_t = self.V.T.dot(delta_o[t]) * (1 - (s[t] ** 2))
            # backpropagation through time (for at most self.bptt_truncate steps)
            # given time step t, go back from time step t, to t-1, t-2, ...
            for bptt_step in np.arange(max(0, t-self.bptt_truncate), t+1)[::-1]:
                # print("Backprogation step t=%d bptt step=%d" %(t, bptt_step))
                dLdW += np.outer(delta_t, s[bptt_step - 1])
                dLdU[:, x[bptt_step]] += delta_t
                # update delta for next step
                dleta_t = self.W.T.dot(delta_t) * (1 - s[bptt_step-1]**2)
        return [dLdU, dLdV, dLdW]
    
            
    def sgd_step(self, x, y, learning_rate):
        dLdU, dLdV, dLdW = self.bptt(x, y)
        self.U -= learning_rate * dLdU
        self.V -= learning_rate * dLdV
        self.W -= learning_rate * dLdW
    
    def generate_guess(self):
        pw = []
        s = np.zeros(self.hidden_dim)
        o = np.zeros(self.word_dim)
        char = 0
        prob = 1
        
        for t in np.arange(100):
            s = np.tanh(self.U[:, char] + self.W.dot(s))
            o = softmax(self.V.dot(s))
            
            char = np.random.choice(np.arange(0, 97), p=o)
            prob = 1 * o[char]
            
            if(char == 96 or char == 0):
                pw = [x + 31 for x in pw] 
                s = ''.join(map(chr, pw)) 
                return s, prob
            pw.append(char)
            
            


In [30]:
# around 70 secs for rockyou.txt
# https://theasciicode.com.ar/ascii-control-characters/backspace-ascii-code-8.html
# set range for char 32 - 126 in ascii,  95 chars, remove any pw cotains char outside the range
# also we want an addtional char START and END indicates the start and end of the password, so totally 97 chars
# END would only appear in y, START only in x 

def generate_training_pw(infile):
    f = open(infile, "r", errors='ignore')
    line = f.readline()
    x = []
    y = []
    count = 0
    while line != "":
        l = [ord(i)-31 for i in line][:-1] # convert to int
        if any(y > 95 or y < 1 for y in l):
            line = f.readline()
            continue
        l.insert(0, 0)
        x.append(l) # add to X

        l.append(96)
        l = l[1:]
        y.append(l) # modify and add to y
        line = f.readline()

        #count
        count = count+1
        if(count % 1000000 == 0):
            print(str(count / 1000000) + "million" , end = "->")
    print("Finished with " + str(count) + "passwords") 
    return x,y

In [51]:
x, y = generate_training_pw("rockyou.txt") #input x,y

offset = 0

1.0million->2.0million->3.0million->4.0million->5.0million->6.0million->7.0million->8.0million->9.0million->10.0million->11.0million->12.0million->13.0million->14.0million->Finished with 14330062passwords


In [52]:

np.random.seed(10)
char_size = 97

if(offset == 0):
    model = RNNNumpy(char_size)
else:
    # Insert previous U, V, W into model 
    model = RNNNumpy(char_size, continued=True)
    
    #finsh up this epoch
    losses = train_with_sgd(model, x, y, nepoch = 1)

losses = train_with_sgd(model, x, y, nepoch = 10, evaluate_loss_after = 5)


# TODO: Generate random passwords, then check for 

0th epoch'
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
480000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
700000
710000
720000
730000
740000
750000
760000
770000
780000
790000
800000
810000
820000
830000
840000
850000
860000
870000
880000
890000
900000
910000
920000
930000
940000
950000
960000
970000
980000
990000
1000000
1010000
1020000
1030000
1040000
1050000
1060000
1070000
1080000
1090000
1100000
1110000
1120000
1130000
1140000
1150000
1160000
1170000
1180000
1190000
1200000
1210000
1220000
1230000
1240000
1250000
1260000
1270000
1280000
1290000
1300000
1310000
1320000
1330000
1340000
1350000
1360000
1370000
1

10340000
10350000
10360000
10370000
10380000
10390000
10400000
10410000
10420000
10430000
10440000
10450000
10460000
10470000
10480000
10490000
10500000
10510000
10520000
10530000
10540000
10550000
10560000
10570000
10580000
10590000
10600000
10610000
10620000
10630000
10640000
10650000
10660000
10670000
10680000
10690000
10700000
10710000
10720000
10730000
10740000
10750000
10760000
10770000
10780000
10790000
10800000
10810000
10820000
10830000
10840000
10850000
10860000
10870000
10880000
10890000
10900000
10910000
10920000
10930000
10940000
10950000
10960000
10970000
10980000
10990000
11000000
11010000
11020000
11030000
11040000
11050000
11060000
11070000
11080000
11090000
11100000
11110000
11120000
11130000
11140000
11150000
11160000
11170000
11180000
11190000
11200000
11210000
11220000
11230000
11240000
11250000
11260000
11270000
11280000
11290000
11300000
11310000
11320000
11330000
11340000
11350000
11360000
11370000
11380000
11390000
11400000
11410000
11420000
11430000
11440000
1



2020-05-19 14:32:42: loss after num_examples_seen=14330062 epoch=0: 7.821211
1th epoch'
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000


KeyboardInterrupt: 

In [73]:
file = open('myfile.txt', 'w') 
char_size = 97

model = RNNNumpy(char_size, continued=True)
i = 0
count = 0
while i < 10:
    i = i + 1
    s, prob = model.generate_guess()
    if(prob > 0.2):
        count = count + 1
        file.writelines(s + '\n')
        

file.close()

12
48
82
64
58
2
13
34
90
69
8
45
79
10
25
42
50
58
66
26
29
78
30
95
88
75
66
57
7
65
16
50
78
57
33
1
14
50
49
87
6
91
6
3
67
68
70
30
82
12
44
37
14
27
14
59
56
15
15
70
92
16
93
31
26
41
29
78
80
76
85
48
59
61
38
16
75
28
4
86
49
95
41
41
95
39
88
89
29
35
48
13
29
15
46
52
57
64
83
50
53
38
38
51
63
94
87
9
76
12
17
52
36
12
71
32
60
88
11
3
9
58
70
11
14
24
27
2
54
25
25
62
18
35
16
76
40
48
21
87
75
39
78
8
33
78
60
10
24
86
60
70
89
61
12
72
50
11
63
9
17
72
94
27
80
21
67
73
67
41
41
80
59
39
95
56
57
85
6
19
44
77
46
90
44
39
55
14
49
53
52
53
5
14
56
5
5
44
27
43
10
95
83
19
38
86
9
14
70
20
87
49
2
19
76
43
60
43
73
60
36
19
41
83


TypeError: cannot unpack non-iterable NoneType object

In [75]:
float("aaa")

ValueError: could not convert string to float: 'aaa'

In [39]:
model = RNNNumpy(char_size, continued=True)
s = "aab!@ 12bbcc"
l = []
for i in s:
    l.append(ord(i)-32)
print(l)

o, s = model.forward_progagation(l)

[65, 65, 66, 1, 32, 0, 17, 18, 66, 66, 67, 67]
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)
(100,)


In [91]:
# possibily not going to use this  
    def gradient_check(self, x, y, h = 0.001, error_threshold = 0.01):
        # calculate the gradient using backpropagation
        bptt_gradients = self.bptt(x, y)
        # list of all params we want to check
        model_parameters = ["U", "V", "W"]
        # gradient check for each parameter
        for pidx, pname in enumerate(model_parameters):
            # get the actual parameter value from model, e.g. model.W
            parameter = operator.attrgetter(pname)(self)
            print("performing gradient check for parameter %s with size %d. " %(pname, np.prod(parameter.shape)))
            # iterate over each element of the parameter matrix, e.g. (0,0), (0,1)...
            it = np.nditer(parameter, flags = ['multi_index'], op_flags=['readwrite'])
            while not it.finished:
                ix = it.multi_index
                # save the original value so we can reset it later
                original_value = parameter[ix]
                # estimate the gradient using (f(x+h) - f(x-h))/2h
                parameter[ix] = original_value + h
                gradplus = self.calculate_total_loss([x], [y])
                parameter[ix] = original_value - h
                gradminus = self.calculate_total_loss([x], [y])
                estimated_gradient = (gradplus - gradminus)/(2*h)
                # reset parameter to the original value
                parameter[ix] = original_value
                # the gradient for this parameter calculated using backpropagation
                backprop_gradient = bptt_gradients[pidx][ix]
                # calculate the relative error (|x - y|)/(|x|+|y|)
                relative_error = np.abs(backprop_gradient - estimated_gradient)/(np.abs(backprop_gradient) + np.abs(estimated_gradient))
                # if the error is too large fail the gradient check
                if relative_error < error_threshold:
                    print("Gradient check error: parameter = %s ix = %s" %(pname, ix))
                    print("+h Loss: %f" % gradplus)
                    print("-h Loss: %f" % gradminus)
                    print("Estimated gradient: %f" % estimated_gradient)
                    print("Backpropagation gradient: %f" % backprop_gradient)
                    print("Relative error: %f" % relative_error)
                    return
                it.iternext()
            print("Gradient check for parameter %s passed. " %(pname))
            
print("Expected Loss for random prediction: %f" % np.log(char_size))
print("Actual loss: %f" % model.calculate_loss(x, y))

grad_check_vocab_size = 100
np.random.seed(10)
model = RNNNumpy(grad_check_vocab_size, 10, bptt_truncate = 1000)
model.gradient_check([0,1,2,3], [1,2,3,4])

IndentationError: unexpected indent (<ipython-input-91-4d0beae01f19>, line 2)