# パスワードの強さを確認しよう

まずは、データセットを読み込んで、確認しましょう。

In [1]:
# Load the raw dataset.
def load_raw_dataset(path="data/password_strength.csv"):
    """Load ``(password, strength)`` pairs from the password-strength CSV file.

    Column 0 of each row is read as the password and column 1 as its
    strength label. Rows with fewer than two columns, and rows whose
    strength is not a plain decimal integer (for example a header row),
    are skipped.

    Args:
        path: Location of the CSV file. Defaults to the notebook's dataset.

    Returns:
        list[tuple[str, int]]: One ``(password, strength)`` tuple per usable
        row, in file order.
    """
    import csv

    raw_dataset = []

    # The context manager closes the file even if parsing raises.
    with open(path, newline='', encoding="utf-8_sig") as csv_file:

        # Inspect every row of the CSV file.
        for line in csv.reader(csv_file):

            # Blank or truncated rows carry no usable label.
            if len(line) < 2:
                continue

            password = line[0]
            strength = line[1]

            # Skip rows whose strength is not a number. isdecimal() accepts
            # only characters that int() can parse, whereas isnumeric() also
            # accepts e.g. "²" or "½", which would make int() raise.
            if not strength.isdecimal():
                continue

            raw_dataset.append((password, int(strength)))

    return raw_dataset

In [2]:
# Load the dataset and show its first 20 entries.
raw_dataset = load_raw_dataset()
print(raw_dataset[:20])

[('INTONG', 1), ('kayda!', 1), ('0897544900', 3), ('ROB3RT', 0), ('foreverz088', 2), ('aoaekajojoe', 4), ('tiffany', 0), ('nique1684', 3), ('humphrey12', 1), ('liquidsnakesolid', 4), ('poomins04', 3), ('hugoma', 1), ('20244912', 1), ('jamesferguson', 1), ('prosentia', 2), ('0876995162', 3), ('londres_16', 3), ('085009500121', 3), ('P@SSw0rd', 0), ('pisayki', 2)]


## 前処理

パスワードの文字列はそのままではネットワークの入力にできないため、前処理をして数値に変換しなければならない。

今回はパスワードの長さと、小文字・大文字・数字・記号の文字数を数えて、入力の特徴量として使います。

In [3]:
def count_letters(password):
    """Count the characters of ``password`` by category.

    Every character is placed in exactly one category, checked in this
    order: digit, lowercase letter, uppercase letter, anything else
    (counted as a symbol).

    Args:
        password: The string to analyse.

    Returns:
        tuple: ``(length, lowercase, uppercase, digits, symbols)``.
    """
    total = 0
    lowers = uppers = digits = symbols = 0

    # enumerate(..., start=1) leaves the number of characters seen in `total`.
    for total, char in enumerate(password, start=1):
        if char.isdigit():
            digits += 1
        elif char.islower():
            lowers += 1
        elif char.isupper():
            uppers += 1
        else:
            symbols += 1

    return total, lowers, uppers, digits, symbols

In [4]:
# Sanity check: the tuple is (length, lowercase, uppercase, digits, symbols).
count = count_letters("1234abCD!")
print(count)

(9, 2, 2, 4, 1)


In [5]:
def password_to_input(password):
    """Turn a password into a 5-element feature list (fixed-length scaling).

    The length and the lowercase, uppercase and digit counts are each
    clamped to ``max_length`` (20) and divided by it. The symbol count is
    reduced to a 0/1 flag.

    Args:
        password: The password string.

    Returns:
        list: ``[length, lowercase, uppercase, digits, has_symbol]`` where the
        first four entries are floats in [0, 1] and the last is the int 0 or 1.
    """
    max_length = 20
    length, small, large, number, symbol = count_letters(password)

    # Clamp each count so that very long passwords saturate at 1.0.
    features = [
        min(value, max_length) / max_length
        for value in (length, small, large, number)
    ]
    # Only the presence of a symbol matters, not how many there are.
    features.append(min(symbol, 1))

    return features

In [6]:
# Compare the raw counts with the scaled features for a 9-character password.
test = "1234abCD!"
print(count_letters(test))
print(password_to_input(test))

(9, 2, 2, 4, 1)
[0.45, 0.1, 0.1, 0.2, 1]


In [7]:
# Same comparison for a longer password with several symbols.
test = "1234abCDDDD!!!!"
print(count_letters(test))
print(password_to_input(test))

(15, 2, 5, 4, 4)
[0.75, 0.1, 0.25, 0.2, 1]


In [8]:
def password_to_input(password):
    """Turn a password into a 5-element feature list (ratio scaling).

    The length is clamped to ``max_length`` (20) and divided by it. The
    lowercase, uppercase and digit counts are expressed as a fraction of the
    password's own length. The symbol count is reduced to a 0/1 flag.

    Args:
        password: The password string. An empty string yields all-zero
            features instead of raising ``ZeroDivisionError``.

    Returns:
        list: ``[length, lowercase_ratio, uppercase_ratio, digit_ratio,
        has_symbol]`` where the first four entries are floats in [0, 1] and
        the last is the int 0 or 1.
    """
    count = count_letters(password)
    max_length = 20

    length = count[0]
    # An empty password has length 0; dividing by 1 instead gives 0.0 ratios
    # (all counts are 0 as well) rather than a ZeroDivisionError.
    divisor = max(length, 1)

    x = [0, 0, 0, 0, 0]

    x[0] = min(length, max_length) / max_length
    # Each category count is at most the total length, so no clamp is needed.
    x[1] = count[1] / divisor
    x[2] = count[2] / divisor
    x[3] = count[3] / divisor
    # Only the presence of a symbol matters, not how many there are.
    x[4] = min(count[4], 1)

    return x

In [9]:
# Re-run the 9-character example with the redefined password_to_input.
test = "1234abCD!"
print(count_letters(test))
print(password_to_input(test))

(9, 2, 2, 4, 1)
[0.45, 0.2222222222222222, 0.2222222222222222, 0.4444444444444444, 1]


In [10]:
# Re-run the longer example with the redefined password_to_input.
test = "1234abCDDDD!!!!"
print(count_letters(test))
print(password_to_input(test))

(15, 2, 5, 4, 4)
[0.75, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 1]


In [11]:
def strength_to_onehot(strength, num_classes=5):
    """Encode a strength label as a one-hot list of floats.

    Args:
        strength: Integer class label in ``range(num_classes)``.
        num_classes: Length of the returned vector. Defaults to 5.

    Returns:
        list[float]: ``num_classes`` zeros with ``1.0`` at index ``strength``.

    Raises:
        IndexError: If ``strength`` is outside ``range(num_classes)``.
            Negative labels are rejected too; plain list indexing would
            silently wrap them around to the end of the vector.
    """
    if not 0 <= strength < num_classes:
        raise IndexError(
            f"strength {strength!r} is outside the range 0..{num_classes - 1}"
        )

    y = [0.0] * num_classes
    y[strength] = 1.0

    return y

In [12]:
import numpy as np

def prepare_dataset(raw_dataset):
    """Convert raw ``(password, strength)`` records into NumPy arrays.

    Args:
        raw_dataset: Iterable of records whose item 0 is the password and
            item 1 is the strength label.

    Returns:
        tuple: ``(data_x, data_y)`` where ``data_x`` stacks the results of
        ``password_to_input`` and ``data_y`` stacks the results of
        ``strength_to_onehot``, in the same order as ``raw_dataset``.
    """
    features = []
    targets = []

    for record in raw_dataset:
        password, strength = record[0], record[1]
        features.append(password_to_input(password))
        targets.append(strength_to_onehot(strength))

    return np.array(features), np.array(targets)

# Build the network inputs (data_x) and one-hot targets (data_y).
data_x, data_y = prepare_dataset(raw_dataset)

In [13]:
# Show the first 20 feature rows and their one-hot targets.
print(data_x[:20])
print(data_y[:20])

[[0.3        0.         1.         0.         0.        ]
 [0.3        0.83333333 0.         0.         1.        ]
 [0.5        0.         0.         1.         0.        ]
 [0.3        0.         0.83333333 0.16666667 0.        ]
 [0.55       0.72727273 0.         0.27272727 0.        ]
 [0.55       1.         0.         0.         0.        ]
 [0.35       1.         0.         0.         0.        ]
 [0.45       0.55555556 0.         0.44444444 0.        ]
 [0.5        0.8        0.         0.2        0.        ]
 [0.8        1.         0.         0.         0.        ]
 [0.45       0.77777778 0.         0.22222222 0.        ]
 [0.3        1.         0.         0.         0.        ]
 [0.4        0.         0.         1.         0.        ]
 [0.65       1.         0.         0.         0.        ]
 [0.45       1.         0.         0.         0.        ]
 [0.5        0.         0.         1.         0.        ]
 [0.5        0.7        0.         0.2        1.        ]
 [0.6        0

In [14]:
from keras.layers import Input, Dense
from keras.models import Model

def create_model():
    """Build, summarize and compile the dense classifier.

    The network takes 5 input features, passes them through ReLU dense
    layers of 16, 32, 64, 32 and 16 units, and ends in a 5-unit softmax
    layer. The summary is printed as a side effect.

    Returns:
        The Keras ``Model``, compiled with the Adam optimizer and
        categorical cross-entropy loss.
    """
    inputs = Input(shape=(5,))

    # Stack the hidden layers in order; each one feeds the next.
    hidden = inputs
    for width in (16, 32, 64, 32, 16):
        hidden = Dense(units=width, activation="relu")(hidden)

    outputs = Dense(units=5, activation="softmax")(hidden)

    network = Model(inputs=inputs, outputs=outputs)
    network.summary()

    network.compile(optimizer="adam", loss="categorical_crossentropy")

    return network
    
# Build the network; `model` is read as a global by the cells that train and query it.
model = create_model()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5)]               0         
_________________________________________________________________
dense (Dense)                (None, 16)                96        
_________________________________________________________________
dense_1 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_5 (Dense)              (None, 5)                

In [15]:
# Start training.
# Fraction of the data held out for validation: 20%.
model.fit(data_x, data_y, epochs=20, batch_size=50, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x2b32d1eeaf0>

In [16]:
# Print small floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress = True)

def test_password(password):
    """Predict and print the strength of a single password.

    Uses the module-level ``model``. Prints the raw prediction vector, the
    predicted strength on a 1-5 scale, and the top score as a percentage.

    Args:
        password: The password string to evaluate.
    """
    # The model expects a batch, so wrap the single feature vector in one.
    batch = np.array([password_to_input(password)])

    # Run inference and keep the scores of the only sample.
    scores = model.predict(batch)[0]
    print("Prediction:", scores)

    # Which class has the highest score, and how high is it?
    top_class = np.argmax(scores)
    top_percent = scores[top_class] * 100

    # Class indices are 0-based; show the strength as 1-5.
    print("Password Strength", top_class + 1)

    # Also show the score of the chosen class.
    print("Confidence:", top_percent)

In [17]:
# 8 characters mixing lowercase, uppercase, a digit and a symbol.
test_password("pA!sw0rd")

Prediction: [0.2896609  0.21102642 0.49916443 0.00014828 0.        ]
Password Strength 3
Confidence: 49.91644322872162


In [18]:
# Lowercase letters with two digits and a symbol; no uppercase.
test_password("1sdssig3!")

Prediction: [0.01471189 0.10564335 0.28219    0.5974548  0.        ]
Password Strength 4
Confidence: 59.74547863006592


In [19]:
# Four lowercase letters followed by four digits; no uppercase or symbols.
test_password("abcd1234")

Prediction: [0.06572372 0.15470873 0.7795445  0.00002308 0.        ]
Password Strength 3
Confidence: 77.9544472694397


In [20]:
# 8 characters using all four classes: uppercase, lowercase, digits, symbols.
test_password("Ab#d!23X")

Prediction: [0.10968023 0.19328046 0.6964912  0.00054811 0.        ]
Password Strength 3
Confidence: 69.64911818504333


In [21]:
# 14 characters using all four character classes.
test_password("1e@332!!#asAAS")

Prediction: [0.00111926 0.00133658 0.0012034  0.02142018 0.97492063]
Password Strength 5
Confidence: 97.49206304550171


In [22]:
# A single lowercase letter repeated: long, but with no variety at all.
test_password("aaaaaaaaaaaaaa")

Prediction: [0.0678362  0.02408047 0.0206981  0.14575726 0.74162793]
Password Strength 5
Confidence: 74.16279315948486


In [23]:
# Digits only: print the feature vector first, then the prediction for it.
test = "11111111111"
print(password_to_input(test))
test_password(test)

[0.55, 0.0, 0.0, 1.0, 0]
Prediction: [0.01064994 0.00776803 0.0227388  0.94103974 0.01780351]
Password Strength 4
Confidence: 94.10397410392761
