# パスワードの強さを確認しよう

まずは、データセットを読み込んで、確認しましょう。

In [1]:
# Load the raw (password, strength) dataset from the CSV file.
def load_raw_dataset(path="data/password_strength.csv"):
    """Read the password-strength CSV and return its usable rows.

    Args:
        path: Location of the CSV file. Defaults to the dataset path used
            throughout this notebook.

    Returns:
        A list of ``(password, strength)`` tuples, where ``password`` is the
        first column as a string and ``strength`` is the second column
        converted to ``int``. Rows with fewer than two columns, and rows whose
        second column is not a plain decimal integer (e.g. a header row), are
        skipped.
    """
    import csv

    raw_dataset = []

    # The context manager guarantees the file is closed even if parsing fails.
    with open(path, newline='', encoding="utf-8_sig") as csv_file:

        # Inspect every row of the CSV file.
        for line in csv.reader(csv_file):

            # Blank or truncated rows have no strength column to read.
            if len(line) < 2:
                continue

            password = line[0]
            strength = line[1]

            # Skip rows whose strength is not an integer literal.
            # isdecimal() (unlike isnumeric()) only accepts characters that
            # int() can actually parse, so e.g. "²" or "½" cannot crash below.
            if not strength.isdecimal():
                continue

            raw_dataset.append((password, int(strength)))

    return raw_dataset

In [2]:
# Load the dataset and display the first 20 entries.
raw_dataset = load_raw_dataset()
print(raw_dataset[0:20])

[('nahpets', 0), ('sasa2727', 1), ('lilboost', 2), ('4408tiiyt', 3), ('schuessler', 3), ('1033704045', 3), ('smurfy23', 2), ('milagrsy gernys', 4), ('7h0mp50n', 0), ('wutsuphomeboy', 3), ('TERESA', 0), ('hoddboty1', 3), ('patty', 0), ('lucky038', 1), ('Tummykiwi151', 4), ('pipsonly1', 3), ('akamaru16', 3), ('45192480093', 3), ('smurf26', 2), ('25761838', 2)]


## 前処理

パスワードの文字列はそのままではネットワークの入力にできないため、数値に変換する前処理をしなければならない。

今回はパスワードの長さと、小文字・大文字・数字・記号それぞれの文字数を数えて、入力として使います。

In [3]:
def count_letters(password):
    """Tally the characters of ``password`` by category.

    Each character is assigned to exactly one category, checked in this
    order: digit (``str.isdigit``), lowercase (``str.islower``), uppercase
    (``str.isupper``), and otherwise "symbol".

    Args:
        password: The string whose characters are counted.

    Returns:
        A 5-tuple ``(length, small, large, number, symbol)`` of ints.
    """
    # Positions in the returned tuple.
    LENGTH, SMALL, LARGE, NUMBER, SYMBOL = range(5)

    def category(ch):
        # Order matters: digits are tested before letter case.
        if ch.isdigit():
            return NUMBER
        if ch.islower():
            return SMALL
        if ch.isupper():
            return LARGE
        return SYMBOL

    tallies = [0] * 5
    for ch in password:
        tallies[LENGTH] += 1
        tallies[category(ch)] += 1

    return tuple(tallies)

In [4]:
# Quick check of count_letters on a password that mixes digits, lowercase,
# uppercase and one symbol.
count = count_letters("1234abCD!")
print(count)

(9, 2, 2, 4, 1)


In [5]:
def password_to_input(password):
    """Turn a password into a 5-element feature list (first version).

    The total length and the lowercase, uppercase and digit counts are each
    capped at 20 and divided by 20; the last element is 1 if the password
    contains at least one symbol, otherwise 0.

    Note: a later cell in this notebook redefines this function with a
    different normalization for the per-category counts.

    Args:
        password: The password string.

    Returns:
        ``[length, small, large, number, has_symbol]`` where the first four
        entries are floats in [0, 1] and the last is the int 0 or 1.
    """
    length_cap = 20

    length, small, large, number, symbol = count_letters(password)

    # Clamp each count to the cap, then scale into [0, 1].
    features = [
        min(amount, length_cap) / length_cap
        for amount in (length, small, large, number)
    ]

    # Symbols are only recorded as present / absent.
    features.append(min(symbol, 1))

    return features

In [6]:
# Show the raw counts next to the normalized input vector for a
# 9-character password.
test = "1234abCD!"
print (count_letters(test))
print (password_to_input(test))

(9, 2, 2, 4, 1)
[0.45, 0.1, 0.1, 0.2, 1]


In [7]:
# Same check with a longer password that contains several symbols.
test = "1234abCDDDD!!!!"
print (count_letters(test))
print (password_to_input(test))

(15, 2, 5, 4, 4)
[0.75, 0.1, 0.25, 0.2, 1]


In [8]:
def password_to_input(password):
    """Turn a password into a 5-element feature list (ratio version).

    This definition replaces the earlier ``password_to_input`` in this
    notebook: the lowercase / uppercase / digit features are expressed as a
    share of the password's own length instead of a share of ``max_length``.

    Args:
        password: The password string.

    Returns:
        ``[length, small, large, number, has_symbol]`` where
        ``length`` is the character count capped at 20 and divided by 20,
        ``small`` / ``large`` / ``number`` are the fractions of characters in
        each category, and ``has_symbol`` is the int 1 if any symbol is
        present, else 0. An empty password yields ``[0.0, 0.0, 0.0, 0.0, 0]``.
    """
    max_length = 20

    # An empty password has length 0, which would otherwise be used as a
    # divisor below and raise ZeroDivisionError.
    if len(password) == 0:
        return [0.0, 0.0, 0.0, 0.0, 0]

    length, small, large, number, symbol = count_letters(password)

    return [
        min(length, max_length) / max_length,
        # Each category count is at most `length`, so no clamping is needed.
        small / length,
        large / length,
        number / length,
        # Symbols are only recorded as present / absent.
        min(symbol, 1),
    ]

In [9]:
# Re-run the 9-character example now that password_to_input has been
# redefined in the previous cell.
test = "1234abCD!"
print (count_letters(test))
print (password_to_input(test))

(9, 2, 2, 4, 1)
[0.45, 0.2222222222222222, 0.2222222222222222, 0.4444444444444444, 1]


In [10]:
# Re-run the longer example with the redefined password_to_input.
test = "1234abCDDDD!!!!"
print (count_letters(test))
print (password_to_input(test))

(15, 2, 5, 4, 4)
[0.75, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 1]


In [11]:
def strength_to_onehot(strength, num_classes=5):
    """Encode a strength label as a one-hot list of floats.

    Args:
        strength: Integer class index, expected in ``range(num_classes)``.
        num_classes: Length of the returned list. Defaults to 5, which is the
            number of output units of the model built in this notebook.

    Returns:
        A list of ``num_classes`` floats that is 1.0 at position ``strength``
        and 0.0 everywhere else.

    Raises:
        IndexError: If ``strength`` is negative or ``>= num_classes``.
    """
    # Reject negative values explicitly: plain list indexing would silently
    # wrap around (e.g. -1 would mark the last class) and corrupt the label.
    if not 0 <= strength < num_classes:
        raise IndexError(
            "strength must be in range(%d), got %r" % (num_classes, strength)
        )

    y = [0.0] * num_classes
    y[strength] = 1.0

    return y

In [12]:
import numpy as np

def prepare_dataset(raw_dataset):
    """Convert raw ``(password, strength)`` entries into NumPy arrays.

    Args:
        raw_dataset: Iterable of entries whose element 0 is the password and
            element 1 is the strength label.

    Returns:
        A tuple ``(data_x, data_y)`` of NumPy arrays built from the
        ``password_to_input`` vectors and the ``strength_to_onehot`` vectors
        respectively, in the same order as ``raw_dataset``.
    """
    # Encode input and label together, one entry at a time, in a single pass.
    encoded = [
        (password_to_input(entry[0]), strength_to_onehot(entry[1]))
        for entry in raw_dataset
    ]

    inputs = [pair[0] for pair in encoded]
    targets = [pair[1] for pair in encoded]

    return np.array(inputs), np.array(targets)

# Convert the loaded dataset into the input array and the one-hot label array.
data_x, data_y = prepare_dataset(raw_dataset)

In [13]:
# Peek at the first 20 rows of the inputs and of the labels.
print(data_x[0:20])
print(data_y[0:20])

[[0.35       1.         0.         0.         0.        ]
 [0.4        0.5        0.         0.5        0.        ]
 [0.4        1.         0.         0.         0.        ]
 [0.45       0.55555556 0.         0.44444444 0.        ]
 [0.5        1.         0.         0.         0.        ]
 [0.5        0.         0.         1.         0.        ]
 [0.4        0.75       0.         0.25       0.        ]
 [0.75       0.93333333 0.         0.         1.        ]
 [0.4        0.5        0.         0.5        0.        ]
 [0.65       1.         0.         0.         0.        ]
 [0.3        0.         1.         0.         0.        ]
 [0.45       0.88888889 0.         0.11111111 0.        ]
 [0.25       1.         0.         0.         0.        ]
 [0.4        0.625      0.         0.375      0.        ]
 [0.6        0.66666667 0.08333333 0.25       0.        ]
 [0.45       0.88888889 0.         0.11111111 0.        ]
 [0.45       0.77777778 0.         0.22222222 0.        ]
 [0.55       0

In [14]:
from keras.layers import Input, Dense
from keras.models import Model

def create_model():
    """Build, summarize and compile the password-strength classifier.

    The network takes a 5-element input, passes it through five ReLU dense
    layers of 16, 32, 64, 32 and 16 units, and ends in a 5-unit softmax
    layer. The summary is printed as a side effect, and the model is compiled
    with the Adam optimizer and categorical cross-entropy loss.

    Returns:
        The compiled Keras ``Model``.
    """
    hidden_widths = (16, 32, 64, 32, 16)

    inputs = Input(shape=(5,))

    # Stack the hidden layers in order, each feeding the next.
    tensor = inputs
    for width in hidden_widths:
        tensor = Dense(units=width, activation="relu")(tensor)

    outputs = Dense(units=5, activation="softmax")(tensor)

    network = Model(inputs=inputs, outputs=outputs)
    network.summary()

    network.compile(optimizer="adam", loss="categorical_crossentropy")

    return network
    
# Build the network; create_model() also prints its summary.
model = create_model()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5)]               0         
_________________________________________________________________
dense (Dense)                (None, 16)                96        
_________________________________________________________________
dense_1 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_5 (Dense)              (None, 5)                

In [15]:
# Start training.
# Fraction of the data held out for validation: 20%
model.fit(data_x, data_y, epochs=20, batch_size=50, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x271986a73a0>

In [16]:
# Print floats in fixed-point notation instead of scientific notation so the
# probability vectors below are easy to read.
np.set_printoptions(suppress = True)

def test_password(password):
    """Predict and print the strength of a single password.

    Uses the notebook-level ``model`` and ``password_to_input``. Prints the
    raw prediction vector, the most likely class shown on a 1-5 scale
    (class index + 1), and that class's probability as a percentage.

    Args:
        password: The password string to evaluate.
    """
    # Wrap the single feature vector in a batch of size one.
    batch = np.array([password_to_input(password)])

    # Run inference and take the only row of the result.
    probabilities = model.predict(batch)[0]
    print("Prediction:", probabilities)

    # Pick the class with the highest probability.
    top_class = np.argmax(probabilities)
    confidence = probabilities[top_class] * 100

    # Report the strength on a 1-5 scale.
    print("Password Strength", top_class + 1)

    # Report the confidence as well.
    print("Confidence:", confidence)

In [17]:
# Try a common all-lowercase password.
test_password("password")

Prediction: [0.23067302 0.25865573 0.5104322  0.00023915 0.        ]
Password Strength 3
Confidence: 51.04321837425232


In [18]:
# Try a password mixing lowercase letters, digits and a symbol.
test_password("1sdssig3!")

Prediction: [0.02724372 0.20591702 0.27530718 0.49153206 0.        ]
Password Strength 4
Confidence: 49.15320575237274


In [19]:
# Try a password made of lowercase letters followed by digits.
test_password("abcd1234")

Prediction: [0.06281839 0.17576934 0.76135224 0.00005996 0.        ]
Password Strength 3
Confidence: 76.13522410392761


In [20]:
# Try a short password mixing uppercase, lowercase, digits and symbols.
test_password("Ab#d!23X")

Prediction: [0.12943123 0.06086858 0.8096852  0.00001495 0.        ]
Password Strength 3
Confidence: 80.96851706504822


In [21]:
# Try a longer password that uses all four character classes.
test_password("1e@332!!#asAAS")

Prediction: [0.00028184 0.00079277 0.00209533 0.05499764 0.94183236]
Password Strength 5
Confidence: 94.18323636054993


In [22]:
# Try a long password made of a single repeated lowercase letter.
test_password("aaaaaaaaaaaaaa")

Prediction: [0.06595595 0.0398076  0.0328228  0.14875019 0.7126634 ]
Password Strength 5
Confidence: 71.26634120941162


In [23]:
# Digits-only password: show its input vector, then the model's prediction.
test = "11111111111"
print(password_to_input(test))
test_password(test)

[0.55, 0.0, 0.0, 1.0, 0]
Prediction: [0.00950857 0.00985732 0.03383746 0.8842059  0.06259079]
Password Strength 4
Confidence: 88.42058777809143
