In [139]:
import numpy as np
import pandas as pd
import math
import hashlib
import re

# Load Dataframe

In [140]:
# 100000 most popular passwords
df = pd.read_csv('Datasets/popular_100000.csv')

# Drop totals of 0 (only using data for passwords pwned >= 10 times).
# .copy() makes the filtered frame independent of the one read from disk, so
# the column assignment below does not raise pandas' SettingWithCopyWarning.
df = df[df['Total'] != 0].copy()

# Subtract 10 from all totals to compensate for dropped data
df['Total'] = df['Total'] - 10

In [144]:
# Preview the first rows of the working frame.
# NOTE(review): the saved output includes a 'Length' column and this cell's
# execution count (144) is higher than the length cell's (142), so it was run
# after the length cell even though it appears before it -- confirm cell order.
df.head()

Unnamed: 0,Password,SHA_1,Total,Length
0,123456,7C4A8D09CA3762AF61E59520943DC26494F8941B,23547443,6
1,password,5BAA61E4C9B93F3F0682250B6CF8331B7EE68FD8,3730461,8
2,12345678,7C222FB2927D828AF22F592134E8932480637C0D,2938584,8
3,qwerty,B1B3773A05C0ED0176787A4F1574FF0075F7521E,3912806,6
4,123456789,F7C3BC1D808E04732ADF679965CCC34CA7AE3441,7799804,9


# Password Length

In [142]:
# Function to add password length
def length(df):
    """Add a 'Length' column holding the character count of each password.

    Values in 'Password' are converted to `str` before measuring, so
    non-string cells are measured by their string form. The column is written
    onto `df` itself, and that same frame is returned.
    """
    passwords_as_text = df['Password'].astype(str)
    df['Length'] = passwords_as_text.map(len)
    return df

# Add the 'Length' column to the working frame.
df = length(df)

In [143]:
# Function to drop passwords below length 6
def drop_short(df, min_len=6):
    """Return the rows of `df` whose 'Length' is at least `min_len`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric 'Length' column.
    min_len : int, default 6
        Minimum password length to keep (inclusive).

    Returns
    -------
    pandas.DataFrame
        A new frame; `df` itself is left untouched. An explicit copy is
        returned so that adding columns to the result later does not raise
        pandas' SettingWithCopyWarning.
    """
    long_enough = df['Length'] >= min_len
    return df[long_enough].copy()

# Rebind df to the filtered frame (passwords shorter than 6 characters removed).
df = drop_short(df)

In [145]:
# Function to add password length squared
def len_sq(df):
    """Add a 'Length_Sq' column equal to 'Length' squared.

    The column is written onto `df` itself, and that same frame is returned.
    """
    lengths = df['Length']
    df['Length_Sq'] = lengths.pow(2)
    return df

# Add the squared-length feature to the working frame.
df = len_sq(df)

In [146]:
# Function to add password length cubed
def len_cb(df):
    """Add a 'Length_Cb' column equal to 'Length' cubed.

    The column is written onto `df` itself, and that same frame is returned.
    """
    lengths = df['Length']
    df['Length_Cb'] = lengths.pow(3)
    return df

# Add the cubed-length feature to the working frame.
df = len_cb(df)

In [147]:
# Function to add password length as proportion of longest password
def len_pp(df):
    """Add a 'Length_Pp' column: each length as a proportion of the longest.

    The column is written onto `df` itself and the same frame is returned,
    matching the other length helpers so the `df = f(df)` calling pattern
    works here too (previously this function returned None).
    """
    # Series.max() rather than the builtin max(): it stays vectorised and
    # yields NaN for an empty frame instead of raising ValueError.
    longest = df['Length'].max()
    df['Length_Pp'] = df['Length'] / longest
    return df

df = len_pp(df)

# Password Composition

In [2]:
# Function to check if only letters
def only_let(pwd):
    """Return 1 if `pwd` consists solely of ASCII letters (a-z, A-Z), else 0.

    The whole string must match. `re.fullmatch` is used because the previous
    `^...$` search also accepted a password ending in a newline: `$` matches
    just before a trailing newline. An empty string yields 0.
    """
    return 1 if re.fullmatch(r'[a-zA-Z]+', pwd) else 0

In [3]:
# Function to check if only digits
def only_dig(pwd):
    """Return 1 if `pwd` consists solely of digits, else 0.

    The pattern is a raw string, so the backslash escape reaches the regex
    engine without Python flagging an invalid string escape. `re.fullmatch`
    requires the entire string to be digits; the previous `^...$` search also
    accepted a password ending in a newline. An empty string yields 0.
    """
    return 1 if re.fullmatch(r'\d+', pwd) else 0

In [4]:
# Function to check if only special chars
def only_spc(pwd):
    """Return 1 if `pwd` consists solely of special characters, else 0.

    A special character is anything the regex engine does not treat as a
    letter or digit, plus the underscore. The underscore is listed explicitly
    because the regex "word" class includes it, so the non-word class alone
    would not count an all-underscore password as special. An empty string
    yields 0.
    """
    return 1 if re.fullmatch(r'[\W_]+', pwd) else 0

In [5]:
# Function to check if only letters and digits
def let_and_dig(pwd):
    """Return 1 if `pwd` has letters and digits but no special characters.

    Letters are ASCII a-z/A-Z. A special character is any non-word character
    or an underscore; the underscore is named explicitly because the regex
    non-word class does not include it, which previously let 'abc_123' count
    as letters and digits only.
    """
    has_letter = re.search(r'[a-zA-Z]', pwd) is not None
    has_digit = re.search(r'\d', pwd) is not None
    has_special = re.search(r'[\W_]', pwd) is not None
    return 1 if (has_letter and has_digit and not has_special) else 0

In [6]:
# Function to check if only letters and special chars
def let_and_spc(pwd):
    """Return 1 if `pwd` has letters and special characters but no digits.

    Letters are ASCII a-z/A-Z. A special character is any non-word character
    or an underscore; the underscore is named explicitly because the regex
    non-word class does not include it, which previously left 'pass_word'
    unrecognised as containing a special character.
    """
    has_letter = re.search(r'[a-zA-Z]', pwd) is not None
    has_digit = re.search(r'\d', pwd) is not None
    has_special = re.search(r'[\W_]', pwd) is not None
    return 1 if (has_letter and has_special and not has_digit) else 0

In [7]:
# Function to check if only digits and special chars
def dig_and_spc(pwd):
    """Return 1 if `pwd` has digits and special characters but no letters.

    Letters are ASCII a-z/A-Z. A special character is any non-word character
    or an underscore; the underscore is named explicitly because the regex
    non-word class does not include it, which previously left '123_456'
    unrecognised as containing a special character.
    """
    has_letter = re.search(r'[a-zA-Z]', pwd) is not None
    has_digit = re.search(r'\d', pwd) is not None
    has_special = re.search(r'[\W_]', pwd) is not None
    return 1 if (has_digit and has_special and not has_letter) else 0

# Check for Dates

In [8]:
# Building blocks for full_date. Raw strings keep the regex escapes intact.
_DATE_SEP = r'[/.\-]'                            # one of  /  .  -
_DATE_YEAR = r'(?:\d{4}|\d{2})'                  # four- or two-digit year
_DATE_MONTH = r'(?:0[1-9]|1[0-2]|[1-9])'         # 01-12 or 1-9 (never 0 / 00)
_DATE_DAY = r'(?:0[1-9]|[12][0-9]|3[01]|[1-9])'  # 01-31 or 1-9 (never 0 / 00)

# One alternation covering the three supported field orders:
# year-month-day, day-month-year and month-day-year.
_FULL_DATE_RE = re.compile(
    '|'.join(
        _DATE_SEP.join(fields)
        for fields in (
            (_DATE_YEAR, _DATE_MONTH, _DATE_DAY),
            (_DATE_DAY, _DATE_MONTH, _DATE_YEAR),
            (_DATE_MONTH, _DATE_DAY, _DATE_YEAR),
        )
    )
)


# Function to check for full dates
def full_date(pwd):
    """Return 1 if the entire password is a date, else 0.

    Accepted layouts are Y-M-D, D-M-Y and M-D-Y, where the year has two or
    four digits, month and day may be written with or without a leading
    zero, and the fields are separated by '/', '-' or '.' (the two
    separators need not be the same character).

    Unlike the earlier version, a month or day of 0 / 00 is rejected, and the
    match must span the whole string, so a trailing newline is not accepted.
    """
    return 1 if _FULL_DATE_RE.fullmatch(pwd) else 0

In [9]:
# Function to check for year (four-digit number starting with 1 or 2)
def has_year(pwd):
    """Return 1 if `pwd` contains a year-like number, else 0.

    A year is exactly four consecutive digits, the first being 1 or 2, with
    no digit immediately before or after (so '12345' does not count).
    """
    # Raw string so the regex escapes are not treated as (invalid) Python
    # string escapes; the lookarounds enforce "exactly four digits".
    return 1 if re.search(r'(?<!\d)[12]\d{3}(?!\d)', pwd) else 0

# Check for Repeated Chars

In [10]:
# Function to check for two repeated characters
def rep_2(pwd):
    """Return 1 if some character occurs twice in a row but none three times.

    Any character (word or non-word) counts. Returns 0 when there is no
    doubled character or when a run of three or more exists anywhere.
    """
    at_least_two = re.search(r'(\w|\W)\1{1}', pwd) is not None
    at_least_three = re.search(r'(\w|\W)\1{2}', pwd) is not None
    return int(at_least_two and not at_least_three)

In [11]:
# Function to check for three repeated characters
def rep_3(pwd):
    """Return 1 if some character occurs three times in a row but none four.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of three or when a run of four or more exists anywhere.
    """
    at_least_three = re.search(r'(\w|\W)\1{2}', pwd) is not None
    at_least_four = re.search(r'(\w|\W)\1{3}', pwd) is not None
    return int(at_least_three and not at_least_four)

In [12]:
# Function to check for four repeated characters
def rep_4(pwd):
    """Return 1 if some character occurs four times in a row but none five.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of four or when a run of five or more exists anywhere.
    """
    at_least_four = re.search(r'(\w|\W)\1{3}', pwd) is not None
    at_least_five = re.search(r'(\w|\W)\1{4}', pwd) is not None
    return int(at_least_four and not at_least_five)

In [13]:
# Function to check for five repeated characters
def rep_5(pwd):
    """Return 1 if some character occurs five times in a row but none six.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of five or when a run of six or more exists anywhere.
    """
    at_least_five = re.search(r'(\w|\W)\1{4}', pwd) is not None
    at_least_six = re.search(r'(\w|\W)\1{5}', pwd) is not None
    return int(at_least_five and not at_least_six)

In [14]:
# Function to check for six repeated characters
def rep_6(pwd):
    """Return 1 if some character occurs six times in a row but none seven.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of six or when a run of seven or more exists anywhere.
    """
    at_least_six = re.search(r'(\w|\W)\1{5}', pwd) is not None
    at_least_seven = re.search(r'(\w|\W)\1{6}', pwd) is not None
    return int(at_least_six and not at_least_seven)

In [15]:
# Function to check for seven repeated characters
def rep_7(pwd):
    """Return 1 if some character occurs seven times in a row but none eight.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of seven or when a run of eight or more exists anywhere.
    """
    at_least_seven = re.search(r'(\w|\W)\1{6}', pwd) is not None
    at_least_eight = re.search(r'(\w|\W)\1{7}', pwd) is not None
    return int(at_least_seven and not at_least_eight)

In [16]:
# Function to check for eight repeated characters
def rep_8(pwd):
    """Return 1 if some character occurs eight times in a row but none nine.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of eight or when a run of nine or more exists anywhere.
    """
    at_least_eight = re.search(r'(\w|\W)\1{7}', pwd) is not None
    at_least_nine = re.search(r'(\w|\W)\1{8}', pwd) is not None
    return int(at_least_eight and not at_least_nine)

In [17]:
# Function to check for nine repeated characters
def rep_9(pwd):
    """Return 1 if some character occurs nine times in a row but none ten.

    Any character (word or non-word) counts. Returns 0 when there is no run
    of nine or when a run of ten or more exists anywhere.
    """
    at_least_nine = re.search(r'(\w|\W)\1{8}', pwd) is not None
    at_least_ten = re.search(r'(\w|\W)\1{9}', pwd) is not None
    return int(at_least_nine and not at_least_ten)

In [18]:
# Function to check for ten or more repeated characters
def rep_10(pwd):
    """Return 1 if some character occurs ten or more times in a row, else 0.

    Any character (word or non-word) counts; there is no upper bound on the
    length of the run.
    """
    return int(re.search(r'(\w|\W)\1{9}', pwd) is not None)

# Check for Consecutive Ascending Digits

In [19]:
# Function to check for two consecutive ascending digits
def asc_d_2(pwd):
    """Return 1 if `pwd` contains two consecutive ascending digits, else 0.

    Every 2-character window of '0123456789' ('01' through '89') is tried as
    a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 2
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [20]:
# Function to check for three consecutive ascending digits
def asc_d_3(pwd):
    """Return 1 if `pwd` contains three consecutive ascending digits, else 0.

    Every 3-character window of '0123456789' ('012' through '789') is tried
    as a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 3
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [21]:
# Function to check for four consecutive ascending digits
def asc_d_4(pwd):
    """Return 1 if `pwd` contains four consecutive ascending digits, else 0.

    Every 4-character window of '0123456789' ('0123' through '6789') is tried
    as a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 4
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [22]:
# Function to check for five consecutive ascending digits
def asc_d_5(pwd):
    """Return 1 if `pwd` contains five consecutive ascending digits, else 0.

    Every 5-character window of '0123456789' ('01234' through '56789') is
    tried as a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 5
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [23]:
# Function to check for six consecutive ascending digits
def asc_d_6(pwd):
    """Return 1 if `pwd` contains six consecutive ascending digits, else 0.

    Every 6-character window of '0123456789' ('012345' through '456789') is
    tried as a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 6
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [24]:
# Function to check for seven consecutive ascending digits
def asc_d_7(pwd):
    """Return 1 if `pwd` contains seven consecutive ascending digits, else 0.

    Every 7-character window of '0123456789' ('0123456' through '3456789') is
    tried as a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 7
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [25]:
# Function to check for eight consecutive ascending digits
def asc_d_8(pwd):
    """Return 1 if `pwd` contains eight consecutive ascending digits, else 0.

    Every 8-character window of '0123456789' ('01234567' through '23456789')
    is tried as a substring, so longer ascending runs count as well.
    """
    digits = '0123456789'
    span = 8
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [26]:
# Function to check for nine consecutive ascending digits
def asc_d_9(pwd):
    """Return 1 if `pwd` contains nine consecutive ascending digits, else 0.

    Both 9-character windows of '0123456789' ('012345678' and '123456789')
    are tried as substrings, so the full ten-digit run counts as well.
    """
    digits = '0123456789'
    span = 9
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [27]:
# Function to check for ten consecutive ascending digits
def asc_d_10(pwd):
    """Return 1 if `pwd` contains all ten digits in ascending order, else 0.

    The only 10-character window of '0123456789' is the string itself, so
    this checks for '0123456789' as a substring.
    """
    digits = '0123456789'
    span = 10
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

# Check for Consecutive Descending Digits

In [28]:
# Function to check for two consecutive descending digits
def dec_d_2(pwd):
    """Return 1 if `pwd` contains two consecutive descending digits, else 0.

    Every 2-character window of '9876543210' ('98' through '10') is tried as
    a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 2
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [29]:
# Function to check for three consecutive descending digits
def dec_d_3(pwd):
    """Return 1 if `pwd` contains three consecutive descending digits, else 0.

    Every 3-character window of '9876543210' ('987' through '210') is tried
    as a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 3
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [30]:
# Function to check for four consecutive descending digits
def dec_d_4(pwd):
    """Return 1 if `pwd` contains four consecutive descending digits, else 0.

    Every 4-character window of '9876543210' ('9876' through '3210') is tried
    as a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 4
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [31]:
# Function to check for five consecutive descending digits
def dec_d_5(pwd):
    """Return 1 if `pwd` contains five consecutive descending digits, else 0.

    Every 5-character window of '9876543210' ('98765' through '43210') is
    tried as a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 5
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [32]:
# Function to check for six consecutive descending digits
def dec_d_6(pwd):
    """Return 1 if `pwd` contains six consecutive descending digits, else 0.

    Every 6-character window of '9876543210' ('987654' through '543210') is
    tried as a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 6
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [33]:
# Function to check for seven consecutive descending digits
def dec_d_7(pwd):
    """Return 1 if `pwd` contains seven consecutive descending digits, else 0.

    Every 7-character window of '9876543210' ('9876543' through '6543210') is
    tried as a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 7
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [34]:
# Function to check for eight consecutive descending digits
def dec_d_8(pwd):
    """Return 1 if `pwd` contains eight consecutive descending digits, else 0.

    Every 8-character window of '9876543210' ('98765432' through '76543210')
    is tried as a substring, so longer descending runs count as well.
    """
    digits = '9876543210'
    span = 8
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [35]:
# Function to check for nine consecutive descending digits
def dec_d_9(pwd):
    """Return 1 if `pwd` contains nine consecutive descending digits, else 0.

    Both 9-character windows of '9876543210' ('987654321' and '876543210')
    are tried as substrings, so the full ten-digit run counts as well.
    """
    digits = '9876543210'
    span = 9
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [36]:
# Function to check for ten consecutive descending digits
def dec_d_10(pwd):
    """Return 1 if `pwd` contains all ten digits in descending order, else 0.

    The only 10-character window of '9876543210' is the string itself, so
    this checks for '9876543210' as a substring.
    """
    digits = '9876543210'
    span = 10
    windows = (digits[k:k + span] for k in range(len(digits) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

# Check for Consecutive Ascending Letters

In [37]:
# Function to check for two consecutive ascending letters
def asc_l_2(pwd):
    """Return 1 if `pwd` contains two consecutive ascending letters, else 0.

    Every 2-character window of the lowercase alphabet ('ab' through 'yz') is
    tried as a substring, so longer runs count as well. Matching is
    case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 2
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [38]:
# Function to check for three consecutive ascending letters
def asc_l_3(pwd):
    """Return 1 if `pwd` contains three consecutive ascending letters, else 0.

    Every 3-character window of the lowercase alphabet ('abc' through 'xyz')
    is tried as a substring, so longer runs count as well. Matching is
    case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 3
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [39]:
# Function to check for four consecutive ascending letters
def asc_l_4(pwd):
    """Return 1 if `pwd` contains four consecutive ascending letters, else 0.

    Every 4-character window of the lowercase alphabet ('abcd' through
    'wxyz') is tried as a substring, so longer runs count as well. Matching
    is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 4
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [40]:
# Function to check for five consecutive ascending letters
def asc_l_5(pwd):
    """Return 1 if `pwd` contains five consecutive ascending letters, else 0.

    Every 5-character window of the lowercase alphabet ('abcde' through
    'vwxyz') is tried as a substring, so longer runs count as well. Matching
    is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 5
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [41]:
# Function to check for six consecutive ascending letters
def asc_l_6(pwd):
    """Return 1 if `pwd` contains six consecutive ascending letters, else 0.

    Every 6-character window of the lowercase alphabet ('abcdef' through
    'uvwxyz') is tried as a substring, so longer runs count as well. Matching
    is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 6
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [42]:
# Function to check for seven consecutive ascending letters
def asc_l_7(pwd):
    """Return 1 if `pwd` contains seven consecutive ascending letters, else 0.

    Every 7-character window of the lowercase alphabet ('abcdefg' through
    'tuvwxyz') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 7
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [43]:
# Function to check for eight consecutive ascending letters
def asc_l_8(pwd):
    """Return 1 if `pwd` contains eight consecutive ascending letters, else 0.

    Every 8-character window of the lowercase alphabet ('abcdefgh' through
    'stuvwxyz') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 8
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [44]:
# Function to check for nine consecutive ascending letters
def asc_l_9(pwd):
    """Return 1 if `pwd` contains nine consecutive ascending letters, else 0.

    Every 9-character window of the lowercase alphabet ('abcdefghi' through
    'rstuvwxyz') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 9
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [45]:
# Function to check for ten consecutive ascending letters
def asc_l_10(pwd):
    """Return 1 if `pwd` contains ten consecutive ascending letters, else 0.

    Every 10-character window of the lowercase alphabet ('abcdefghij' through
    'qrstuvwxyz') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    span = 10
    windows = (alphabet[k:k + span] for k in range(len(alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

# Check for Consecutive Descending Letters

In [46]:
# Function to check for two consecutive descending letters
def dec_l_2(pwd):
    """Return 1 if `pwd` contains two consecutive descending letters, else 0.

    Every 2-character window of the reversed lowercase alphabet ('zy' through
    'ba') is tried as a substring, so longer runs count as well. Matching is
    case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 2
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [47]:
# Function to check for three consecutive descending letters
def dec_l_3(pwd):
    """Return 1 if `pwd` contains three consecutive descending letters, else 0.

    Every 3-character window of the reversed lowercase alphabet ('zyx'
    through 'cba') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 3
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [48]:
# Function to check for four consecutive descending letters
def dec_l_4(pwd):
    """Return 1 if `pwd` contains four consecutive descending letters, else 0.

    Every 4-character window of the reversed lowercase alphabet ('zyxw'
    through 'dcba') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 4
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [49]:
# Function to check for five consecutive descending letters
def dec_l_5(pwd):
    """Return 1 if `pwd` contains five consecutive descending letters, else 0.

    Every 5-character window of the reversed lowercase alphabet ('zyxwv'
    through 'edcba') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 5
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [50]:
# Function to check for six consecutive descending letters
def dec_l_6(pwd):
    """Return 1 if `pwd` contains six consecutive descending letters, else 0.

    Every 6-character window of the reversed lowercase alphabet ('zyxwvu'
    through 'fedcba') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 6
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [51]:
# Function to check for seven consecutive descending letters
def dec_l_7(pwd):
    """Return 1 if `pwd` contains seven consecutive descending letters, else 0.

    Every 7-character window of the reversed lowercase alphabet ('zyxwvut'
    through 'gfedcba') is tried as a substring, so longer runs count as well.
    Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 7
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [52]:
# Function to check for eight consecutive descending letters
def dec_l_8(pwd):
    """Return 1 if `pwd` contains eight consecutive descending letters, else 0.

    Every 8-character window of the reversed lowercase alphabet ('zyxwvuts'
    through 'hgfedcba') is tried as a substring, so longer runs count as
    well. Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 8
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [53]:
# Function to check for nine consecutive descending letters
def dec_l_9(pwd):
    """Return 1 if `pwd` contains nine consecutive descending letters, else 0.

    Every 9-character window of the reversed lowercase alphabet ('zyxwvutsr'
    through 'ihgfedcba') is tried as a substring, so longer runs count as
    well. Matching is case-sensitive: uppercase letters are not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 9
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))

In [54]:
# Function to check for ten consecutive descending letters
def dec_l_10(pwd):
    """Return 1 if `pwd` contains ten consecutive descending letters, else 0.

    Every 10-character window of the reversed lowercase alphabet
    ('zyxwvutsrq' through 'jihgfedcba') is tried as a substring, so longer
    runs count as well. Matching is case-sensitive: uppercase letters are
    not recognised.
    """
    reversed_alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    span = 10
    windows = (reversed_alphabet[k:k + span]
               for k in range(len(reversed_alphabet) - span + 1))
    return int(any(chunk in pwd for chunk in windows))