In [60]:
# Pull in pandas for tabular data handling
import pandas as pd

# Read the user table from datasets/users.csv
users = pd.read_csv('datasets/users.csv')

# Report how many users we have (the row count of the table)
user_count = users.shape[0]
print(user_count)

# Preview the first 12 users
users.head(n=12)

982


Unnamed: 0,id,user_name,password
0,1,vance.jennings,joobheco
1,2,consuelo.eaton,0869347314
2,3,mitchel.perkins,fabypotter
3,4,odessa.vaughan,aharney88
4,5,araceli.wilder,acecdn3000
5,6,shawn.harrington,5278049
6,7,evelyn.gay,master
7,8,noreen.hale,murphy
8,9,gladys.ward,lwsves2
9,10,brant.zimmerman,1190KAREN5572497


In [62]:
# Passwords with fewer characters than this are flagged as too short
MIN_PASSWORD_LENGTH = 8

# Character count of every user's password
password_lengths = users['password'].str.len()
users['length'] = password_lengths

# Mark the users whose password is shorter than the minimum length
users['too_short'] = users['length'].lt(MIN_PASSWORD_LENGTH)

# Report how many users have a too-short password
too_short_count = users['too_short'].sum()
print(too_short_count)

# Preview the first 12 rows
users.head(n=12)

376


Unnamed: 0,id,user_name,password,length,too_short
0,1,vance.jennings,joobheco,8,False
1,2,consuelo.eaton,0869347314,10,False
2,3,mitchel.perkins,fabypotter,10,False
3,4,odessa.vaughan,aharney88,9,False
4,5,araceli.wilder,acecdn3000,10,False
5,6,shawn.harrington,5278049,7,True
6,7,evelyn.gay,master,6,True
7,8,noreen.hale,murphy,6,True
8,9,gladys.ward,lwsves2,7,True
9,10,brant.zimmerman,1190KAREN5572497,16,False


In [64]:
# Reading in the top 10000 passwords as a single Series (one password per line).
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and
# removed in pandas 2.0, so the one-column frame is squeezed explicitly instead.
common_passwords = pd.read_csv(
    'datasets/10_million_password_list_top_10000.txt',
    header=None,
).squeeze('columns')

# Taking a look at the top 20
common_passwords.head(20)

0        123456
1      password
2      12345678
3        qwerty
4     123456789
5         12345
6          1234
7        111111
8       1234567
9        dragon
10       123123
11     baseball
12       abc123
13     football
14       monkey
15      letmein
16       696969
17       shadow
18       master
19       666666
Name: 0, dtype: object

In [66]:
# Restrict the comparison to (at most) the first 10000 common passwords
top_common_passwords = common_passwords.head(10000)

# Mark the users whose password appears in that list
users['common_password'] = users['password'].isin(top_common_passwords)

# Report how many users are using a common password
common_password_count = users['common_password'].sum()
print(common_password_count)

# Preview the first 12 rows
users.head(n=12)

129


Unnamed: 0,id,user_name,password,length,too_short,common_password
0,1,vance.jennings,joobheco,8,False,False
1,2,consuelo.eaton,0869347314,10,False,False
2,3,mitchel.perkins,fabypotter,10,False,False
3,4,odessa.vaughan,aharney88,9,False,False
4,5,araceli.wilder,acecdn3000,10,False,False
5,6,shawn.harrington,5278049,7,True,False
6,7,evelyn.gay,master,6,True,True
7,8,noreen.hale,murphy,6,True,True
8,9,gladys.ward,lwsves2,7,True,False
9,10,brant.zimmerman,1190KAREN5572497,16,False,False


In [68]:
# Reading in a list of the 10000 most common words as a single Series.
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and
# removed in pandas 2.0, so the one-column frame is squeezed explicitly instead.
words = pd.read_csv(
    'datasets/google-10000-english.txt',
    header=None,
).squeeze('columns')

# Flagging the users whose password, lower-cased, is one of the common words
users['common_word'] = users['password'].str.lower().isin(words)

# Counting and printing the number of users using common words as passwords
print(users['common_word'].sum())

# Taking a look at the first 12 rows
users.head(12)

137


Unnamed: 0,id,user_name,password,length,too_short,common_password,common_word
0,1,vance.jennings,joobheco,8,False,False,False
1,2,consuelo.eaton,0869347314,10,False,False,False
2,3,mitchel.perkins,fabypotter,10,False,False,False
3,4,odessa.vaughan,aharney88,9,False,False,False
4,5,araceli.wilder,acecdn3000,10,False,False,False
5,6,shawn.harrington,5278049,7,True,False,False
6,7,evelyn.gay,master,6,True,True,True
7,8,noreen.hale,murphy,6,True,True,True
8,9,gladys.ward,lwsves2,7,True,False,False
9,10,brant.zimmerman,1190KAREN5572497,16,False,False,False


In [70]:
# Pull the leading and trailing runs of word characters out of each user name
# and store them as the first and last name respectively
user_names = users['user_name']
users['first_name'] = user_names.str.extract(r'(^\w+)', expand=False)
users['last_name'] = user_names.str.extract(r'(\w+$)', expand=False)

# Mark the users whose password is exactly their first name or their last name
matches_first_name = users['password'].eq(users['first_name'])
matches_last_name = users['password'].eq(users['last_name'])
users['uses_name'] = matches_first_name | matches_last_name

# Report how many users are using one of their names as the password
uses_name_count = users['uses_name'].sum()
print(uses_name_count)

# Preview the first 12 rows
users.head(n=12)

50


Unnamed: 0,id,user_name,password,length,too_short,common_password,common_word,first_name,last_name,uses_name
0,1,vance.jennings,joobheco,8,False,False,False,vance,jennings,False
1,2,consuelo.eaton,0869347314,10,False,False,False,consuelo,eaton,False
2,3,mitchel.perkins,fabypotter,10,False,False,False,mitchel,perkins,False
3,4,odessa.vaughan,aharney88,9,False,False,False,odessa,vaughan,False
4,5,araceli.wilder,acecdn3000,10,False,False,False,araceli,wilder,False
5,6,shawn.harrington,5278049,7,True,False,False,shawn,harrington,False
6,7,evelyn.gay,master,6,True,True,True,evelyn,gay,False
7,8,noreen.hale,murphy,6,True,True,True,noreen,hale,False
8,9,gladys.ward,lwsves2,7,True,False,False,gladys,ward,False
9,10,brant.zimmerman,1190KAREN5572497,16,False,False,False,brant,zimmerman,False


In [72]:
# Mark passwords that contain the same character four or more times in a row:
# the pattern captures one character and requires three back-references to it
has_repeat_run = users['password'].str.contains(r'(.)\1\1\1')
users['too_many_repeats'] = has_repeat_run

# Show the users whose password contains such a run
users.loc[users['too_many_repeats'] == True]

Unnamed: 0,id,user_name,password,length,too_short,common_password,common_word,first_name,last_name,uses_name,too_many_repeats
146,147,patti.dixon,555555,6,True,True,False,patti,dixon,False,True
572,573,cornelia.bradley,555555,6,True,True,False,cornelia,bradley,False,True
644,645,essie.lopez,11111,5,True,True,False,essie,lopez,False,True
798,799,charley.key,888888,6,True,True,False,charley,key,False,True
807,808,thurman.osborne,rinnnng0,8,False,False,False,thurman,osborne,False,True
941,942,mitch.ferguson,aaaaaa,6,True,True,False,mitch,ferguson,False,True


In [74]:
# A password is bad as soon as any one of these individual flags is set
flag_columns = [
    'too_short',
    'common_password',
    'common_word',
    'uses_name',
    'too_many_repeats',
]

# OR the flag columns together, left to right
is_bad = users[flag_columns[0]]
for flag_column in flag_columns[1:]:
    is_bad = is_bad | users[flag_column]
users['bad_password'] = is_bad

# Report how many passwords are bad
bad_password_count = users['bad_password'].sum()
print(bad_password_count)

# Show the first 25 users with a bad password
users.loc[users['bad_password'] == True].head(25)

424


Unnamed: 0,id,user_name,password,length,too_short,common_password,common_word,first_name,last_name,uses_name,too_many_repeats,bad_password
5,6,shawn.harrington,5278049,7,True,False,False,shawn,harrington,False,False,True
6,7,evelyn.gay,master,6,True,True,True,evelyn,gay,False,False,True
7,8,noreen.hale,murphy,6,True,True,True,noreen,hale,False,False,True
8,9,gladys.ward,lwsves2,7,True,False,False,gladys,ward,False,False,True
11,12,milford.hubbard,hubbard,7,True,False,False,milford,hubbard,True,False,True
13,14,jamie.cochran,310356,6,True,False,False,jamie,cochran,False,False,True
15,16,lorrie.gay,oZ4k0QE,7,True,False,False,lorrie,gay,False,False,True
16,17,domingo.dyer,chelsea,7,True,True,True,domingo,dyer,False,False,True
17,18,martin.pacheco,zvc1939,7,True,False,False,martin,pacheco,False,False,True
18,19,shelby.massey,nickgd,6,True,False,False,shelby,massey,False,False,True


In [76]:
# Enter a password that passes the NIST requirements
# PLEASE DO NOT USE AN EXISTING PASSWORD HERE
# The previous value, "qwerty123", is a keyboard-walk password of the kind
# found in common-password lists, so it would fail this notebook's own
# common-password check. A long multi-word passphrase is at least 8 characters,
# is not a single dictionary word, and has no character repeated 4+ times in a row.
new_password = "stapler-orbit-violet-canoe"