# Bad Password Detection

## Import pandas, as it is used throughout the code.

In [1]:
import pandas as pd

# Read the username/password table from usernames_and_passwords.csv into a DataFrame.
users = pd.read_csv(filepath_or_buffer="datasets/usernames_and_passwords.csv")

# Print a summary of the frame (columns, non-null counts, dtypes, memory usage).
users.info()

# Display the first rows to see how the data is laid out.
users.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 982 entries, 0 to 981
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         982 non-null    int64 
 1   user_name  982 non-null    object
 2   password   982 non-null    object
dtypes: int64(1), object(2)
memory usage: 23.1+ KB


Unnamed: 0,id,user_name,password
0,1,vance.jennings,joobheco
1,2,consuelo.eaton,0869347314
2,3,mitchel.perkins,fabypotter
3,4,odessa.vaughan,aharney88
4,5,araceli.wilder,acecdn3000


# Now you have to apply Digital Identity Guidelines of Authentication.

## 1. Passwords should be at least 8 characters long.

In [3]:
# Compute the number of characters in every password.
users['length_of_password'] = users['password'].str.len()

# Flag the users whose password is shorter than the 8-character minimum.
users['short_password'] = users['length_of_password'] < 8

# Count the flagged users. Series.sum() treats True as 1 and is vectorized; it is
# also the form used for the other rule counts in this notebook.
print(users['short_password'].sum())

# Preview the frame to confirm that the length_of_password and short_password columns were added.
users.head()

376


Unnamed: 0,id,user_name,password,length_of_password,short_password
0,1,vance.jennings,joobheco,8,False
1,2,consuelo.eaton,0869347314,10,False
2,3,mitchel.perkins,fabypotter,10,False
3,4,odessa.vaughan,aharney88,9,False
4,5,araceli.wilder,acecdn3000,10,False


## Now load some common passwords people use

In [6]:
# Load the list of common passwords from common_userpasswords.txt.
# header=None: the file has no header row, so every line is treated as data.
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and removed in
# pandas 2.0, so the single-column frame is converted to a Series with
# DataFrame.squeeze("columns") instead.
common_passwords = pd.read_csv(
    "datasets/common_userpasswords.txt",
    header=None,
).squeeze("columns")

# Preview the first entries to see how the data is arranged.
common_passwords.head()

0       123456
1     password
2     12345678
3       qwerty
4    123456789
Name: 0, dtype: object

## 2. User passwords should not be present in common_passwords.

In [7]:
# Mark every user whose password appears verbatim in the common-password list.
is_common = users['password'].isin(common_passwords)
users['common_password'] = is_common

# Each True counts as 1, so the sum is the number of users with a common password.
print(is_common.sum())

# Preview the frame to confirm that the common_password column was added.
users.head()

129


Unnamed: 0,id,user_name,password,length_of_password,short_password,common_password
0,1,vance.jennings,joobheco,8,False,False
1,2,consuelo.eaton,0869347314,10,False,False
2,3,mitchel.perkins,fabypotter,10,False,False
3,4,odessa.vaughan,aharney88,9,False,False
4,5,araceli.wilder,acecdn3000,10,False,False


## 3. Users should not use common words as their passwords.

In [8]:
# Load the list of commonly used words from commonly_used_words.txt.
# header=None: the file has no header row, so every line is treated as data.
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and removed in
# pandas 2.0, so the single-column frame is converted to a Series with
# DataFrame.squeeze("columns") instead.
words = pd.read_csv(
    'datasets/commonly_used_words.txt',
    header=None,
).squeeze("columns")

# Flag the users whose lower-cased password is exactly one of the common words.
users['common_word'] = users['password'].str.lower().isin(words)

# Count the users whose password is a common word and print the total.
print(users['common_word'].sum())

# Preview the frame to confirm that the common_word column was added.
users.head()

137


Unnamed: 0,id,user_name,password,length_of_password,short_password,common_password,common_word
0,1,vance.jennings,joobheco,8,False,False,False
1,2,consuelo.eaton,0869347314,10,False,False,False
2,3,mitchel.perkins,fabypotter,10,False,False,False
3,4,odessa.vaughan,aharney88,9,False,False,False
4,5,araceli.wilder,acecdn3000,10,False,False,False


## 4. Users should not use their first or last name as their password.

In [9]:
# Split user_name into its leading and trailing word: the run of word characters
# at the start becomes first_name, the run at the end becomes last_name.
user_names = users['user_name']
users['first_name'] = user_names.str.extract(r'(^\w+)', expand=False)
users['last_name'] = user_names.str.extract(r'(\w+$)', expand=False)

# Lower-case the password once, then flag the users whose password equals
# either their first name or their last name.
lowered_password = users['password'].str.lower()
matches_first = lowered_password.eq(users['first_name'])
matches_last = lowered_password.eq(users['last_name'])
users['uses_name'] = matches_first | matches_last

# Print how many users use one of their names as their password.
print(users['uses_name'].sum())

# Preview the frame to confirm that the uses_name column was added.
users.head()

50


Unnamed: 0,id,user_name,password,length_of_password,short_password,common_password,common_word,first_name,last_name,uses_name
0,1,vance.jennings,joobheco,8,False,False,False,vance,jennings,False
1,2,consuelo.eaton,0869347314,10,False,False,False,consuelo,eaton,False
2,3,mitchel.perkins,fabypotter,10,False,False,False,mitchel,perkins,False
3,4,odessa.vaughan,aharney88,9,False,False,False,odessa,vaughan,False
4,5,araceli.wilder,acecdn3000,10,False,False,False,araceli,wilder,False


## 5. Users should not repeat the same character 4 or more times in a row in their passwords.

In [11]:
# Flag the users whose password contains the same character 4 or more times in a
# row: the pattern captures one character and requires three further copies of it.
# str.count(...) > 0 is used instead of str.contains(...) because str.contains emits
# a UserWarning for patterns with capture groups, and the backreference needs one.
users['character_repeats'] = users['password'].str.count(r'(.)\1\1\1') > 0

# Take a look at the users with too many character repeats in their passwords.
users[users['character_repeats']]

  return func(self, *args, **kwargs)


Unnamed: 0,id,user_name,password,length_of_password,short_password,common_password,common_word,first_name,last_name,uses_name,character_repeats
146,147,patti.dixon,555555,6,True,True,False,patti,dixon,False,True
572,573,cornelia.bradley,555555,6,True,True,False,cornelia,bradley,False,True
644,645,essie.lopez,11111,5,True,True,False,essie,lopez,False,True
798,799,charley.key,888888,6,True,True,False,charley,key,False,True
807,808,thurman.osborne,rinnnng0,8,False,False,False,thurman,osborne,False,True
941,942,mitch.ferguson,aaaaaa,6,True,True,False,mitch,ferguson,False,True


## Now flag all the BAD PASSWORDS using above 5 Digital Identity Guidelines of Authentication.

In [13]:
# A password is bad when at least one of the five rule flags is set for that user.
rule_flags = users[[
    'short_password',
    'common_password',
    'common_word',
    'uses_name',
    'character_repeats',
]]
users['bad_password'] = rule_flags.any(axis=1)

# Print the number of users with a bad password.
print(users['bad_password'].sum())

# Show the first 10 bad passwords.
users.loc[users['bad_password'], 'password'].head(10)

424


5     5278049
6      master
7      murphy
8     lwsves2
11    hubbard
13     310356
15    oZ4k0QE
16    chelsea
17    zvc1939
18     nickgd
Name: password, dtype: object

### As a further development, send the users who have bad passwords an e-mail (where an e-mail address is available) that strongly suggests they change their password.