# Password generation using NB

<img src = 'hacker.png'>

In [36]:
# Import libraries
import pandas as pd
import numpy as np
import random
import tqdm

##  Create Passwords Dataset

In [37]:
# Use a test generated dataframe
passwords = pd.read_csv('data_passwd.csv', names = ['Capital', 'Numeric', 'Word', 'Special'])
# Convert all features as string
passwords =   passwords.astype(str)
passwords.head(20)

Unnamed: 0,Capital,Numeric,Word,Special
0,A,sun,1,*+-/
1,B,cloud,105,!_
2,C,storm,208,","
3,D,light,1,!
4,A,storm,50,%-
5,B,cloud,2019,@
6,AB,sky,2018,%-
7,D,rain,2010,!!!
8,A,snow,156,#$%
9,DA,snow,2019,%


##  Calculate Probabilities

In [38]:
def calculate_probs(dataframe):
    
    '''
    create a dictionary for each feature containing levels of the feature and probability occurence of each level
    input: dataframe
    output: dictionary
    
    '''
    
    thetas = {}
    N = len(dataframe)
    # For each feature
    for feature in dataframe:
        # Count number of each level of a feature
        value_counts = dataframe[feature].value_counts()
        # Create key with probs respecting the index order
        thetas[feature] = (value_counts.index, list(value_counts / N))
        
    return(thetas)

In [39]:
thetas_mle = calculate_probs(passwords)
samples = ['one','two']
samples.append([''.join(['0.5','0.5']) for p in zip(*samples)])

In [40]:
samples

['one', 'two', ['0.50.5', '0.50.5', '0.50.5']]

##  Sample from model

In [51]:
def generate_data(thetas_mle, sample_size=10):
    
    '''
    create a dataframe with a FullPassword column generated using NB algorithm
    thetas_mle: dictionary of probabilities per level of a given feature, for all features
    sample_size: int, number of Password to generate
    
    '''
    
    samples, columns = [], []
    for k, values in thetas_mle.items():
        # put feature names in columns
        columns.append(k)
        # add sample_size observation with a randomly selected level of a feature with its occurence probability 
        samples.append(np.random.choice(values[0], size = sample_size, p = values[1]))
    # Add FullPassword feature
    columns.append('FullPassword')
    samples.append([''.join(p) for p in zip(*samples)])
    
    return(pd.DataFrame(zip(*samples), columns = columns))

In [53]:
generated_passwords = generate_data(thetas_mle, sample_size=100)
generated_passwords.head(10)

Unnamed: 0,Capital,Numeric,Word,Special,FullPassword
0,C,sky,2018,#$%,Csky2018#$%
1,A,sun,2019,*+-/,Asun2019*+-/
2,B,sun,2019,!,Bsun2019!
3,B,snow,1,#$%,Bsnow1#$%
4,B,cloud,1,!!!,Bcloud1!!!
5,D,storm,208,!,Dstorm208!
6,D,sky,50,*+-/,Dsky50*+-/
7,A,cloud,1,*+-/,Acloud1*+-/
8,D,snow,2019,!_,Dsnow2019!_
9,B,rain,156,%,Brain156%


# Personalized dataframe

In [54]:
def levels(liste, sort = False):
    
    '''
    return levels of a list
    sort: boolean, if the output shall be sorted
    '''
    
    output_levels = []
    for elm in liste:
        if elm not in output_levels:
            output_levels.append(elm)
    
    if sort:
        return(np.sort(output_levels))
    else:
        return(output_levels)

In [55]:
def create_dataframe(expected, n_obs):
    
    '''
    generate a dataframe with passwords
    example: create_dataframe(['Numeric', 'Word','Special'], 200) creates 200 observations of passwords containing 
             patterns of Numeric, Word and Special
    expected: list of string
    n_obs: int
    '''
    
    liste_dataframe = []
    dict_of_levels_of_each_cat = {}
    level = levels(expected)
    
    for elm in level:
        
        # enter all levels separated by a comma
        levels_of_feat = input(f'Enter all levels of [{elm}] separated with a comma: ')
        levels_of_feat = levels_of_feat.split(sep = ',')
        # just to be sure no repetitions
        levels_of_feat = levels(levels_of_feat)
        dict_of_levels_of_each_cat[elm] = levels_of_feat
        
        # Generate random index
        random_indexes = []
        for _ in range(n_obs):
            random_indexes.append(random.randint(0, len(dict_of_levels_of_each_cat[elm])-1))
        
        column_i = []
        
        # Assigning randomly value to a feature according to the levels wished
        for index in range(n_obs):
            column_i.append(dict_of_levels_of_each_cat[elm][random_indexes[index]])
        
        liste_dataframe.append(column_i)

    # Generating the dataframe
    dataframe = pd.DataFrame(zip(*liste_dataframe), columns = expected)
    
    return(dataframe)

In [56]:
personnalized_passwords = create_dataframe(['num', 'word', 'special'], 10)
personnalized_passwords.head()

Enter all levels of [num] separated with a comma: 1,2,3,10,50,100,2010,2018,2019
Enter all levels of [word] separated with a comma: orange, banana, almond, guava
Enter all levels of [special] separated with a comma: !@#,#$,#,@,!,~


Unnamed: 0,num,word,special
0,2019,banana,#$
1,100,banana,#$
2,1,banana,~
3,50,almond,!
4,2018,guava,!


In [57]:
thetas_mle_personnalized = calculate_probs(personnalized_passwords)
thetas_mle_personnalized

{'num': (Index(['2018', '3', '1', '10', '100', '50', '2019'], dtype='object'),
  [0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1]),
 'word': (Index(['orange', ' banana', ' guava', ' almond'], dtype='object'),
  [0.3, 0.3, 0.3, 0.1]),
 'special': (Index(['!', '#$', '@', '~', '#'], dtype='object'),
  [0.3, 0.3, 0.2, 0.1, 0.1])}

In [58]:
generated_passwords_personnalized = generate_data(thetas_mle_personnalized, sample_size=50)
generated_passwords_personnalized.head(10)

Unnamed: 0,num,word,special,FullPassword
0,2018,orange,@,2018orange@
1,1,guava,#,1 guava#
2,1,guava,~,1 guava~
3,2019,banana,~,2019 banana~
4,3,orange,#$,3orange#$
5,2019,banana,#$,2019 banana#$
6,100,banana,#,100 banana#
7,1,banana,~,1 banana~
8,50,orange,@,50orange@
9,10,guava,~,10 guava~


## Password crypting

In [59]:
import bcrypt

In [60]:
# Bytes literals are always prefixed with 'b' or 'B'; they produce an instance of the bytes type instead of the str type
key = bcrypt.kdf(password=b'password', salt=b'salt', desired_key_bytes=32,rounds=100)
key

b'W\x1cq\xbd\xf25W\xf9\xe7\x99\x0fH\xfb\x1a-:n\xbd\x03\xd0\x1a>\x12\xa7\xf7\x0b\x85\x03\xc9\xf9\xbe8'

In [61]:
# Example of Hashing and Checking a match

password = b"my_password"

# Hash a password for the first time, with a certain number of rounds
hashed = bcrypt.hashpw(password, bcrypt.gensalt())

# Check that a unhashed password matches one that has previously been hashed
if bcrypt.checkpw(password, hashed):
    print("It Matches") 
else:
    print("It Does not Match")

It Matches


In [62]:
def crypt(dataframe, password_feature = 'FullPassword'):
    
    '''
    create a feature with hashed passwords
    dataframe: the dataframe which contains the password_feature columns
    password_feature: string, name of the feature containing passwords generated 
    '''
    
    crypted_passwd = []
    for index in tqdm.tqdm(range(dataframe.shape[0])):
        password = bytes(dataframe.loc[index, password_feature], 'utf-8')
        hashed = bcrypt.hashpw(password, bcrypt.gensalt())
        crypted_passwd.append(hashed)
    dataframe['CryptedPassword'] = crypted_passwd
    
    return(dataframe)

In [63]:
crypt(generated_passwords_personnalized)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:18<00:00,  2.63it/s]


Unnamed: 0,num,word,special,FullPassword,CryptedPassword
0,2018,orange,@,2018orange@,b'$2b$12$WMMZ5/G2/NAN.4oovJ1KTu.hjEdblTdEm6LIm...
1,1,guava,#,1 guava#,b'$2b$12$O/cS/X7zWwO.mEjMN2FD0.BBvL2pYZ9Rfg9I5...
2,1,guava,~,1 guava~,b'$2b$12$vqNgTRNNhLgfoD1akHLVR.KSIv9xQq1EJvKNE...
3,2019,banana,~,2019 banana~,b'$2b$12$.F.4KCy2y7XBJNiIXXA6COfrgwAprgiJzwDKj...
4,3,orange,#$,3orange#$,b'$2b$12$sdkJ24kyJU.uwQA30SoLXuVNagw2Lkph50NZk...
5,2019,banana,#$,2019 banana#$,b'$2b$12$8CbH1P2PSzYsRbcvUXBLCOToW./pKw5gv9oLt...
6,100,banana,#,100 banana#,b'$2b$12$MYlRtl/DEgBhR6KkYPcZoubkHI7SM2t7wm48t...
7,1,banana,~,1 banana~,b'$2b$12$F8Ovi88A1cgzuZdYUZIEZujioPQyZ6WK.Thlw...
8,50,orange,@,50orange@,b'$2b$12$spKC6Lghf1GqusO1qbkHf.b/.VsPxDzPZSmj9...
9,10,guava,~,10 guava~,b'$2b$12$tirTepinyotWKezAi9NBwezye98iFx2wWX2Iz...


## Using GAN... working on it