## Análise de Indicadores

In [2]:
import math
from collections import Counter

import pandas as pd
import pronouncing
from scipy import stats

### Leitura das amostras

In [3]:
# Load the password samples; the file uses ';' as its field delimiter.
pw = pd.read_csv("./samples.csv", delimiter=';')

In [4]:
# Preview the first five rows of the loaded samples.
pw.head(n=5)

Unnamed: 0,Password,type,entropy
0,n(#?wb%874_^1^P@#)1,Random Passwords,3.9441
1,0WMuGYZqCm&YPFbMt,Random Passwords,3.8594
2,b(E?$3~+598w)m5-#,Random Passwords,3.9774
3,tD$)%H?(+#X#v-@6o,Random Passwords,3.9774
4,5GVhzWToSrh~pAWzkAa,Random Passwords,3.8363


### Indicadores

In [5]:
def syllable_count(password, sep='-'):
    '''
    Count the syllables of a password made of `sep`-separated words.

    Each word is looked up with `pronouncing.phones_for_word`; when the lookup
    returns at least one pronunciation, the syllable count of the first one is
    added to the total. Words with no pronunciation contribute nothing.

    Parameters:
        password: the password string to analyse.
        sep: separator used to split the password into words (default '-').

    Returns:
        The total number of syllables found, as an int (0 when no word of the
        password has a known pronunciation).
    '''
    total = 0
    for word in password.split(sep):
        phones = pronouncing.phones_for_word(word)
        if phones:
            # Only the first listed pronunciation is considered.
            total += pronouncing.syllable_count(phones[0])
    return total

def get_pronunciation(password, sep='-'):
    '''
    Compute a pronounceability score for a password made of `sep`-separated words.

    Each word is looked up with `pronouncing.phones_for_word`. For a word with at
    least one pronunciation, the stress string of the first pronunciation
    (`pronouncing.stresses`) is scored character by character: '0' adds 1,
    '2' adds 2 and '1' adds 3; any other character adds nothing.
    NOTE(review): the digits presumably mean unstressed / secondary / primary
    stress -- confirm against the `pronouncing` documentation.
    A word with no pronunciation scores 0.

    Parameters:
        password: the password string to analyse.
        sep: separator used to split the password into words (default '-').

    Returns:
        The mean of the per-word scores, as a float.
    '''
    # Points awarded for each stress marker.
    stress_points = {'0': 1, '1': 3, '2': 2}

    word_scores = []
    for word in password.split(sep):
        candidates = pronouncing.phones_for_word(word)
        if not candidates:
            word_scores.append(0)
            continue
        # Only the first listed pronunciation is scored.
        stress_pattern = pronouncing.stresses(candidates[0])
        word_scores.append(sum(stress_points.get(mark, 0) for mark in stress_pattern))
    return sum(word_scores) / len(word_scores)

def sci_entropy(password):
    '''
    Compute the Shannon entropy (in bits) of a password's character distribution.

    The occurrences of each distinct character are counted and handed to
    `scipy.stats.entropy` with base 2, which normalises the counts into a
    probability distribution before computing the entropy.

    Parameters:
        password: the password string to analyse.

    Returns:
        The entropy in bits per character. An empty password has no characters
        to measure, so 0.0 is returned for it.
    '''
    if not password:
        # Nothing to count; avoid passing an empty distribution to scipy.
        return 0.0
    # One pass over the password instead of one `str.count` scan per character.
    char_counts = Counter(password)
    return stats.entropy(list(char_counts.values()), base=2)

#### Quantidade de caracteres

In [6]:
# Total number of characters in each password.
pw["length"] = pw['Password'].map(len)

#### Quantidade de caracteres únicos

In [7]:
# Number of distinct characters in each password.
pw["char_set"] = pw['Password'].map(lambda password: len(set(password)))

#### Quantidade de sílabas

In [8]:
# Syllable total per password, using the default '-' word separator.
pw["syllable_count"] = pw['Password'].map(syllable_count)

#### Indicador de Pronunciabilidade

In [9]:
# Pronounceability score per password, using the default '-' word separator.
pw["pronouncing"] = pw['Password'].map(get_pronunciation)

#### Entropia

In [10]:
# Character-distribution entropy (base 2) per password.
pw['entropy_sci'] = pw['Password'].map(sci_entropy)

### Tabelas

#### Média dos indicadores para cada tipo de senha

In [12]:
# Average only the numeric indicator columns. The frame still holds the raw
# 'Password' strings, which cannot be averaged; recent pandas versions raise on
# them instead of silently dropping the column, so select numeric columns first.
pw.select_dtypes(include='number').groupby(pw['type']).mean()

Unnamed: 0_level_0,entropy,length,char_set,syllable_count,pronouncing,entropy_sci
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dict Passwords,2.32079,5.593,5.299,1.705,3.773,2.320353
Munged Passwords,2.270266,5.571,5.148,0.042,0.12,2.269841
Passphrases,3.444087,18.756,12.064,5.068,3.758,3.441478
Random Passwords,3.849219,18.467,15.276,0.037,0.0431,3.840923


#### Correlação entre os indicadores, agrupada por tipo

In [13]:
# Correlate only the numeric indicator columns within each password type. The
# raw 'Password' strings cannot be correlated; recent pandas versions raise on
# them instead of silently dropping the column, so select numeric columns first.
pw.select_dtypes(include='number').groupby(pw['type']).corr()

Unnamed: 0_level_0,Unnamed: 1_level_0,entropy,length,char_set,syllable_count,pronouncing,entropy_sci
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Dict Passwords,entropy,1.0,0.905829,0.976167,0.68251,0.660276,0.999998
Dict Passwords,length,0.905829,1.0,0.952629,0.798746,0.774249,0.905977
Dict Passwords,char_set,0.976167,0.952629,1.0,0.746038,0.726535,0.976304
Dict Passwords,syllable_count,0.68251,0.798746,0.746038,1.0,0.965132,0.682541
Dict Passwords,pronouncing,0.660276,0.774249,0.726535,0.965132,1.0,0.660351
Dict Passwords,entropy_sci,0.999998,0.905977,0.976304,0.682541,0.660351,1.0
Munged Passwords,entropy,1.0,0.884299,0.974777,-0.154075,-0.162017,0.999998
Munged Passwords,length,0.884299,1.0,0.939386,-0.16156,-0.169429,0.884424
Munged Passwords,char_set,0.974777,0.939386,1.0,-0.153519,-0.160438,0.974873
Munged Passwords,syllable_count,-0.154075,-0.16156,-0.153519,1.0,0.987366,-0.154017
