In [2]:
import pandas as pd
from tqdm import tqdm
import warnings

tqdm.pandas()
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("./data/stratified_sample_data.csv")
df.head()

Unnamed: 0,password,strength
0,csillik,0.180594
1,huniihuu,0.177778
2,chaipy,0.172331
3,876876b,0.155556
4,miiwhy,0.154795


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   password  10000 non-null  object 
 1   strength  10000 non-null  float64
dtypes: float64(1), object(1)
memory usage: 156.4+ KB


In [5]:
def lenTransform(text: str) -> int:
    """Calculates the length of a given text.

    Args:
        text (str): The input text for which the length needs to be calculated.

    Returns:
        int: The length of the input text as an integer.
    """
    return len(text)


df["len"] = df["password"].progress_apply(lambda x: lenTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 124186.63it/s]


Unnamed: 0,password,strength,len
8793,KAKAILDIAVOLOINILCORPO,0.823154,22
1122,karl4m,0.172331,6
1283,190397,0.154795,6
9318,smandaketapangkalbarindonesia,0.939366,29
7765,allXUXexept4ME,0.607788,14
3011,sabinra,0.201053,7
7125,liverpooltillidie07,0.761594,19
8823,albertoramirez150711,0.823355,20
3121,demi1989,0.249543,8
9314,mklandmts40203925971,0.841896,20


In [6]:
def alphaUCTransform(text: str) -> int:
    """Counts the number of uppercase alphabetic characters in a given text.

    Args:
        text (str): The input text in which the count of uppercase alphabetic characters needs to be calculated.

    Returns:
        int: The count of uppercase alphabetic characters in the input text as an integer.
    """
    return sum(1 for a in text if a.isupper())


df["alphaUC"] = df["password"].progress_apply(lambda x: alphaUCTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 114942.04it/s]


Unnamed: 0,password,strength,len,alphaUC
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22
1122,karl4m,0.172331,6,0
1283,190397,0.154795,6,0
9318,smandaketapangkalbarindonesia,0.939366,29,0
7765,allXUXexept4ME,0.607788,14,5
3011,sabinra,0.201053,7,0
7125,liverpooltillidie07,0.761594,19,0
8823,albertoramirez150711,0.823355,20,0
3121,demi1989,0.249543,8,0
9314,mklandmts40203925971,0.841896,20,0


In [7]:
def alphaLCTransform(text: str) -> int:
    """Counts the number of lowercase alphabetic characters in a given text.

    Args:
        text (str): The input text in which the count of lowercase alphabetic characters needs to be calculated.

    Returns:
        int: The input text in which the count of lowercase alphabetic characters needs to be calculated.
    """
    return sum(1 for a in text if a.islower())


df["alphaLC"] = df["password"].progress_apply(lambda x: alphaLCTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 107449.83it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0
1122,karl4m,0.172331,6,0,5
1283,190397,0.154795,6,0,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29
7765,allXUXexept4ME,0.607788,14,5,8
3011,sabinra,0.201053,7,0,7
7125,liverpooltillidie07,0.761594,19,0,17
8823,albertoramirez150711,0.823355,20,0,14
3121,demi1989,0.249543,8,0,4
9314,mklandmts40203925971,0.841896,20,0,9


In [8]:
def numberTransform(text: str) -> int:
    """Counts the number of decimal digits in a given text.

    Args:
        text (str): The input text in which the count of decimal digits needs to be calculated.

    Returns:
        int: The count of decimal digits in the input text as an integer.
    """
    return sum(1 for a in text if a.isdecimal())


df["number"] = df["password"].progress_apply(lambda x: numberTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 161474.03it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0
1122,karl4m,0.172331,6,0,5,1
1283,190397,0.154795,6,0,0,6
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0
7765,allXUXexept4ME,0.607788,14,5,8,1
3011,sabinra,0.201053,7,0,7,0
7125,liverpooltillidie07,0.761594,19,0,17,2
8823,albertoramirez150711,0.823355,20,0,14,6
3121,demi1989,0.249543,8,0,4,4
9314,mklandmts40203925971,0.841896,20,0,9,11


In [9]:
def symbolTransform(text: str) -> int:
    """Counts the number of specific symbols in a given text.

    Args:
        text (str): The input text in which the count of symbols needs to be calculated.

    Returns:
        int: The count of specific symbols in the input text as an integer.
    """
    return sum(a in set("!@#$%^&*") for a in text)


df["symbol"] = df["password"].progress_apply(lambda x: symbolTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 41334.51it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0
1122,karl4m,0.172331,6,0,5,1,0
1283,190397,0.154795,6,0,0,6,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0
3011,sabinra,0.201053,7,0,7,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0
8823,albertoramirez150711,0.823355,20,0,14,6,0
3121,demi1989,0.249543,8,0,4,4,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0


In [10]:
def midCharTransform(text: str) -> int:
    """Counts the number of characters that are digits or specific symbols and appear in the middle of a given text.

    Args:
        text (str): The input text in which the count of mid-characters needs to be calculated.

    Returns:
        int: The count of characters that are digits or specific symbols and appear in the middle of the input text as an integer.
    """
    return sum(
        bool(
            (a.isdecimal() or (a in set("!@#$%^&*"))) and ix > 0 and ix < len(text) - 1
        )
        for ix, a in enumerate(text)
    )


df["midChar"] = df["password"].progress_apply(lambda x: midCharTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 59441.74it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0
1122,karl4m,0.172331,6,0,5,1,0,1
1283,190397,0.154795,6,0,0,6,0,4
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1
3011,sabinra,0.201053,7,0,7,0,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1
8823,albertoramirez150711,0.823355,20,0,14,6,0,5
3121,demi1989,0.249543,8,0,4,4,0,3
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10


In [11]:
def repCharTransform(text: str) -> int:
    """Calculates the count of repeated characters in a given text.

    Args:
        text (str): The input text in which the count of repeated characters needs to be calculated.

    Returns:
        int: The count of repeated characters in the input text as an integer.
    """
    return len(text) - len(list(set(text)))


df["repChar"] = df["password"].progress_apply(lambda x: repCharTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 106111.57it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11
1122,karl4m,0.172331,6,0,5,1,0,1,0
1283,190397,0.154795,6,0,0,6,0,4,1
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3
3011,sabinra,0.201053,7,0,7,0,0,0,1
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6
3121,demi1989,0.249543,8,0,4,4,0,3,1
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4


In [12]:
def uniqueCharTransform(text: str) -> int:
    """Calculates the count of unique characters in a given text.

    Args:
        text (str): The input text in which the count of unique characters needs to be calculated.

    Returns:
        int: The count of unique characters in the input text as an integer.
    """
    return len(list(set(text)))


df["uniqueChar"] = df["password"].progress_apply(lambda x: uniqueCharTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 103966.86it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11
1122,karl4m,0.172331,6,0,5,1,0,1,0,6
1283,190397,0.154795,6,0,0,6,0,4,1,5
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11
3011,sabinra,0.201053,7,0,7,0,0,0,1,6
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14
3121,demi1989,0.249543,8,0,4,4,0,3,1,7
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16


In [13]:
def consecAlphaUCTransform(text: str) -> int:
    """Calculates the count of consecutive uppercase alphabetic characters in a given text.

    Args:
        text (str): The input text in which the count of consecutive uppercase alphabetic characters needs to be calculated.

    Returns:
        int: The count of consecutive uppercase alphabetic characters in the input text as an integer.
    """
    temp = ""
    nConsecAlphaUC = 0
    for a in text:
        if a.isupper():
            if temp == a:
                nConsecAlphaUC += 1
            temp = a
    return nConsecAlphaUC


df["consecAlphaUC"] = df["password"].progress_apply(lambda x: consecAlphaUCTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 124007.45it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar,consecAlphaUC
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11,0
1122,karl4m,0.172331,6,0,5,1,0,1,0,6,0
1283,190397,0.154795,6,0,0,6,0,4,1,5,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11,0
3011,sabinra,0.201053,7,0,7,0,0,0,1,6,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11,0
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14,0
3121,demi1989,0.249543,8,0,4,4,0,3,1,7,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16,0


In [14]:
def consecAlphaLCTransform(text: str) -> int:
    """Calculates the count of consecutive lowercase alphabetic characters in a given text.

    Args:
        text (str): The input text in which the count of consecutive lowercase alphabetic characters needs to be calculated.

    Returns:
        int: The count of consecutive lowercase alphabetic characters in the input text as an integer.
    """
    temp = ""
    nConsecAlphaLC = 0
    for a in text:
        if a.islower():
            if temp == a:
                nConsecAlphaLC += 1
            temp = a
    return nConsecAlphaLC


df["consecAlphaLC"] = df["password"].progress_apply(lambda x: consecAlphaLCTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 62854.75it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar,consecAlphaUC,consecAlphaLC
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11,0,0
1122,karl4m,0.172331,6,0,5,1,0,1,0,6,0,0
1283,190397,0.154795,6,0,0,6,0,4,1,5,0,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11,0,1
3011,sabinra,0.201053,7,0,7,0,0,0,1,6,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11,0,2
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14,0,0
3121,demi1989,0.249543,8,0,4,4,0,3,1,7,0,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16,0,0


In [15]:
def consecNumberTransform(text: str) -> int:
    """Calculates the count of consecutive numeric digits in a given text.

    Args:
        text (str): The input text in which the count of consecutive numeric digits needs to be calculated.

    Returns:
        int: The count of consecutive numeric digits in the input text as an integer.
    """
    temp = ""
    nConsecNumber = 0
    for a in text:
        if a.isdecimal():
            if temp == a:
                nConsecNumber += 1
            temp = a
    return nConsecNumber


df["consecNumber"] = df["password"].progress_apply(lambda x: consecNumberTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 122705.77it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar,consecAlphaUC,consecAlphaLC,consecNumber
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11,0,0,0
1122,karl4m,0.172331,6,0,5,1,0,1,0,6,0,0,0
1283,190397,0.154795,6,0,0,6,0,4,1,5,0,0,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15,0,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11,0,1,0
3011,sabinra,0.201053,7,0,7,0,0,0,1,6,0,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11,0,2,0
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14,0,0,1
3121,demi1989,0.249543,8,0,4,4,0,3,1,7,0,0,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16,0,0,0


In [16]:
def consecSymbolTransform(text: str) -> int:
    """Calculates the count of consecutive specific symbols in a given text.

    Args:
        text (str): The input text in which the count of consecutive specific symbols needs to be calculated.

    Returns:
        int: The count of consecutive specific symbols in the input text as an integer.
    """
    temp = ""
    nConsecSymbol = 0
    for a in text:
        if a in set("!@#$%^&*"):
            if temp == a:
                nConsecSymbol += 1
            temp = a
    return nConsecSymbol


df["consecSymbol"] = df["password"].progress_apply(lambda x: consecSymbolTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 66860.14it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar,consecAlphaUC,consecAlphaLC,consecNumber,consecSymbol
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11,0,0,0,0
1122,karl4m,0.172331,6,0,5,1,0,1,0,6,0,0,0,0
1283,190397,0.154795,6,0,0,6,0,4,1,5,0,0,0,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15,0,0,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11,0,1,0,0
3011,sabinra,0.201053,7,0,7,0,0,0,1,6,0,0,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11,0,2,0,0
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14,0,0,1,0
3121,demi1989,0.249543,8,0,4,4,0,3,1,7,0,0,0,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16,0,0,0,0


In [17]:
def seqAlphaTransform(text: str) -> int:
    """Calculates the count of sequential alphabetic characters (forward or reverse) in a given text.

    Args:
        text (str): The input text in which the count of sequential alphabetic characters needs to be calculated.

    Returns:
        int: The count of sequential alphabetic characters (forward or reverse) in the input text as an integer.
    """
    sAlphas = "abcdefghijklmnopqrstuvwxyz"
    nSeqAlpha = 0
    for s in range(len(sAlphas) - 2):
        sFwd = sAlphas[s : s + 3]
        sRev = sFwd[::-1]
        if sFwd in text.lower() or sRev in text.lower():
            nSeqAlpha += 1
    return nSeqAlpha


df["seqAlpha"] = df["password"].progress_apply(lambda x: seqAlphaTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 28993.84it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar,consecAlphaUC,consecAlphaLC,consecNumber,consecSymbol,seqAlpha
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11,0,0,0,0,0
1122,karl4m,0.172331,6,0,5,1,0,1,0,6,0,0,0,0,0
1283,190397,0.154795,6,0,0,6,0,4,1,5,0,0,0,0,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15,0,0,0,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11,0,1,0,0,0
3011,sabinra,0.201053,7,0,7,0,0,0,1,6,0,0,0,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11,0,2,0,0,0
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14,0,0,1,0,0
3121,demi1989,0.249543,8,0,4,4,0,3,1,7,0,0,0,0,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16,0,0,0,0,0


In [18]:
def seqNumberTransform(text: str) -> int:
    """Calculates the count of sequential numeric digits (forward or reverse) in a given text.

    Args:
        text (str): The input text in which the count of sequential numeric digits needs to be calculated.

    Returns:
        int: The count of sequential numeric digits (forward or reverse) in the input text as an integer.
    """
    sNumerics = "01234567890"
    nSeqNumber = 0
    for s in range(len(sNumerics) - 2):
        sFwd = sNumerics[s : s + 3]
        sRev = sFwd[::-1]
        if sFwd in text.lower() or sRev in text.lower():
            nSeqNumber += 1
    return nSeqNumber


df["seqNumber"] = df["password"].progress_apply(lambda x: seqNumberTransform(x))
df.sample(10, random_state=30)

100%|██████████| 10000/10000 [00:00<00:00, 65177.93it/s]


Unnamed: 0,password,strength,len,alphaUC,alphaLC,number,symbol,midChar,repChar,uniqueChar,consecAlphaUC,consecAlphaLC,consecNumber,consecSymbol,seqAlpha,seqNumber
8793,KAKAILDIAVOLOINILCORPO,0.823154,22,22,0,0,0,0,11,11,0,0,0,0,0,0
1122,karl4m,0.172331,6,0,5,1,0,1,0,6,0,0,0,0,0,0
1283,190397,0.154795,6,0,0,6,0,4,1,5,0,0,0,0,0,0
9318,smandaketapangkalbarindonesia,0.939366,29,0,29,0,0,0,14,15,0,0,0,0,0,0
7765,allXUXexept4ME,0.607788,14,5,8,1,0,1,3,11,0,1,0,0,0,0
3011,sabinra,0.201053,7,0,7,0,0,0,1,6,0,0,0,0,0,0
7125,liverpooltillidie07,0.761594,19,0,17,2,0,1,8,11,0,2,0,0,0,0
8823,albertoramirez150711,0.823355,20,0,14,6,0,5,6,14,0,0,1,0,0,0
3121,demi1989,0.249543,8,0,4,4,0,3,1,7,0,0,0,0,0,0
9314,mklandmts40203925971,0.841896,20,0,9,11,0,10,4,16,0,0,0,0,0,0
