# **Password Strength Checker :**

# Importing Libraries and Dataset :

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

In [4]:
data = pd.read_csv("/content/data.csv",on_bad_lines="skip")


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 669640 entries, 0 to 669639
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   password  669639 non-null  object
 1   strength  669640 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 10.2+ MB


In [6]:
data.head()

Unnamed: 0,password,strength
0,kzde5577,1
1,kino3434,1
2,visi7k1yr,1
3,megzy123,1
4,lamborghin1,1


# Data Preprocessing :

In [7]:
data['strength'].unique()

array([1, 2, 0])

In [8]:
data.isna().sum()

Unnamed: 0,0
password,1
strength,0


In [9]:
data.dropna(inplace = True)

In [10]:
data.duplicated().sum()

0

In [11]:
data["strength"] = data['strength'].map({0 : "Weak", 1 : "Medium", 2 : "Strong"})

In [12]:
data.head()

Unnamed: 0,password,strength
0,kzde5577,Medium
1,kino3434,Medium
2,visi7k1yr,Medium
3,megzy123,Medium
4,lamborghin1,Medium


# Building a function for our password :

In [13]:
def words(password) :
  character = []
  for i in password:
    character.append(i)
  return character


# Model Building & Deployement :

In [14]:
x = np.array(data["password"]) # Independent Variable/Input Data
y = np.array(data["strength"]) # Dependent Variable/Output Data

In [15]:
tdif = TfidfVectorizer(tokenizer = words)
x = tdif.fit_transform(x)



In [16]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=42)

In [17]:
para = {"n_estimators" : [i for i in range(1,20)], "criterion" : ["gini", "entropy", "log_loss"],"max_depth" : [i for i in range(1,20)],"max_features" : ["sqrt", "log2"]}

In [18]:
model = RandomForestClassifier()

In [21]:
rs = RandomizedSearchCV(model,param_distributions=para,n_iter=10)

In [22]:
rs.fit(x_train,y_train)

In [23]:
rs.best_score_

0.8954660092635758

In [24]:
rs.best_params_

{'n_estimators': 8,
 'max_features': 'sqrt',
 'max_depth': 18,
 'criterion': 'entropy'}

In [25]:
model = RandomForestClassifier(criterion='entropy',n_estimators=8,max_depth=18,max_features='sqrt')

In [26]:
model.fit(x_train,y_train)

In [28]:
print("Training Score : ",model.score(x_train,y_train)*100,"\nTesting Score :",model.score(x_test,y_test)*100)

Training Score :  91.41407604897368 
Testing Score : 89.66310256257093


# **Prediction :**

In [29]:
import getpass

In [30]:
user = getpass.getpass("Enter Password :")
data = tdif.transform([user]).toarray()
output = model.predict(data)
print(output)

Enter Password :··········
['Strong']
