In [1]:
# Import the necessary Libraries
import pandas as pd

# For text feature extraction
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
# To save the trained model on local storage
import joblib

# Load the training set into a DataFrame. The path is relative, so
# 'training.csv' must be present in the kernel's working directory.
data = pd.read_csv('training.csv')

In [None]:
# Show the loaded DataFrame as the cell output for a quick visual check.
data

In [None]:
# Count the missing values in each column. `isnull` has to be called to
# produce the boolean mask that `.sum()` then adds up column by column.
data.isnull().sum()

In [None]:
# Tally how often each column label occurs in the header; a count above 1
# would indicate a duplicated column name.
# NOTE(review): this inspects the column labels, not the row values. If the
# goal was the class balance, value_counts() on the label column is presumably
# what was intended - confirm.
data.columns.value_counts()

In [None]:
# Count the NA entries per column. `isna` must be called first so that
# `.sum()` operates on the resulting boolean mask.
data.isna().sum()

# Dataset is already cleaned and preprocessed for use.

In [2]:
# For creating a pipeline
from sklearn.pipeline import Pipeline

# Classifier Model (Logistic Regression)
from sklearn.linear_model import LogisticRegression

In [4]:
# Convert the frame to a NumPy array once and slice both columns from it.
table = data.to_numpy()

# Model input: the column at position 1, coerced to strings (the passwords)
features = table[:, 1].astype(str)

# Target: the last column, coerced to integers (the password strength)
labels = table[:, -1].astype(int)

In [10]:
##logistic regression

In [5]:
# Character-level TF-IDF features feeding a multinomial logistic regression.
# Pipeline applies its steps in order, so one fit/score call on the pipeline
# runs the vectorizer and then the classifier.
classifier_model = Pipeline([
    # analyzer='char' tokenises each password into characters instead of words
    ('tfidf', TfidfVectorizer(analyzer='char')),
    # The 'sag' solver shuffles the samples while optimising; a fixed
    # random_state makes the fitted model and its reported accuracy repeatable.
    # NOTE(review): newer scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version before upgrading.
    ('logisticRegression', LogisticRegression(multi_class='multinomial',
                                              solver='sag',
                                              random_state=42)),
])

In [6]:
# Train the whole pipeline on the full dataset
classifier_model.fit(features, labels)

# Score on the same samples that were used for fitting: this measures how well
# the model fits the training data, not how it performs on unseen passwords.
training_accuracy = classifier_model.score(features, labels)
print('Training Accuracy: ', training_accuracy)

Training Accuracy:  0.8194510213989618


In [7]:
# Persist the fitted logistic-regression pipeline to
# 'LogisticRegression_Model.joblib' (relative path, so it lands in the working
# directory). As the last expression of the cell, the value returned by
# joblib.dump is shown as the cell output.
joblib.dump(classifier_model, 'LogisticRegression_Model.joblib')

['LogisticRegression_Model.joblib']

In [11]:
#neural network

In [13]:
# Classifier Model (MultiLayer Perceptron)
from sklearn.neural_network import MLPClassifier

In [14]:
# Character-level TF-IDF features feeding a multi-layer perceptron.
# Pipeline applies its steps in order, so one fit/score call on the pipeline
# runs the vectorizer and then the classifier.
classifier_model = Pipeline([
    # analyzer='char' tokenises each password into characters instead of words
    ('tfidf', TfidfVectorizer(analyzer='char')),
    # Weight initialisation and mini-batch sampling are random; a fixed
    # random_state makes the trained network and its accuracy repeatable.
    ('mlpClassifier', MLPClassifier(solver='adam',
                                    alpha=1e-5,
                                    max_iter=400,
                                    activation='logistic',
                                    random_state=42)),
])

In [16]:
# Train the pipeline on the full dataset; fit() hands back the fitted pipeline
fitted_model = classifier_model.fit(features, labels)

# Accuracy on the very samples the model was trained on (no held-out data)
print('Training Accuracy: ', fitted_model.score(features, labels))

Training Accuracy:  0.993551181984539


In [17]:

# Save the fitted neural-network (MLP) pipeline to 'NeuralNetwork_Model.joblib'
# in the working directory.
joblib.dump(classifier_model, 'NeuralNetwork_Model.joblib')

['NeuralNetwork_Model.joblib']

In [18]:
#random forest

In [19]:
# Classifier Model (Random Forest)
from sklearn.ensemble import RandomForestClassifier

In [20]:
# Character-level TF-IDF features feeding a random forest.
# Pipeline applies its steps in order, so one fit/score call on the pipeline
# runs the vectorizer and then the classifier.
classifier_model = Pipeline([
    # analyzer='char' tokenises each password into characters instead of words
    ('tfidf', TfidfVectorizer(analyzer='char')),
    # 100 entropy-split trees, each capped at depth 50. Bootstrap sampling and
    # per-split feature selection are random, so random_state is fixed to make
    # the forest and its reported accuracy repeatable.
    ('randomForest', RandomForestClassifier(n_estimators=100,
                                            max_depth=50,
                                            criterion='entropy',
                                            random_state=42)),
])

In [21]:
# Train the whole pipeline on the full dataset
classifier_model.fit(features, labels)

# Evaluated on the training samples themselves, so this figure says nothing
# about accuracy on passwords the model has not seen.
train_score = classifier_model.score(features, labels)
print('Training Accuracy: ', train_score)

Training Accuracy:  0.9997087052321022


In [22]:
# Save the fitted random-forest pipeline to 'RandomForest_Model.joblib'
# in the working directory.
joblib.dump(classifier_model, 'RandomForest_Model.joblib')

['RandomForest_Model.joblib']

In [23]:
# naive Bayes


In [24]:
# Classifier Model (Naive Bayes)
from sklearn.naive_bayes import BernoulliNB

In [25]:
# Build the two stages separately, then chain them: the character-level
# TF-IDF output is what the Bernoulli naive Bayes estimator is trained on.
# NOTE(review): BernoulliNB presumably binarises its input with default
# settings, which would reduce the TF-IDF weights to presence/absence -
# confirm this is intended.
char_tfidf = TfidfVectorizer(analyzer='char')
bernoulli_nb = BernoulliNB()
classifier_model = Pipeline(steps=[
    ('tfidf', char_tfidf),
    ('bernoulliNB', bernoulli_nb),
])

In [26]:
# Train the pipeline on the full dataset; fit() hands back the fitted pipeline
trained_pipeline = classifier_model.fit(features, labels)

# Accuracy on the training samples only (no held-out data is used here)
nb_training_accuracy = trained_pipeline.score(features, labels)
print('Training Accuracy: ', nb_training_accuracy)

Training Accuracy:  0.8122164544198379


In [27]:
# Save the fitted naive-Bayes pipeline to 'NaiveBayes_Model.joblib'
# in the working directory.
joblib.dump(classifier_model, 'NaiveBayes_Model.joblib')

['NaiveBayes_Model.joblib']

In [28]:
# decision tree


In [33]:
from sklearn.tree import DecisionTreeClassifier

In [34]:
# Character-level TF-IDF features feeding a single decision tree.
# Pipeline applies its steps in order, so one fit/score call on the pipeline
# runs the vectorizer and then the classifier.
classifier_model = Pipeline([
    # analyzer='char' tokenises each password into characters instead of words
    ('tfidf', TfidfVectorizer(analyzer='char')),
    # The tree builder permutes candidate features at each split, so equally
    # good splits can be resolved differently between runs; a fixed
    # random_state makes the fitted tree repeatable.
    ('decisionTree', DecisionTreeClassifier(random_state=42)),
])

In [35]:
# Train the whole pipeline on the full dataset
classifier_model.fit(features, labels)

# Measured on the data the tree was grown from, so this is a measure of fit
# rather than of generalisation.
tree_training_accuracy = classifier_model.score(features, labels)
print('Training Accuracy: ', tree_training_accuracy)

Training Accuracy:  0.9997236434253277


In [36]:
# Save the fitted decision-tree pipeline to 'DecisionTree_Model.joblib'
# in the working directory.
joblib.dump(classifier_model, 'DecisionTree_Model.joblib')

['DecisionTree_Model.joblib']