In [1]:
# Importing libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Loading the dataset
df = pd.read_csv('passwords_dataset.csv')

# Preprocessing: cast the three flag columns to int in a single pass
# (presumably they are stored as booleans in the CSV -- confirm against the file)
flag_columns = ['Has Lowercase', 'Has Uppercase', 'Has Special Character']
df = df.astype(dict.fromkeys(flag_columns, int))

# Encoding: ordinal integer codes for the target labels
strength_codes = {'Weak': 0, 'Medium': 1, 'Strong': 2}
encoded_strength = df['Strength'].map(strength_codes)

# Series.map turns every label that is missing from the mapping into NaN without
# complaint; stop here and name the offending labels instead of letting NaNs
# flow into the target column.
unmapped_rows = encoded_strength.isna()
if unmapped_rows.any():
    unexpected_labels = df.loc[unmapped_rows, 'Strength'].unique().tolist()
    raise ValueError(f"Unexpected 'Strength' labels: {unexpected_labels}")
df['Strength'] = encoded_strength

# Displaying some rows to verify the preprocessing
df.head()


Unnamed: 0,Password,Has Lowercase,Has Uppercase,Has Special Character,Length,Strength
0,<%r?.,1,0,1,5,0
1,l(d_l,1,0,1,5,0
2,"|+Z)kDTRYo:q{""(",1,1,1,15,2
3,gwcNB[oS5!n%OPJ,1,1,1,15,2
4,^vXjCCP6,1,1,1,8,2


In [3]:
# Feature matrix X: the three flag columns plus the password length
feature_columns = ['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length']
X = df[feature_columns]

# Target vector y: the 'Strength' column
y = df['Strength']


In [4]:
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the shuffle (and so the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the (rows, columns) shape of each feature partition
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


Training data shape: (8000, 4)
Testing data shape: (2000, 4)


In [10]:
# Build a Decision Tree (seeded with random_state=42) and fit it on the training split
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test features
tree_predictions = tree_model.predict(X_test)

# Compare the predictions with the true test labels, then print the results
tree_accuracy = accuracy_score(y_test, tree_predictions)
tree_report = classification_report(y_test, tree_predictions)
print("Decision Tree Accuracy:", tree_accuracy)
print("Classification Report for Decision Tree:")
print(tree_report)


Decision Tree Accuracy: 0.9995
Classification Report for Decision Tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       429
           1       1.00      1.00      1.00       370
           2       1.00      1.00      1.00      1201

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [11]:
# Build a Logistic Regression model with the solver's iteration cap set to 1000,
# and fit it on the training split
log_reg_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict labels for the held-out test features
log_reg_predictions = log_reg_model.predict(X_test)

# Compare the predictions with the true test labels, then print the results
log_reg_accuracy = accuracy_score(y_test, log_reg_predictions)
log_reg_report = classification_report(y_test, log_reg_predictions)
print("Logistic Regression Accuracy:", log_reg_accuracy)
print("Classification Report for Logistic Regression:")
print(log_reg_report)


Logistic Regression Accuracy: 0.997
Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       429
           1       1.00      0.99      0.99       370
           2       1.00      1.00      1.00      1201

    accuracy                           1.00      2000
   macro avg       1.00      0.99      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [8]:
# Initializing and training the Naive Bayes classifier
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)

# Predicting on the test data
nb_predictions = nb_model.predict(X_test)

# Evaluating the Naive Bayes model
print("Naive Bayes Accuracy:", accuracy_score(y_test, nb_predictions))
print("Classification Report for Naive Bayes:")
# A class that the model never predicts has an undefined precision (0/0).
# zero_division=0 reports that value as 0.0 -- the same number the default
# setting prints -- without emitting an UndefinedMetricWarning for each average.
print(classification_report(y_test, nb_predictions, zero_division=0))


Naive Bayes Accuracy: 0.613
Classification Report for Naive Bayes:
              precision    recall  f1-score   support

           0       1.00      0.06      0.11       429
           1       0.00      0.00      0.00       370
           2       0.61      1.00      0.76      1201

    accuracy                           0.61      2000
   macro avg       0.54      0.35      0.29      2000
weighted avg       0.58      0.61      0.48      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
