In [13]:
import math
from collections import defaultdict

class PasswordStrengthEstimator:
    """Score a password with character n-gram statistics of the password itself.

    For every order ``n`` from 1 to ``max_n`` the password's own n-grams are
    counted, a conditional probability is derived for each character given
    the ``n - 1`` characters before it, and the information content
    ``-log2(P(password))`` is computed.  The smallest value over all orders,
    capped at 100, is reported as the strength.

    NOTE(review): ``dataset_file`` is stored but never read by this class;
    all counts come from the password being scored.  Confirm whether the
    model was meant to be trained on the dataset instead.
    """

    # Probability used for a transition whose denominator count is zero.
    _UNSEEN_PROBABILITY = 1e-6

    def __init__(self, dataset_file, max_n):
        """Store the configuration and create empty count tables.

        Args:
            dataset_file: Path kept on the instance (not opened here).
            max_n: Highest n-gram order evaluated by ``estimate_strength``.
        """
        self.dataset_file = dataset_file
        self.max_n = max_n
        # Every character seen so far; accumulates across calls.
        self.alphabet = set()
        # n-gram -> occurrences, rebuilt for each order in estimate_strength.
        self.ngram_counts = defaultdict(int)
        # (prefix, char) -> probability for the password scored last.
        self.conditional_probabilities = defaultdict(float)

    def _count_ngrams(self, n, password):
        """Add every length-``n`` substring of ``password`` to ``ngram_counts``.

        The characters of each counted n-gram are also added to ``alphabet``.
        """
        for i in range(len(password) - n + 1):
            ngram = password[i:i+n]
            self.ngram_counts[ngram] += 1
            self.alphabet.update(ngram)

    def _calculate_conditional_probabilities(self, n):
        """Fill ``conditional_probabilities`` from the current ``ngram_counts``.

        Each n-gram is split into its leading ``n - 1`` characters (prefix)
        and its last character.  The probability stored under
        ``(prefix, char)`` is the n-gram count divided by the summed counts of
        the strings produced by ``_generate_prefixes``; when that sum is zero
        the unseen-transition probability is stored instead.
        """
        counts = self.ngram_counts
        for ngram, count in counts.items():
            prefix = ngram[:-1]
            char = ngram[-1]
            # .get() instead of indexing: indexing a defaultdict would insert
            # a zero-count entry for every candidate that is looked up.
            prefix_count = sum(
                counts.get(candidate, 0)
                for candidate in self._generate_prefixes(prefix, n)
            )
            if prefix_count != 0:
                self.conditional_probabilities[(prefix, char)] = count / prefix_count
            else:
                self.conditional_probabilities[(prefix, char)] = self._UNSEEN_PROBABILITY

    def _generate_prefixes(self, prefix, n):
        """Yield one candidate string per alphabet character for ``prefix``.

        NOTE(review): ``_calculate_conditional_probabilities`` always passes
        a prefix of length ``n - 1``, so only the ``else`` branch runs there.
        For ``n >= 2`` the yielded strings have length ``n - 1`` and cannot
        match the length-``n`` keys of ``ngram_counts``, so the denominator is
        0 and the unseen-transition probability is always used.  Confirm
        whether ``prefix + char`` was intended for that case.
        """
        for char in self.alphabet:
            if len(prefix) < n - 1:
                yield prefix + char
            else:
                yield prefix[1:] + char

    def estimate_strength(self, password):
        """Return the strength of ``password`` as a float in ``[0, 100]``.

        The strength is the minimum, over ``n`` in ``1..max_n``, of
        ``-log2`` of the product of the per-character conditional
        probabilities, capped at 100.  An order larger than the password
        length contributes 0 because it has no transitions.  If ``max_n`` is
        below 1 no order is evaluated and the cap (100) is returned.
        """
        # Drop the entries of the previously scored password so the table
        # does not grow without bound over many calls.
        self.conditional_probabilities.clear()
        min_strength = float('inf')  # Initialize with positive infinity
        for n in range(1, self.max_n + 1):
            self.ngram_counts.clear()
            self._count_ngrams(n, password)
            self._calculate_conditional_probabilities(n)
            # Sum the logs rather than multiplying the probabilities: the
            # raw product underflows to 0.0 for long passwords, and
            # math.log2(0.0) raises ValueError.
            strength = 0.0
            for i in range(n - 1, len(password)):
                prefix = password[i - n + 1:i]
                char = password[i]
                transition = self.conditional_probabilities.get(
                    (prefix, char), self._UNSEEN_PROBABILITY
                )
                strength -= math.log2(transition)
            min_strength = min(min_strength, strength)

        # Cap the strength so the result lies in the range 0 to 100.
        return min(min_strength, 100.0)

# Example usage: classify every entry of the password list.
dataset_file = "pass.txt"
max_n = 3  # Highest n-gram order evaluated for each password
estimator = PasswordStrengthEstimator(dataset_file, max_n)

# Each line of the file is treated as one password; surrounding whitespace
# (including the trailing newline) is stripped before scoring.
with open(dataset_file, 'r') as file:
    for line in file:
        password = line.strip()
        min_strength = estimator.estimate_strength(password)
        # A score of 50 or more counts as strong, anything below as weak.
        est = 'strong' if min_strength >= 50 else 'weak'
        print("'{}'".format(est))

'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'strong'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'strong'
'weak'
'weak'
'strong'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'strong'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
'weak'
