In [24]:
import html
import os
import random
import re
import secrets
import string
import warnings

import ipywidgets as widgets
import numpy as np
import pandas as pd
import requests
from IPython.display import display, clear_output, HTML, Javascript
from keras.layers import Dense
from keras.models import Sequential
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, fbeta_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")


In [2]:
# Load the raw password/strength CSV and keep only usable rows.
# The file's own header row is skipped in favour of explicit column names,
# and lines the parser cannot split are skipped instead of aborting the load.
raw_df = (
    pd.read_csv(
        '/kaggle/input/dataset1/data.csv',
        names=['Password', 'Strength_num'],
        skiprows=1,
        on_bad_lines='skip',
        encoding='utf-8',
        engine='python',
    )
    .dropna()
    # Keep only string passwords of at least six characters.
    .pipe(lambda frame: frame[frame['Password'].apply(lambda x: isinstance(x, str) and len(x) >= 6)])
    # Keep only rows labelled with one of the three strength classes.
    .pipe(lambda frame: frame[frame['Strength_num'].isin([0, 1, 2])])
)

import string

def extract_features(pwd):
    """Summarise one password as the four model features.

    Returns a ``pd.Series`` holding, in order: whether ``pwd`` contains a
    lowercase character, an uppercase character, a character from
    ``string.punctuation``, and the length of ``pwd``.
    """
    lower_seen = False
    upper_seen = False
    special_seen = False
    for ch in pwd:
        lower_seen = lower_seen or ch.islower()
        upper_seen = upper_seen or ch.isupper()
        special_seen = special_seen or (ch in string.punctuation)
    return pd.Series([lower_seen, upper_seen, special_seen, len(pwd)])

# Derive the four feature columns for the raw dataset, one row per password.
raw_df[['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length']] = (
    raw_df['Password'].apply(extract_features)
)

# Text label -> numeric class used as the training target.
strength_map = {'Weak': 0, 'Medium': 1, 'Strong': 2}

# The second dataset is read with its own header and filtered with the same
# rules: no missing values, string passwords of six or more characters, and
# only the three known strength labels.
feat_df = (
    pd.read_csv('/kaggle/input/password-dataset/passwords_dataset.csv')
    .dropna()
    .pipe(lambda frame: frame[frame['Password'].apply(lambda x: isinstance(x, str) and len(x) >= 6)])
    .pipe(lambda frame: frame[frame['Strength'].isin(['Weak', 'Medium', 'Strong'])])
)
feat_df['Strength_num'] = feat_df['Strength'].map(strength_map)


In [3]:
# Columns present in both frames; stacking on these yields one training table.
common_cols = [
    'Password',
    'Has Lowercase',
    'Has Uppercase',
    'Has Special Character',
    'Length',
    'Strength_num',
]
combined_df = pd.concat(
    [feat_df[common_cols], raw_df[common_cols]],
    ignore_index=True,
)

print("✅ Combined dataset shape:", combined_df.shape)


✅ Combined dataset shape: (677209, 6)


In [4]:
# Feature matrix: the three presence flags plus the length, all cast to int.
X = combined_df[
    ['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length']
].astype(int)
# Target: numeric strength class.
y = combined_df['Strength_num']

# Hold out 20% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
def evaluate_model(model, X_test, y_test, name="Model"):
    """Print accuracy, weighted F1 and weighted F2 for a fitted classifier.

    Args:
        model: fitted estimator whose ``predict`` returns class labels.
        X_test: held-out feature matrix passed to ``model.predict``.
        y_test: true labels for ``X_test``.
        name: label printed above the metrics.

    Returns:
        The weighted F2 score (``beta=2``), i.e. the same value that is printed.
    """
    y_pred = model.predict(X_test)
    # Compute F2 once and reuse it for both the report and the return value.
    f2 = fbeta_score(y_test, y_pred, average='weighted', beta=2)
    print(f"\n🔍 {name}")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
    print("F2 Score:", f2)
    return f2

# Seed for the stochastic tree-based estimators. It uses the same value as the
# train/test split so a fresh "Restart & Run All" reproduces their scores and
# therefore the best-model choice between them.
RANDOM_STATE = 42

# Classical baselines, all trained on the same four features.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE)
}

# name -> (fitted model, weighted F2 on the held-out split)
model_scores = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    f2 = evaluate_model(model, X_test, y_test, name)
    model_scores[name] = (model, f2)

# Small feed-forward network: 4 inputs -> 16 -> 8 -> 3-way softmax.
# NOTE(review): the network is not seeded here, so its score can still vary
# slightly between runs.
nn_model = Sequential()
nn_model.add(Dense(16, input_dim=4, activation='relu'))
nn_model.add(Dense(8, activation='relu'))
nn_model.add(Dense(3, activation='softmax'))
nn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

nn_model.fit(X_train, y_train, epochs=10, batch_size=256, verbose=0)
# The network outputs one probability per class; argmax turns that into a label.
y_pred_nn = np.argmax(nn_model.predict(X_test), axis=1)

f2_nn = fbeta_score(y_test, y_pred_nn, average='weighted', beta=2)
print("\n🔍 Neural Network")
print("F2 Score:", f2_nn)
model_scores['Neural Network'] = (nn_model, f2_nn)

# Highest F2 wins; on an exact tie max() keeps the entry inserted first.
best_model_name = max(model_scores, key=lambda k: model_scores[k][1])
best_model = model_scores[best_model_name][0]
print(f"\n✅ Best model based on F2 score: {best_model_name}")



🔍 Logistic Regression
Accuracy: 0.9979548441399271
F1 Score: 0.997954916892398
F2 Score: 0.9979543284388867

🔍 Random Forest
Accuracy: 0.9995053233118235
F1 Score: 0.9995053893875931
F2 Score: 0.9995053322944827

🔍 Decision Tree
Accuracy: 0.9995053233118235
F1 Score: 0.9995053893875931
F2 Score: 0.9995053322944827


I0000 00:00:1750960414.761172      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1750960414.761941      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5
I0000 00:00:1750960418.046789      81 service.cc:148] XLA service 0x7b7c9c0046c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1750960418.048276      81 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1750960418.048293      81 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1750960418.336697      81 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1750960419.440289      81 device_compiler.h:188] Compiled clust

[1m4233/4233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step

🔍 Neural Network
F2 Score: 0.999438879885274

✅ Best model based on F2 score: Random Forest


In [6]:
def extract_features_pwd(pwd):
    """Return the model features of one password as a plain list of ints.

    The list is ``[has_lowercase, has_uppercase, has_special, length]`` where
    the first three entries are 0/1 flags and "special" means a character
    from ``string.punctuation``.
    """
    flags = [0, 0, 0]
    for ch in pwd:
        if ch.islower():
            flags[0] = 1
        if ch.isupper():
            flags[1] = 1
        if ch in string.punctuation:
            flags[2] = 1
    return flags + [len(pwd)]

def predict_strength(pwd):
    """Predict the strength class of one password with the selected best model.

    Args:
        pwd: password string to classify.

    Returns:
        The predicted class as a plain ``int`` (0 = weak, 1 = medium,
        2 = strong, following ``strength_map``).
    """
    row = extract_features_pwd(pwd)
    if best_model_name == 'Neural Network':
        # The network returns one probability per class; verbose=0 keeps the
        # per-call progress bar out of the notebook output.
        probabilities = best_model.predict(np.array([row]), verbose=0)
        return int(np.argmax(probabilities, axis=1)[0])

    # The scikit-learn models were fitted on a DataFrame, so predict with the
    # same column names instead of an unnamed array.
    features = pd.DataFrame(
        [row],
        columns=['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length'],
    )
    return int(best_model.predict(features)[0])

def suggest_strong_password(base='', custom_word=None, max_attempts=100):
    """Build a password around a core word that the model rates as strong.

    Random padding is added before and after the core (``custom_word`` if it
    is non-empty, otherwise ``base``) until ``predict_strength`` returns 2.

    Args:
        base: fallback core used when ``custom_word`` is empty or ``None``.
        custom_word: preferred core to embed in the password.
        max_attempts: upper bound on candidates tried before giving up.

    Returns:
        A candidate string for which ``predict_strength`` returned 2.

    Raises:
        RuntimeError: if no candidate is rated strong within ``max_attempts``.
    """
    alphabet = string.ascii_letters + string.digits + string.punctuation
    core = custom_word if custom_word else base

    for attempt in range(max_attempts):
        # Start with 5 characters per side and add one more per side after
        # every 10 rejected candidates, so a model that mainly rewards length
        # is eventually satisfied instead of looping forever.
        pad = 5 + attempt // 10
        # secrets (not random) because the result is meant to be used as a
        # real password and must not come from a predictable generator.
        prefix = ''.join(secrets.choice(alphabet) for _ in range(pad))
        suffix = ''.join(secrets.choice(alphabet) for _ in range(pad))
        candidate = prefix + core + suffix
        if predict_strength(candidate) == 2:
            return candidate

    raise RuntimeError(
        f"No candidate was classified as strong after {max_attempts} attempts."
    )


In [49]:
def crack_time_estimate(password, guesses_per_sec):
    """Estimate the average exhaustive-search time for a password.

    The search space is ``pool ** len(password)``, where the pool adds 26 for
    lowercase, 26 for uppercase, 10 for digits and ``len(string.punctuation)``
    for punctuation when the password contains at least one such character.
    On average half of the space has to be searched.

    Args:
        password: password to estimate.
        guesses_per_sec: attacker speed in guesses per second.

    Returns:
        The ``(years, months, days, hours, minutes, seconds, milliseconds)``
        tuple produced by ``convert_time``.
    """
    charset = 0
    if any(c.islower() for c in password): charset += 26
    if any(c.isupper() for c in password): charset += 10 + 16
    if any(c.isdigit() for c in password): charset += 10
    if any(c in string.punctuation for c in password): charset += len(string.punctuation)

    # Characters outside the four classes above (spaces, CJK, emoji, ...) would
    # otherwise contribute nothing and could leave the pool at zero. Count each
    # distinct one as a conservative lower bound on the extra alphabet.
    uncategorised = {
        c for c in password
        if not (c.islower() or c.isupper() or c.isdigit() or c in string.punctuation)
    }
    charset += len(uncategorised)

    total_combinations = charset ** len(password)
    avg_guesses = total_combinations // 2
    try:
        seconds = avg_guesses / guesses_per_sec
    except OverflowError:
        # The search space of a very long password exceeds the float range;
        # fall back to exact integer division (sub-second detail is
        # irrelevant at that magnitude).
        seconds = avg_guesses // max(int(guesses_per_sec), 1)
    return convert_time(seconds)

def convert_time(seconds):
    """Split a duration in seconds into calendar-style units.

    Returns:
        ``(years, months, days, hours, minutes, seconds, milliseconds)`` with
        a year of 365 days and a month of 30 days.
    """
    whole_seconds = int(seconds)
    ms = (seconds - whole_seconds) * 1000
    m, s = divmod(whole_seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    y, d = divmod(d, 365)
    # NOTE(review): 365 is not a multiple of 30, so the month field can reach
    # 12 for the last few days of a year.
    mo, d = divmod(d, 30)
    return y, mo, d, h, m, s, int(ms)

def generate_strong_password(word="", max_attempts=100):
    """Wrap ``word`` in random characters until the model rates it strong.

    Args:
        word: optional text to embed between the random prefix and suffix.
        max_attempts: upper bound on candidates tried before giving up.

    Returns:
        A password string for which ``predict_strength`` returned 2.

    Raises:
        RuntimeError: if no candidate is rated strong within ``max_attempts``.
    """
    chars = string.ascii_letters + string.digits + string.punctuation

    for attempt in range(max_attempts):
        # Each side starts at 4-6 random characters and gains one more after
        # every 10 rejected candidates, so the search cannot spin forever when
        # the model only accepts longer passwords.
        extra = attempt // 10
        # secrets (not random): the suggestion is meant to be used as a real
        # password, so it must come from a cryptographically secure source.
        prefix = ''.join(secrets.choice(chars) for _ in range(4 + secrets.randbelow(3) + extra))
        suffix = ''.join(secrets.choice(chars) for _ in range(4 + secrets.randbelow(3) + extra))
        strong_pwd = prefix + word + suffix
        # predict_strength returns a class label for both the scikit-learn
        # models and the neural network (whose raw output is probabilities).
        if predict_strength(strong_pwd) == 2:
            return strong_pwd

    raise RuntimeError(
        f"No candidate was classified as strong after {max_attempts} attempts."
    )

# --- Password entry row -----------------------------------------------------
password_input = widgets.Password(
    layout=widgets.Layout(width='350px'),
)
password_box = widgets.HBox(
    [widgets.HTML("<b>Password:</b>"), password_input]
)

# --- Optional custom word row (disabled until the checkbox is ticked) --------
include_word = widgets.Checkbox(
    value=False,
    description="<b>Include custom word?</b>",
)
custom_word_input = widgets.Text(
    placeholder='e.g. Shield123',
    disabled=True,
    layout=widgets.Layout(width='350px'),
)
custom_word_box = widgets.HBox(
    [widgets.HTML("<b>Custom Word:</b>"), custom_word_input]
)

# --- Action button and the area the analysis is rendered into ---------------
submit_button = widgets.Button(
    description="🔍 Analyze Password",
    button_style='success',
    layout=widgets.Layout(width='200px'),
)
output_area = widgets.Output()


def toggle_custom_input(change):
    """Enable the custom-word text box only while the checkbox is ticked."""
    checkbox_ticked = change['new']
    custom_word_input.disabled = not checkbox_ticked


include_word.observe(toggle_custom_input, names='value')

# (table label, comparison label, guesses per second) for each attack scenario.
_CRACK_SCENARIOS = (
    ("Brute Force (10B/sec)", "Brute Force", 1e10),
    ("Dictionary (1M/sec)", "Dictionary", 1e6),
    ("Hybrid Guessing (100M/sec)", "Hybrid", 1e8),
)


def _crack_estimates(password):
    """Return one crack-time tuple per entry of _CRACK_SCENARIOS, in order."""
    return [crack_time_estimate(password, rate) for _, _, rate in _CRACK_SCENARIOS]


def _crack_table_html(estimates):
    """Render the 'Estimated Time to Crack' table for one password."""
    rows = []
    for (label, _, _), estimate in zip(_CRACK_SCENARIOS, estimates):
        cells = ''.join(f'<td>{v}</td>' for v in estimate)
        rows.append(f"<tr><td>{label}</td>{cells}</tr>")
    return (
        "<table style='border-collapse:collapse;width:90%;'>"
        "<tr><th>Method</th><th>Years</th><th>Months</th><th>Days</th>"
        "<th>Hours</th><th>Mins</th><th>Secs</th><th>Ms</th></tr>"
        + ''.join(rows)
        + "</table>"
    )


def _comparison_table_html(idx, input_estimates, suggestion_estimates):
    """Render the side-by-side input vs. suggestion table."""
    rows = []
    for (_, short_label, _), before, after in zip(_CRACK_SCENARIOS, input_estimates, suggestion_estimates):
        before_text = ' '.join(map(str, before))
        after_text = ' '.join(map(str, after))
        rows.append(f"<tr><td>{short_label}</td><td>{before_text}</td><td>{after_text}</td></tr>")
    return (
        "<table style='border-collapse:collapse;width:90%;border:1px solid #ccc;'>"
        "<tr style='background:#eee;'>"
        f"<th>Method</th><th>Input</th><th>Suggestion {idx}</th>"
        "</tr>"
        + ''.join(rows)
        + "</table>"
    )


def analyze_password(b):
    """Button callback: classify the entered password and show the report.

    Renders the predicted strength, the four features and the crack-time
    table into ``output_area``. When the password is not rated strong, three
    suggested passwords (optionally embedding the custom word) are shown,
    each with its own crack-time table and a comparison with the input.

    Args:
        b: the clicked button, supplied by ipywidgets and not used here.
    """
    with output_area:
        clear_output()

        input_pwd = password_input.value.strip()
        if len(input_pwd) < 4:
            display(HTML("<div style='color:red'>Please enter a valid password with at least 4 characters.</div>"))
            return

        has_lower = any(c.islower() for c in input_pwd)
        has_upper = any(c.isupper() for c in input_pwd)
        has_special = any(c in string.punctuation for c in input_pwd)
        length = len(input_pwd)

        strength_label = {0: 'Weak', 1: 'Medium', 2: 'Strong'}
        strength_color = {0: '#f44336', 1: '#ff9800', 2: '#4caf50'}
        # predict_strength yields a class label for every model type,
        # including the neural network whose raw output is a probability row.
        pred = int(predict_strength(input_pwd))

        input_estimates = _crack_estimates(input_pwd)

        display(HTML(f"""
        <div style='border:2px solid #ccc;padding:20px;border-radius:10px;margin:10px 0;'>
            <h3 style='color:{strength_color[pred]}'>Password Strength: {strength_label[pred]}</h3>
            <p><b>Has Lowercase:</b> {has_lower}</p>
            <p><b>Has Uppercase:</b> {has_upper}</p>
            <p><b>Has Special Character:</b> {has_special}</p>
            <p><b>Password Length:</b> {length}</p>
        </div>
        <h4>Estimated Time to Crack</h4>
        {_crack_table_html(input_estimates)}
        """))

        if pred < 2:
            word = custom_word_input.value if include_word.value else ""
            suggestions = [generate_strong_password(word) for _ in range(3)]

            for idx, suggested_pwd in enumerate(suggestions, 1):
                suggestion_estimates = _crack_estimates(suggested_pwd)
                # Suggestions contain punctuation such as < > & (and possibly
                # user text); escape them so the browser shows the password
                # literally instead of interpreting it as markup.
                shown_pwd = html.escape(suggested_pwd)

                display(HTML(f"""
                <div style='margin-top:20px;padding:15px;background:#f9f9f9;border-left:6px solid #4caf50;'>
                    <h4>Suggested Strong Password {idx}:</h4>
                    <p style='font-weight:bold;font-size:18px'>{shown_pwd}</p>
                    <h4>Estimated Time to Crack</h4>
                    {_crack_table_html(suggestion_estimates)}
                    <h4>Comparison with Input Password:</h4>
                    {_comparison_table_html(idx, input_estimates, suggestion_estimates)}
                </div>
                """))

submit_button.on_click(analyze_password)

def fetch_horizontal_news():
    """Fetch recent English technology headlines about passwords and breaches.

    This is a best-effort call: any network error, non-200 response or
    malformed payload yields an empty list instead of an exception.

    Returns:
        The list stored under ``"results"`` in the newsdata.io response, or
        ``[]`` when nothing usable was received.
    """
    # Prefer a key supplied through the environment.
    # NOTE(review): the embedded fallback keeps existing runs working, but a
    # key committed to a notebook should be rotated and removed.
    api_key = os.environ.get("NEWSDATA_API_KEY", "pub_c7284e4726d94120b0fc7ca9d9e8ec4e")
    params = {
        "apikey": api_key,
        "q": "password OR breach OR cybersecurity",
        "language": "en",
        "category": "technology",
    }
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get("https://newsdata.io/api/1/news", params=params, timeout=10)
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON.
        return []

    if not isinstance(payload, dict):
        return []
    # "results" may be present but null; normalise that to an empty list.
    return payload.get("results") or []

def display_horizontal_news(articles):
    """Render news articles as a horizontally scrolling strip of cards.

    Args:
        articles: iterable of dicts with optional ``title``, ``description``,
            ``link``, ``pubDate`` and ``source_id`` keys. Entries that are not
            dicts are skipped and articles with a repeated title are shown
            only once.

    All article text comes from an external API, so it is HTML-escaped before
    being inserted into the page, and only http(s) links are kept.
    """
    if not articles:
        display(HTML("<p style='color:red;'>❌ No recent cybersecurity news found.</p>"))
        return

    news_html = """
    <style>
    .news-scroll {
        overflow-x: auto;
        white-space: nowrap;
        padding: 10px;
    }
    .news-card {
        display: inline-block;
        vertical-align: top;
        background: #f0f8ff;
        border: 1px solid #ccc;
        border-radius: 10px;
        margin: 0 10px;
        width: 300px;
        padding: 15px;
        box-shadow: 2px 2px 6px rgba(0,0,0,0.1);
        white-space: normal;
        word-wrap: break-word;
        font-family: 'Segoe UI', sans-serif;
    }
    .news-card h4 {
        color: #0d47a1;
        font-size: 16px;
        margin-top: 0;
        margin-bottom: 8px;
    }
    .news-card p {
        font-size: 13px;
        color: #444;
        margin: 0 0 8px;
    }
    .news-card .meta {
        font-size: 12px;
        color: #777;
    }
    .news-card a {
        color: #1a237e;
        font-weight: bold;
        font-size: 13px;
    }
    </style>
    <div class='news-scroll'>
    """

    seen = set()
    for article in articles:
        if not isinstance(article, dict):
            continue

        # `or` also covers keys that are present but null in the payload.
        title = str(article.get("title") or "No Title")
        if title in seen: continue
        seen.add(title)

        desc = str(article.get("description") or "No description available.")
        date = str(article.get("pubDate") or "")[:10]
        source = str(article.get("source_id") or "Unknown Source")

        # Only plain web links are allowed; anything else (for example a
        # javascript: URL) is replaced by an inert anchor.
        link = str(article.get("link") or "#").strip()
        if not link.lower().startswith(("http://", "https://")):
            link = "#"

        news_html += f"""
        <div class='news-card'>
            <h4>{html.escape(title)}</h4>
            <p>{html.escape(desc)}</p>
            <p class='meta'><b>Source:</b> {html.escape(source)}<br><b>Date:</b> {html.escape(date)}</p>
            <a href='{html.escape(link)}' target='_blank'>🔗 Read More</a>
        </div>
        """
    news_html += "</div>"

    display(HTML(f"<h2 style='color:#3f51b5;'>📰 Live Cybersecurity News</h2>{news_html}"))

# Page title first, then the input form, then the area the report renders into.
display(HTML("<h2 style='color:#3f51b5;'>CrackShield AI – The Intelligent Password Guardian</h2>"))

form_children = [
    widgets.HTML("""
    <div style='background:#e3f2fd;padding:15px;border:1px solid #90caf9;border-radius:10px;margin-bottom:10px;'>
        <h3 style='margin-top:0;color:#0d47a1;'>🔐 Enter Your Password</h3>
    </div>
    """),
    password_box,
    include_word,
    custom_word_box,
    widgets.HTML("<br>"),
    submit_button,
]
display(widgets.VBox(form_children))
display(output_area)

# Fetch the headlines once and render them below the form.
articles = fetch_horizontal_news()
display_horizontal_news(articles)



VBox(children=(HTML(value="\n    <div style='background:#e3f2fd;padding:15px;border:1px solid #90caf9;border-r…

Output()