In [None]:
# 📦 Import standard-library modules
import html
import os
import random
import re
import string
import warnings
from fractions import Fraction

# 📦 Import core libraries for data handling and preprocessing
import numpy as np
import pandas as pd
from tqdm import tqdm  # 📊 Progress bar for loops

# 📚 Import machine learning tools
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    fbeta_score
)

# 🤖 Import traditional ML models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# 🧠 Import Deep Learning modules from Keras
from keras.models import Sequential
from keras.layers import Dense

# 🎛️ UI and notebook integration
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML, Javascript

# 🌐 For fetching live cybersecurity news
import requests

# ⚠️ Ignore warning messages for clean output.
# NOTE(review): no category or module is given, so this is a blanket filter — every
# warning raised by any later cell is hidden, not only cosmetic ones.
warnings.filterwarnings("ignore")


In [None]:
from tqdm import tqdm

# -------------------- Load and Clean data.csv --------------------

# Read the first dataset with custom column names and skip header row
raw_df = pd.read_csv(
    '/kaggle/input/dataset1/data.csv',
    names=['Password', 'Strength_num'],     # Rename columns explicitly
    skiprows=1,                              # Skip the original header
    on_bad_lines='skip',                     # Skip corrupted or bad rows
    encoding='utf-8',
    engine='python'
)

# Remove any rows with missing values
raw_df = raw_df.dropna()

# Keep only passwords that are strings and at least 6 characters long
raw_df = raw_df[raw_df['Password'].apply(lambda x: isinstance(x, str) and len(x) >= 6)]

# Coerce the labels to numbers before filtering: if the column was parsed as text
# (e.g. '1' instead of 1), isin([0, 1, 2]) would otherwise silently drop every row.
raw_df = raw_df.assign(Strength_num=pd.to_numeric(raw_df['Strength_num'], errors='coerce'))

# Filter only valid strength labels: 0 = Weak, 1 = Medium, 2 = Strong.
# .copy() detaches the result from the filtered slice so that later column
# assignments on raw_df operate on an independent frame.
raw_df = raw_df[raw_df['Strength_num'].isin([0, 1, 2])].copy()

# Store labels as plain integers so they match the mapped labels of the second dataset
raw_df['Strength_num'] = raw_df['Strength_num'].astype(int)

# -------------------- Feature Extraction Function --------------------
def extract_features(pwd):
    """
    Compute the model features for a single password.

    Returns a pandas Series with four entries, in this order:
    - whether any character is lowercase
    - whether any character is uppercase
    - whether any character belongs to string.punctuation
    - the number of characters in the password
    """
    punctuation_chars = set(string.punctuation)
    lower_seen = False
    upper_seen = False
    special_seen = False

    # Single pass over the characters, updating each flag as it is discovered
    for ch in pwd:
        if not lower_seen and ch.islower():
            lower_seen = True
        if not upper_seen and ch.isupper():
            upper_seen = True
        if not special_seen and ch in punctuation_chars:
            special_seen = True

    return pd.Series([lower_seen, upper_seen, special_seen, len(pwd)])

# Apply feature extraction with tqdm tracker
tqdm.pandas(desc="🔍 Extracting features from raw_df")

# raw_df is the result of boolean filtering; take an explicit copy before adding
# columns so the assignment below never targets a view of the unfiltered frame.
raw_df = raw_df.copy()
raw_df[['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length']] = raw_df['Password'].progress_apply(extract_features)

# -------------------- Load and Clean passwords_dataset.csv --------------------

# Load the second dataset
# NOTE(review): the combine step later selects 'Has Lowercase', 'Has Uppercase',
# 'Has Special Character' and 'Length' from feat_df, so this CSV is presumably
# expected to ship those columns already — confirm against the file.
feat_df = pd.read_csv('/kaggle/input/password-dataset/passwords_dataset.csv')

# Drop missing entries
feat_df = feat_df.dropna()

# Keep passwords that are strings and of sufficient length
feat_df = feat_df[feat_df['Password'].apply(lambda x: isinstance(x, str) and len(x) >= 6)]

# Keep only rows with valid strength labels; .copy() makes the filtered frame
# independent so the new 'Strength_num' column is added to it directly.
feat_df = feat_df[feat_df['Strength'].isin(['Weak', 'Medium', 'Strong'])].copy()

# Convert categorical strength labels to numerical
strength_map = {'Weak': 0, 'Medium': 1, 'Strong': 2}
feat_df['Strength_num'] = feat_df['Strength'].map(strength_map)


In [None]:
from tqdm import tqdm

# -------------------- Combine Both Cleaned Datasets --------------------

# Columns shared by both cleaned frames; only these survive the merge
common_cols = ['Password', 'Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length', 'Strength_num']

# Register the tqdm/pandas integration with a description for this stage
tqdm.pandas(desc="📦 Combining datasets")

# Stack the feature dataset on top of the raw dataset, restricted to the shared
# columns, and renumber the rows from zero
frames_to_stack = [frame[common_cols] for frame in (feat_df, raw_df)]
combined_df = pd.concat(frames_to_stack, ignore_index=True)

# Report the size of the merged dataset
print("✅ Combined dataset shape:", combined_df.shape)


In [None]:
# -------------------- Feature and Target Selection --------------------

# The four model inputs, cast to integers so every estimator receives numeric data
feature_columns = ['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length']
X = combined_df[feature_columns].astype(int)

# Target: password strength class (0 = Weak, 1 = Medium, 2 = Strong)
y = combined_df['Strength_num']

# -------------------- Train-Test Split --------------------

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# ✅ Tracker Logs: report the resulting sizes
print("✅ Feature and Target split complete.")
print(f"🧠 Training set size: {X_train.shape[0]} samples")
print(f"🧪 Testing set size: {X_test.shape[0]} samples")
print(f"🎯 Total features used: {X.shape[1]}")


In [None]:
# Function to evaluate a given model using Accuracy, F1, and F2 scores
def evaluate_model(model, X_test, y_test, name="Model"):
    """
    Print accuracy, weighted F1 and weighted F2 for a fitted model.

    Parameters:
        model: fitted estimator exposing predict(X) that returns class labels
        X_test: held-out features
        y_test: true labels for X_test
        name (str): heading printed above the metrics

    Returns:
        The weighted F2 score (beta=2) on the held-out data.
    """
    predictions = model.predict(X_test)
    weighted = {'average': 'weighted'}

    print(f"\n🔍 {name}")
    print("Accuracy:", accuracy_score(y_test, predictions))
    print("F1 Score:", f1_score(y_test, predictions, **weighted))

    # Computed once and reused for both the printout and the return value
    f2_value = fbeta_score(y_test, predictions, beta=2, **weighted)
    print("F2 Score:", f2_value)
    return f2_value

# Dictionary of ML models to evaluate.
# - random_state pins the randomized tree-based models so the reported scores (and
#   therefore the selected "best" model) are repeatable across runs.
# - max_iter is raised above scikit-learn's default of 100 to give the solver room to
#   converge on the unscaled 'Length' feature.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

# Train each model and evaluate using F2 score
model_scores = {}
print("\n🔄 Training traditional ML models...")
for name, model in tqdm(models.items(), desc="Evaluating ML Models"):
    model.fit(X_train, y_train)
    f2 = evaluate_model(model, X_test, y_test, name)
    model_scores[name] = (model, f2)

# Define a simple Neural Network using Keras.
# The input width follows the feature matrix instead of a hard-coded 4.
# NOTE(review): the network's weight initialisation is not seeded here, so its score
# can still vary slightly between runs.
print("\n⚙️ Compiling Neural Network...")
nn_model = Sequential()
nn_model.add(Dense(16, input_dim=X_train.shape[1], activation='relu'))
nn_model.add(Dense(8, activation='relu'))
nn_model.add(Dense(3, activation='softmax'))  # 3 output classes for strength
nn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the neural network
print("\n🚀 Training Neural Network...")
nn_model.fit(X_train, y_train, epochs=10, batch_size=256, verbose=0)

# Predict class labels (argmax over the softmax outputs) and report the same three
# metrics that evaluate_model prints for the traditional models.
y_pred_nn = np.argmax(nn_model.predict(X_test, verbose=0), axis=1)
f2_nn = fbeta_score(y_test, y_pred_nn, average='weighted', beta=2)
print("\n🔍 Neural Network")
print("Accuracy:", accuracy_score(y_test, y_pred_nn))
print("F1 Score:", f1_score(y_test, y_pred_nn, average='weighted'))
print("F2 Score:", f2_nn)

# Add neural network result to model scores
model_scores['Neural Network'] = (nn_model, f2_nn)

# Select best model based on F2 score
best_model_name = max(model_scores, key=lambda k: model_scores[k][1])
best_model = model_scores[best_model_name][0]
print(f"\n✅ Best model based on F2 score: {best_model_name}")

In [None]:
from tqdm import tqdm

# -------------------- Password Feature Extraction --------------------

def extract_features_pwd(pwd):
    """
    Turn a password into the numeric feature vector used for prediction.

    Returns:
        List of four integers:
        [has_lower (0/1), has_upper (0/1), has_special (0/1), length]
    """
    # One predicate per character class, applied in the order of the feature vector
    character_tests = (
        lambda ch: ch.islower(),
        lambda ch: ch.isupper(),
        lambda ch: ch in string.punctuation,
    )
    flags = [int(any(test(ch) for ch in pwd)) for test in character_tests]
    flags.append(len(pwd))
    return flags

# -------------------- Predict Password Strength --------------------

def predict_strength(pwd):
    """
    Predict the strength of a given password using the best-trained model.
    Supports both traditional ML models and Neural Network.

    Parameters:
        pwd (str): password to classify

    Returns:
        int: 0 = Weak, 1 = Medium, 2 = Strong
    """
    features = np.array([extract_features_pwd(pwd)])

    if best_model_name == 'Neural Network':
        # The network outputs one probability per class; argmax picks the label.
        # verbose=0 suppresses the per-call Keras progress bar.
        pred = np.argmax(best_model.predict(features, verbose=0), axis=1)[0]
    else:
        pred = best_model.predict(features)[0]

    # Normalise the numpy scalar to a plain int so callers can compare it or use it
    # as a dictionary key without caring which model produced it.
    pred = int(pred)

    # Log only the length: the plaintext password must not end up in saved notebook output.
    print(f"[TRACKER] Password strength prediction done for a {len(pwd)}-character password => Class {pred}")
    return pred

# -------------------- Suggest Strong Password --------------------

def suggest_strong_password(base='', custom_word=None):
    """
    Search for a password that the model classifies as 'Strong'.

    Each candidate is built as <5 random characters> + core + <5 random characters>,
    where core is custom_word when it is truthy and base otherwise. At most 10000
    candidates are tried.

    Parameters:
        base (str): Optional base string
        custom_word (str): Optional custom word to include

    Returns:
        The first candidate predicted as class 2, or None if none was found.
    """
    alphabet = string.ascii_letters + string.digits + string.punctuation
    core = custom_word if custom_word else base

    for attempt_no in tqdm(range(1, 10001), desc="⏳ Generating Strong Password"):
        prefix = ''.join(random.choices(alphabet, k=5))
        suffix = ''.join(random.choices(alphabet, k=5))
        candidate = prefix + core + suffix

        if predict_strength(candidate) != 2:
            continue

        print(f"[TRACKER] Strong password found after {attempt_no} attempts.")
        return candidate

    print("[TRACKER] Failed to generate strong password in 10000 attempts.")
    return None


In [None]:
# ---------------------- Password Crack Time Estimation ----------------------

def crack_time_estimate(password, guesses_per_sec):
    """
    Estimate the time required to crack a password based on guessing rate.

    The search space is charset_size ** len(password), where the charset size is the
    sum of the character classes present (26 lowercase, 26 uppercase, 10 digits,
    len(string.punctuation) specials). On average half of that space is searched.

    Parameters:
    - password (str): the input password.
    - guesses_per_sec (float): number of guesses per second (e.g., 1e10 for brute-force).

    Returns:
    - tuple: (years, months, days, hours, minutes, seconds, milliseconds), all ints.

    Raises:
    - ZeroDivisionError: if guesses_per_sec is 0.
    """
    charset = 0
    # Count character set size based on presence of types.
    # Characters outside these four classes (spaces, non-cased symbols) add nothing.
    if any(c.islower() for c in password): charset += 26
    if any(c.isupper() for c in password): charset += 26
    if any(c.isdigit() for c in password): charset += 10
    if any(c in string.punctuation for c in password): charset += len(string.punctuation)

    total_combinations = charset ** len(password)
    avg_guesses = total_combinations // 2

    # Exact rational arithmetic: dividing the (arbitrarily large) integer by a float
    # raises OverflowError once the search space exceeds the float range, which
    # happens for long passwords.
    seconds = Fraction(avg_guesses) / Fraction(guesses_per_sec)
    return convert_time(seconds)

# ---------------------- Time Converter Helper ----------------------

def convert_time(seconds):
    """
    Convert seconds to (years, months, days, hours, minutes, seconds, milliseconds)

    Uses 365-day years and 30-day months; because 12 * 30 < 365, the months field
    can reach 12 for the last few days of a year.

    Parameters:
    - seconds (int | float | Fraction): non-negative duration in seconds.

    Returns:
    - tuple of ints: (years, months, days, hours, minutes, seconds, milliseconds)
    """
    whole_seconds = int(seconds)

    # Round instead of truncating: (1.001 - 1) * 1000 is 0.99999... in floating
    # point, which truncation would report as 0 ms.
    ms = round((seconds - whole_seconds) * 1000)
    if ms >= 1000:
        # Rounding reached a full second; carry it over.
        whole_seconds += 1
        ms -= 1000

    m, s = divmod(whole_seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    y, d = divmod(d, 365)
    mo, d = divmod(d, 30)
    return y, mo, d, h, m, s, int(ms)

# ---------------------- Strong Password Generator ----------------------

def generate_strong_password(word="", max_attempts=1000):
    """
    Generate a strong password using a custom word (optional).

    Candidates are built as <random prefix> + word + <random suffix> and checked
    against the selected best model until one is classified as Strong (class 2).
    The random padding starts at 4-6 characters per side and grows by one character
    every 20 failed attempts, so the search still terminates when the model only
    accepts longer passwords.

    Parameters:
    - word (str): custom word to include in the generated password.
    - max_attempts (int): maximum number of candidates to try.

    Returns:
    - str: strong password that is predicted as "Strong" by the model.

    Raises:
    - RuntimeError: if no candidate is classified as Strong within max_attempts.
    """
    chars = string.ascii_letters + string.digits + string.punctuation
    columns = ['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length']

    for attempt in range(max_attempts):
        extra = attempt // 20  # widen the padding gradually after repeated failures
        suffix = ''.join(random.choices(chars, k=random.randint(4, 6) + extra))
        prefix = ''.join(random.choices(chars, k=random.randint(4, 6) + extra))
        strong_pwd = prefix + word + suffix

        # Integer features, matching the integer-cast matrix the models were trained on
        feat = pd.DataFrame([[
            int(any(c.islower() for c in strong_pwd)),
            int(any(c.isupper() for c in strong_pwd)),
            int(any(c in string.punctuation for c in strong_pwd)),
            len(strong_pwd)
        ]], columns=columns)

        if best_model_name == 'Neural Network':
            # The network returns class probabilities; argmax converts them to a label
            strength = int(np.argmax(best_model.predict(feat.to_numpy(), verbose=0), axis=1)[0])
        else:
            strength = int(best_model.predict(feat)[0])

        if strength == 2:
            return strong_pwd

    raise RuntimeError(f"No password was classified as Strong within {max_attempts} attempts.")

# ---------------------- UI Widgets for Password Input ----------------------

# Password entry: a plain (visible) text field with a bold label to its left
password_input = widgets.Text(
    layout=widgets.Layout(width='350px'),
)
password_label = widgets.HTML("<b>Password:</b>")
password_box = widgets.HBox([password_label, password_input])

# Optional custom word: the text field starts disabled, the checkbox starts unticked
include_word = widgets.Checkbox(
    value=False,
    description="<b>Include custom word?</b>",
)
custom_word_input = widgets.Text(
    placeholder='e.g. Shield123',
    disabled=True,
    layout=widgets.Layout(width='350px'),
)
custom_word_label = widgets.HTML("<b>Custom Word:</b>")
custom_word_box = widgets.HBox([custom_word_label, custom_word_input])

# Trigger button and the output area that receives the analysis
submit_button = widgets.Button(
    description="🔍 Analyze Password",
    button_style='success',
    layout=widgets.Layout(width='200px'),
)
output_area = widgets.Output()

# Keep the custom-word textbox usable only while the checkbox is ticked
def toggle_custom_input(change):
    """Observer for the checkbox value: disable the textbox when the new value is falsy."""
    is_checked = change['new']
    custom_word_input.disabled = not is_checked

include_word.observe(toggle_custom_input, names='value')

# ---------------------- Main Password Analysis Handler ----------------------

def _crack_times(pwd):
    """Return crack-time tuples for pwd at the brute-force, dictionary and hybrid guess rates."""
    return (
        crack_time_estimate(pwd, 1e10),  # Brute Force
        crack_time_estimate(pwd, 1e6),   # Dictionary
        crack_time_estimate(pwd, 1e8),   # Hybrid
    )


def _crack_table_html(bt, dt, ht):
    """Render three crack-time tuples as the 'Estimated Time to Crack' HTML table."""
    def cells(values):
        return ''.join(f'<td>{v}</td>' for v in values)

    return f"""
        <table style='border-collapse:collapse;width:90%;'>
            <tr><th>Method</th><th>Years</th><th>Months</th><th>Days</th><th>Hours</th><th>Mins</th><th>Secs</th><th>Ms</th></tr>
            <tr><td>Brute Force (10B/sec)</td>{cells(bt)}</tr>
            <tr><td>Dictionary (1M/sec)</td>{cells(dt)}</tr>
            <tr><td>Hybrid Guessing (100M/sec)</td>{cells(ht)}</tr>
        </table>
    """


def analyze_password(b):
    """
    Button handler: classify the entered password, show its crack-time estimates and,
    when it is not Strong, show three generated alternatives with a comparison.

    Parameters:
    - b: the clicked button (unused; required by the on_click callback signature).
    """
    with output_area:
        clear_output()

        input_pwd = password_input.value.strip()
        if len(input_pwd) < 4:
            display(HTML("<div style='color:red'>Please enter a valid password with at least 4 characters.</div>"))
            return

        # Extract password features
        has_lower = any(c.islower() for c in input_pwd)
        has_upper = any(c.isupper() for c in input_pwd)
        has_special = any(c in string.punctuation for c in input_pwd)
        length = len(input_pwd)

        # Integer features, matching the integer-cast matrix the models were trained on
        input_features = pd.DataFrame([[int(has_lower), int(has_upper), int(has_special), length]],
                                      columns=['Has Lowercase', 'Has Uppercase', 'Has Special Character', 'Length'])

        # Predict password strength
        strength_label = {0: 'Weak', 1: 'Medium', 2: 'Strong'}
        strength_color = {0: '#f44336', 1: '#ff9800', 2: '#4caf50'}
        if best_model_name == 'Neural Network':
            # The network returns class probabilities; argmax converts them to a label
            pred = int(np.argmax(best_model.predict(input_features.to_numpy(), verbose=0), axis=1)[0])
        else:
            pred = int(best_model.predict(input_features)[0])

        # Estimate time to crack under different attack models
        bt, dt, ht = _crack_times(input_pwd)

        # Display password analysis result
        display(HTML(f"""
        <div style='border:2px solid #ccc;padding:20px;border-radius:10px;margin:10px 0;'>
            <h3 style='color:{strength_color[pred]}'>Password Strength: {strength_label[pred]}</h3>
            <p><b>Has Lowercase:</b> {has_lower}</p>
            <p><b>Has Uppercase:</b> {has_upper}</p>
            <p><b>Has Special Character:</b> {has_special}</p>
            <p><b>Password Length:</b> {length}</p>
        </div>
        <h4>Estimated Time to Crack</h4>
        {_crack_table_html(bt, dt, ht)}
        """))

        # If password is weak or medium, suggest improvements
        if pred >= 2:
            return

        word = custom_word_input.value if include_word.value else ""
        try:
            suggestions = [generate_strong_password(word) for _ in range(3)]
        except RuntimeError as exc:
            # Report a failed search in the output area instead of a raw traceback
            display(HTML(f"<div style='color:red'>Could not generate a strong password suggestion: {html.escape(str(exc))}</div>"))
            return

        for idx, suggested_pwd in enumerate(suggestions, 1):
            bt2, dt2, ht2 = _crack_times(suggested_pwd)

            # Generated passwords contain characters such as < > & that must be
            # escaped, otherwise the browser renders a different string.
            shown_pwd = html.escape(suggested_pwd)

            display(HTML(f"""
            <div style='margin-top:20px;padding:15px;background:#f9f9f9;border-left:6px solid #4caf50;'>
                <h4>Suggested Strong Password {idx}:</h4>
                <p style='font-weight:bold;font-size:18px'>{shown_pwd}</p>
                <h4>Estimated Time to Crack</h4>
                {_crack_table_html(bt2, dt2, ht2)}
                <h4>Comparison with Input Password:</h4>
                <table style='border-collapse:collapse;width:90%;border:1px solid #ccc;'>
                    <tr style='background:#eee;'>
                        <th>Method</th><th>Input</th><th>Suggestion {idx}</th>
                    </tr>
                    <tr><td>Brute Force</td><td>{' '.join(map(str, bt))}</td><td>{' '.join(map(str, bt2))}</td></tr>
                    <tr><td>Dictionary</td><td>{' '.join(map(str, dt))}</td><td>{' '.join(map(str, dt2))}</td></tr>
                    <tr><td>Hybrid</td><td>{' '.join(map(str, ht))}</td><td>{' '.join(map(str, ht2))}</td></tr>
                </table>
            </div>
            """))

submit_button.on_click(analyze_password)

# ---------------------- Live Cybersecurity News API Fetch ----------------------

def fetch_horizontal_news():
    """
    Fetch live cybersecurity-related news using NewsData.io API.

    Best-effort: any network, HTTP or payload problem yields an empty list.

    Returns:
    - list: articles list (empty when the request fails or returns no results)
    """
    # Prefer a key supplied through the environment.
    # NOTE(review): the fallback key is committed in source — rotate it and remove
    # the fallback once NEWSDATA_API_KEY is configured where this notebook runs.
    api_key = os.environ.get("NEWSDATA_API_KEY") or "pub_c7284e4726d94120b0fc7ca9d9e8ec4e"

    # Let requests build and encode the query string
    params = {
        "apikey": api_key,
        "q": "password OR breach OR cybersecurity",
        "language": "en",
        "category": "technology",
    }
    try:
        # The timeout keeps an unresponsive API from hanging the cell indefinitely
        response = requests.get("https://newsdata.io/api/1/news", params=params, timeout=10)
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Connection problems, timeouts, or a body that is not valid JSON
        return []

    # "results" may be missing or null; always hand back a list
    results = payload.get("results") if isinstance(payload, dict) else None
    return results if isinstance(results, list) else []

# ---------------------- Horizontal News Card Display ----------------------

def display_horizontal_news(articles):
    """
    Render news articles as horizontally scrollable cards in HTML.

    Article fields come from an external API, so every value is HTML-escaped before
    being placed in the markup, and only http(s) links are emitted. Missing or null
    fields fall back to placeholder text. Articles with a repeated title are shown once.

    Parameters:
    - articles (list): list of news articles (dicts) to display
    """
    if not articles:
        display(HTML("<p style='color:red;'>❌ No recent cybersecurity news found.</p>"))
        return

    news_html = """
    <style>
    .news-scroll {
        overflow-x: auto;
        white-space: nowrap;
        padding: 10px;
    }
    .news-card {
        display: inline-block;
        vertical-align: top;
        background: #f0f8ff;
        border: 1px solid #ccc;
        border-radius: 10px;
        margin: 0 10px;
        width: 300px;
        padding: 15px;
        box-shadow: 2px 2px 6px rgba(0,0,0,0.1);
        white-space: normal;
        word-wrap: break-word;
        font-family: 'Segoe UI', sans-serif;
    }
    .news-card h4 {
        color: #0d47a1;
        font-size: 16px;
        margin-top: 0;
        margin-bottom: 8px;
    }
    .news-card p {
        font-size: 13px;
        color: #444;
        margin: 0 0 8px;
    }
    .news-card .meta {
        font-size: 12px;
        color: #777;
    }
    .news-card a {
        color: #1a237e;
        font-weight: bold;
        font-size: 13px;
    }
    </style>
    <div class='news-scroll'>
    """

    seen = set()
    cards = []
    for article in articles:
        # `or` also covers keys that are present but null in the API response
        title = str(article.get("title") or "No Title")
        if title in seen:
            continue
        seen.add(title)

        desc = str(article.get("description") or "No description available.")
        source = str(article.get("source_id") or "Unknown Source")
        date = str(article.get("pubDate") or "")[:10]

        # Only plain web links are emitted; anything else becomes an inert anchor
        url = str(article.get("link") or "#")
        if not url.lower().startswith(("http://", "https://")):
            url = "#"

        cards.append(f"""
        <div class='news-card'>
            <h4>{html.escape(title)}</h4>
            <p>{html.escape(desc)}</p>
            <p class='meta'><b>Source:</b> {html.escape(source)}<br><b>Date:</b> {html.escape(date)}</p>
            <a href='{html.escape(url, quote=True)}' target='_blank'>🔗 Read More</a>
        </div>
        """)

    news_html += ''.join(cards)
    news_html += "</div>"

    display(HTML(f"<h2 style='color:#3f51b5;'>📰 Live Cybersecurity News</h2>{news_html}"))

# ---------------------- Final UI Rendering ----------------------

# Page title
display(HTML("<h2 style='color:#3f51b5;'>CrackShield AI – The Intelligent Password Guardian</h2>"))

# Input form: banner, password row, custom-word controls, spacer, submit button
form_banner = widgets.HTML("""
    <div style='background:#e3f2fd;padding:15px;border:1px solid #90caf9;border-radius:10px;margin-bottom:10px;'>
        <h3 style='margin-top:0;color:#0d47a1;'>🔐 Enter Your Password</h3>
    </div>
    """)
form_rows = [
    form_banner,
    password_box,
    include_word,
    custom_word_box,
    widgets.HTML("<br>"),
    submit_button,
]
display(widgets.VBox(form_rows))

# Analysis results are written here by the button handler
display(output_area)

# Fetch and render live news
articles = fetch_horizontal_news()
display_horizontal_news(articles)
