In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ✅ List of vulnerable encryption algorithms
VULNERABLE_ALGORITHMS = ["DES", "3DES", "RC4", "Blowfish", "SHA-1", "MD5"]

# ✅ Load dataset dynamically
# NOTE: the path is resolved against the current working directory, which is
# not necessarily the folder that contains this script.
file_path = os.path.join(os.getcwd(), "cryptography_dataset_enhanced.csv")

# 🚨 Check if dataset exists
if not os.path.exists(file_path):
    print(f"❌ Error: File not found - {file_path}")
    print("📌 Make sure 'cryptography_dataset_enhanced.csv' is in the same folder as this script.")
    # Raise SystemExit directly instead of calling `exit()`: that helper is
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist in every runtime. The exit status (1) is unchanged.
    raise SystemExit(1)

# ✅ Load the dataset
data = pd.read_csv(file_path)

# ✅ Drop rows with missing Ciphertext, Key, or Algorithm
data = data.dropna(subset=['Ciphertext', 'Key', 'Algorithm'])

# ✅ Combine Ciphertext, Key, and Algorithm as input features
# Cast each column to str first: read_csv may infer a numeric dtype for a
# column (e.g. all-digit keys), and number + " " would raise TypeError.
# Missing values were dropped above, so no "nan" strings are introduced.
data['Combined'] = (
    data['Ciphertext'].astype(str)
    + " "
    + data['Key'].astype(str)
    + " "
    + data['Algorithm'].astype(str)
)

# ✅ Create a target column: 1 for vulnerable, 0 for secure
# Vectorized membership test instead of a per-row Python lambda.
data['IsVulnerable'] = data['Algorithm'].isin(VULNERABLE_ALGORITHMS).astype(int)

# NOTE(review): the label is computed purely from 'Algorithm', and 'Algorithm'
# is also part of the 'Combined' feature text, so the target is present in the
# input. A perfect score below is therefore expected and does not show that
# the model learned anything from the ciphertext or the key.

# ✅ Split data into training & testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    data['Combined'], data['IsVulnerable'], test_size=0.2, random_state=42
)

# ✅ TF-IDF Vectorizer to convert text into numerical features
# Fit on the training split only; the test split is transformed with the
# vocabulary learned from training data.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# ✅ Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train_vec, y_train)

# ✅ Evaluate the model on the test set
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
print(f"🎯 Model Accuracy: {accuracy:.2f}")
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred))

# ✅ Function to check if ciphertext is vulnerable
def check_cipher_vulnerability(ciphertext, key, algorithm):
    """Classify one (ciphertext, key, algorithm) triple with the trained model.

    The three strings are joined with single spaces, converted to features by
    the module-level ``vectorizer`` and passed to the module-level ``model``.

    Returns:
        A human-readable verdict string that names ``algorithm``: the
        "Vulnerable" message when the model predicts class 1, otherwise the
        "Secure" message.
    """
    sample = " ".join((ciphertext, key, algorithm))
    features = vectorizer.transform([sample])
    prediction = model.predict(features)[0]

    # Guard clause: anything other than class 1 is reported as secure.
    if prediction != 1:
        return f"✅ Secure Encryption Detected: {algorithm} - 🔒 Safe to Use."
    return f"⚠️ Vulnerable Encryption Detected: {algorithm} - ❌ NOT Secure!"


# ✅ Example Test Cases
# Each entry is a (ciphertext, key, algorithm) tuple.
test_cases = [
    # 🚨 Weak Examples
    ("3AB45CF912DEFA56C7890B12E345F678", "WeakRC4Key123", "RC4"),  # RC4
    ("A1B2C3D4E5F67890", "WeakDESKey", "DES"),  # DES
    ("7C8D9EAF12345678", "3DESKey123", "3DES"),  # 3DES
    ("CAFEBABEDEADBEEF", "ShortRC4Key", "RC4"),  # RC4

    # ✅ Strong Examples
    ("LMhm3BjIcUb/1eqdlvRyI/rcGWpF/2xvkdNsCosd+hIKdi...", "StrongAESKey098", "AES"),  # AES
    ("91eb4ffbe4b7e5f3844a...", "ChaChaSuperSecureKey", "ChaCha20"),  # ChaCha20
]

# Run tests
# Feed every example through the trained classifier and print its verdict.
# Previously this loop iterated placeholder lists against a hard-coded set of
# "secure" names, so `test_cases` was never used and the model was never
# consulted.
for ct, k, algo in test_cases:
    print(check_cipher_vulnerability(ct, k, algo))

🎯 Model Accuracy: 1.00

📊 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1565
           1       1.00      1.00      1.00      1950

    accuracy                           1.00      3515
   macro avg       1.00      1.00      1.00      3515
weighted avg       1.00      1.00      1.00      3515

✅ Secure Encryption Detected: algo1 - 🔒 Safe to Use.
⚠️ Vulnerable Encryption Detected: algo2 - ❌ NOT Secure!
✅ Secure Encryption Detected: algo3 - 🔒 Safe to Use.
