# Importing a library that is not in Colaboratory

To import a library that's not in Colaboratory by default, you can use `!pip install` or `!apt-get install`.

In [2]:
!pip install pycryptodome


Collecting pycryptodome
  Downloading pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycryptodome
Successfully installed pycryptodome-3.20.0


In [3]:
import os
import random
import csv
from Crypto.Cipher import AES, Blowfish, DES3, DES, ARC4
from Crypto.Util.Padding import pad
from Crypto.Random import get_random_bytes

# Function to generate random plaintexts
def generate_plaintexts(num, size):
    return [get_random_bytes(size) for _ in range(num)]

# Encryption functions for each algorithm
def aes_encrypt(plaintext, key):
    cipher = AES.new(key, AES.MODE_ECB)
    return cipher.encrypt(pad(plaintext, AES.block_size))

def blowfish_encrypt(plaintext, key):
    cipher = Blowfish.new(key, Blowfish.MODE_ECB)
    return cipher.encrypt(pad(plaintext, Blowfish.block_size))

def des3_encrypt(plaintext, key):
    cipher = DES3.new(key, DES3.MODE_ECB)
    return cipher.encrypt(pad(plaintext, DES3.block_size))

def des_encrypt(plaintext, key):
    cipher = DES.new(key, DES.MODE_ECB)
    return cipher.encrypt(pad(plaintext, DES.block_size))

def rc4_encrypt(plaintext, key):
    cipher = ARC4.new(key)  # Using ARC4 for RC4 encryption
    return cipher.encrypt(plaintext)

# Function to save ciphertexts to a CSV file (single file for all algorithms)
def save_to_csv(filename, data):
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Algorithm', 'Ciphertext'])  # Add headers

        for row in data:
            writer.writerow(row)

# Generate 220 plaintexts of size 16 bytes
num_plaintexts = 220
plaintext_size = 16
plaintexts = generate_plaintexts(num_plaintexts, plaintext_size)

# Generate a common key for each algorithm
aes_key = get_random_bytes(16)
blowfish_key = get_random_bytes(16)
des3_key = DES3.adjust_key_parity(get_random_bytes(24))  # DES3 needs 24 bytes for 3DES
des_key = get_random_bytes(8)  # DES key is 8 bytes
rc4_key = get_random_bytes(16)  # RC4 key

# Dictionary to hold ciphertexts for each algorithm
ciphertexts = {
    'AES': [],
    'Blowfish': [],
    '3DES': [],
    'DES': [],
    'RC4': []
}

# Encrypt all plaintexts with each algorithm using the same keys
for pt in plaintexts:
    ciphertexts['AES'].append(aes_encrypt(pt, aes_key))
    ciphertexts['Blowfish'].append(blowfish_encrypt(pt, blowfish_key))
    ciphertexts['3DES'].append(des3_encrypt(pt, des3_key))
    ciphertexts['DES'].append(des_encrypt(pt, des_key))
    ciphertexts['RC4'].append(rc4_encrypt(pt, rc4_key))

# Prepare a list of data to write to a single CSV file
csv_data = []

for algo, cts in ciphertexts.items():
    for ct in cts:
        csv_data.append([algo, ct.hex()])  # Convert binary to hex for readable format

# Save everything to a single CSV file
save_to_csv('all_ciphertexts.csv', csv_data)
print("All data saved to 'all_ciphertexts.csv'")



All data saved to 'all_ciphertexts.csv'


In [8]:
# Check the number of rows in the CSV file
import pandas as pd
df = pd.read_csv('all_ciphertexts.csv')
print("Total ciphertexts:", len(df))


Total ciphertexts: 1100


In [9]:
import pandas as pd
from sklearn.utils import shuffle

# Function to load ciphertexts from CSV file
def load_ciphertexts_from_csv(filename):
    df = pd.read_csv(filename)
    return df

# Function to save training and testing data to separate CSV files
def save_training_testing_data(training_data, testing_data, training_filename, testing_filename):
    training_data.to_csv(training_filename, index=False)
    testing_data.to_csv(testing_filename, index=False)

# Load ciphertexts from the CSV file
df = load_ciphertexts_from_csv('all_ciphertexts.csv')

# Shuffle the dataframe to ensure randomness
df = shuffle(df, random_state=42)

# Debugging: Check total number of samples
total_samples = len(df)
print(f"Total samples in data: {total_samples}")

# Separate the data into training and testing sets
num_training_samples = 40 * 5  # 40 ciphertexts per algorithm
num_testing_samples = 180  # Total 180 ciphertexts for testing (9 groups of 20)

# Split into training and testing
df_training = df.groupby('Algorithm').head(40).reset_index(drop=True)
df_testing = df.drop(df_training.index).reset_index(drop=True)

# Debugging: Check sizes of the training and testing sets
print(f"Number of training samples: {len(df_training)}")
print(f"Number of testing samples: {len(df_testing)}")

# Ensure that the number of testing samples is exactly 180 (9 groups of 20)
if len(df_testing) != num_testing_samples:
    print(f"Warning: Expected {num_testing_samples} testing samples, but found {len(df_testing)}.")

# Save training and testing data to CSV files
save_training_testing_data(df_training, df_testing, 'training_ciphertexts.csv', 'testing_ciphertexts.csv')

print("Training and testing data saved to 'training_ciphertexts.csv' and 'testing_ciphertexts.csv'")

# Optional: Dividing testing data into 9 groups of 20
def divide_testing_data(testing_df, num_groups, group_size):
    groups = []
    for i in range(num_groups):
        start_idx = i * group_size
        end_idx = start_idx + group_size
        groups.append(testing_df[start_idx:end_idx])
    return groups

# Divide the testing data into 9 groups of 20
groups = divide_testing_data(df_testing, 9, 20)

# Save each group to separate CSV files
for i, group in enumerate(groups):
    group.to_csv(f'testing_group_{i+1}.csv', index=False)
    print(f"Testing group {i+1} saved to 'testing_group_{i+1}.csv'")


Total samples in data: 1100
Number of training samples: 200
Number of testing samples: 900
Training and testing data saved to 'training_ciphertexts.csv' and 'testing_ciphertexts.csv'
Testing group 1 saved to 'testing_group_1.csv'
Testing group 2 saved to 'testing_group_2.csv'
Testing group 3 saved to 'testing_group_3.csv'
Testing group 4 saved to 'testing_group_4.csv'
Testing group 5 saved to 'testing_group_5.csv'
Testing group 6 saved to 'testing_group_6.csv'
Testing group 7 saved to 'testing_group_7.csv'
Testing group 8 saved to 'testing_group_8.csv'
Testing group 9 saved to 'testing_group_9.csv'


In [10]:
# Confirm the number of samples per algorithm in the original data
for algo in ['AES', 'Blowfish', '3DES', 'DES', 'RC4']:
    num_samples_per_algo = len(df[df['Algorithm'] == algo])
    print(f"Number of samples for {algo}: {num_samples_per_algo}")


Number of samples for AES: 220
Number of samples for Blowfish: 220
Number of samples for 3DES: 220
Number of samples for DES: 220
Number of samples for RC4: 220


In [11]:
import numpy as np
import pandas as pd
from scipy.stats import chisquare
from collections import Counter

# English letter frequencies (relative frequencies)
english_freq = {
    'a': 0.08167, 'b': 0.01492, 'c': 0.02782, 'd': 0.04253,
    'e': 0.12702, 'f': 0.02228, 'g': 0.02015, 'h': 0.06094,
    'i': 0.06966, 'j': 0.00153, 'k': 0.00772, 'l': 0.04025,
    'm': 0.02406, 'n': 0.06749, 'o': 0.07507, 'p': 0.01929,
    'q': 0.00095, 'r': 0.05987, 's': 0.06327, 't': 0.09056,
    'u': 0.02758, 'v': 0.00978, 'w': 0.02360, 'x': 0.00150,
    'y': 0.01974, 'z': 0.00074
}

# Load ciphertexts from CSV file
def load_ciphertexts_from_csv(filename):
    df = pd.read_csv(filename)
    return df

# Frequency distribution of characters
def frequency_distribution(text):
    text = text.lower()
    freq = Counter(text)
    total = sum(freq.values())
    return {char: count / total for char, count in freq.items()}

# Chi-squared statistic
def chi_squared_statistic(freq_dist):
    # Filter out English letters that are not in the text
    filtered_english_freq = {char: freq for char, freq in english_freq.items() if char in freq_dist}
    observed = np.array([freq_dist.get(char, 0) for char in filtered_english_freq])
    expected = np.array([filtered_english_freq[char] for char in filtered_english_freq])

    # Check if both distributions are non-empty
    if observed.sum() == 0 or expected.sum() == 0:
        return float('nan')  # Return NaN if the data is not valid

    # Normalize observed and expected frequencies
    observed = observed / observed.sum()
    expected = expected / expected.sum()

    return chisquare(observed, expected)[0]

# Index of Coincidence
def index_of_coincidence(text):
    text = text.lower()
    length = len(text)
    freq = Counter(text)
    return sum(count * (count - 1) for count in freq.values()) / (length * (length - 1))

# Max IC for periods 1-15
def max_ic_for_periods(ciphertext, max_period=15):
    def split_text(text, period):
        return [text[i::period] for i in range(period)]

    def avg_ic(text_list):
        return np.mean([index_of_coincidence(text) for text in text_list])

    ics = []
    for period in range(1, max_period + 1):
        split_texts = split_text(ciphertext, period)
        ics.append(avg_ic(split_texts))
    return max(ics)

# Max Kappa for periods 1-15
def max_kappa_for_periods(ciphertext, max_period=15):
    def kappa(text, period):
        shifted_text = text[period:] + text[:period]
        return np.mean([text[i] == shifted_text[i] for i in range(len(text))])

    kappa_values = []
    for period in range(1, max_period + 1):
        kappa_values.append(kappa(ciphertext, period))
    return max(kappa_values)

# Digraphic Index of Coincidence
def digraphic_index_of_coincidence(text):
    pairs = [text[i:i+2] for i in range(len(text) - 1)]
    freq = Counter(pairs)
    total = sum(freq.values())
    return sum(count * (count - 1) for count in freq.values()) / (total * (total - 1))

# DIC for even-numbered pairs
def dic_for_even_numbered_pairs(text):
    pairs = [text[i:i+2] for i in range(0, len(text) - 1, 2)]
    freq = Counter(pairs)
    total = sum(freq.values())
    return sum(count * (count - 1) for count in freq.values()) / (total * (total - 1))

# Long repeat
def long_repeat(text):
    repeats = [text[i:i+3] for i in range(len(text) - 2)]
    freq = Counter(repeats)
    long_repeats = [repeat for repeat, count in freq.items() if count > 1]
    return np.sqrt(len(long_repeats) / len(repeats)) if repeats else 0

# Percentage of odd-spaced repeats
def percentage_of_odd_spaced_repeats(text):
    repeats = [text[i:i+3] for i in range(len(text) - 2)]
    odd_spaced_repeats = [repeat for i, repeat in enumerate(repeats) if repeat in repeats[:i]]
    return len(odd_spaced_repeats) / len(repeats) * 100 if repeats else 0

# Log digraph score (example placeholder data)
def log_digraph_score(text, log_digraph_scores):
    pairs = [text[i:i+2] for i in range(len(text) - 1)]
    total_score = sum(log_digraph_scores.get(pair, 0) for pair in pairs)
    return total_score / len(pairs) if pairs else 0

# Single letter-digraph discrepancy score (example placeholder data)
def single_letter_digraph_discrepancy(text, single_letter_scores, digraph_scores):
    single_scores = [single_letter_scores.get(char, 0) for char in text]
    digraph_scores_list = [digraph_scores.get(text[i:i+2], 0) for i in range(len(text) - 1)]
    return np.mean(single_scores) + np.mean(digraph_scores_list) if text else 0

# Load ciphertexts from the CSV file
df = load_ciphertexts_from_csv('training_ciphertexts.csv')

# Define log digraph scores and single letter scores (example data)
log_digraph_scores = {'ex': 0.1, 'xa': 0.05}  # Example data
single_letter_scores = {'e': 0.2, 'x': 0.1}  # Example data
digraph_scores = {'ex': 0.15, 'xa': 0.1}  # Example data

# Extract features for each ciphertext
features = []
for index, row in df.iterrows():
    ciphertext = row['Ciphertext']  # Adjust column name if necessary
    features.append({
        'Algorithm': row['Algorithm'],  # Include algorithm for reference
        'Number of Unique Characters': len(set(ciphertext)),
        'Chi-Squared Statistic': chi_squared_statistic(frequency_distribution(ciphertext)),
        'Index of Coincidence': index_of_coincidence(ciphertext),
        'Max IC for Periods 1-15': max_ic_for_periods(ciphertext),
        'Max Kappa for Periods 1-15': max_kappa_for_periods(ciphertext),
        'Digraphic Index of Coincidence': digraphic_index_of_coincidence(ciphertext),
        'DIC for Even-Numbered Pairs': dic_for_even_numbered_pairs(ciphertext),
        'Long Repeat': long_repeat(ciphertext),
        'Percentage of Odd-Spaced Repeats': percentage_of_odd_spaced_repeats(ciphertext),
        'Log Digraph Score': log_digraph_score(ciphertext, log_digraph_scores),
        'Single Letter-Digraph Discrepancy Score': single_letter_digraph_discrepancy(ciphertext, single_letter_scores, digraph_scores)
    })

# Convert features to DataFrame and save to CSV
features_df = pd.DataFrame(features)
features_df.to_csv('ciphertext_features.csv', index=False)

print("Feature extraction complete. Results saved to 'ciphertext_features.csv'")


Feature extraction complete. Results saved to 'ciphertext_features.csv'


In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Load features from CSV file
def load_features_from_csv(filename):
    return pd.read_csv(filename)

# Load features
features_df = load_features_from_csv('ciphertext_features.csv')

# Separate features and labels
X = features_df.drop(columns=['Algorithm'])
y = features_df['Algorithm']

# Encode categorical labels as integers
y_encoded = y.astype('category').cat.codes

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, random_state=42)

# Initialize and train SVM classifier
svm_classifier = SVC(kernel='linear')  # You can use other kernels like 'rbf' or 'poly'
svm_classifier.fit(X_train, y_train)

# Predict on test set using SVM
y_pred_svm = svm_classifier.predict(X_test)

# Evaluate the SVM model
print("Confusion Matrix for SVM:")
print(confusion_matrix(y_test, y_pred_svm))

print("\nClassification Report for SVM:")
print(classification_report(y_test, y_pred_svm, target_names=y.unique()))

# Initialize and train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict on test set using Random Forest
y_pred_rf = rf_classifier.predict(X_test)

# Evaluate the Random Forest model
print("\nConfusion Matrix for Random Forest:")
print(confusion_matrix(y_test, y_pred_rf))

print("\nClassification Report for Random Forest:")
print(classification_report(y_test, y_pred_rf, target_names=y.unique()))

# Feature importance from Random Forest
feature_importances = rf_classifier.feature_importances_
features = X.columns

# Print the feature importances
print("\nFeature Importances from Random Forest:")
for feature, importance in zip(features, feature_importances):
    print(f"{feature}: {importance}")

# Optionally, save the trained models
joblib.dump(svm_classifier, 'svm_classifier_model.joblib')
joblib.dump(rf_classifier, 'rf_classifier_model.joblib')
print("\nModels saved to 'svm_classifier_model.joblib' and 'rf_classifier_model.joblib'")


Confusion Matrix for SVM:
[[ 3  2  2  6  1]
 [ 2 10  0  0  0]
 [ 2  2  3  3  1]
 [ 2  4  0  4  3]
 [ 0  0  0  0 10]]

Classification Report for SVM:
              precision    recall  f1-score   support

    Blowfish       0.33      0.21      0.26        14
         DES       0.56      0.83      0.67        12
         AES       0.60      0.27      0.37        11
        3DES       0.31      0.31      0.31        13
         RC4       0.67      1.00      0.80        10

    accuracy                           0.50        60
   macro avg       0.49      0.53      0.48        60
weighted avg       0.48      0.50      0.46        60


Confusion Matrix for Random Forest:
[[ 7  1  2  3  1]
 [ 1 10  1  0  0]
 [ 2  3  6  0  0]
 [ 4  1  2  3  3]
 [ 0  0  0  0 10]]

Classification Report for Random Forest:
              precision    recall  f1-score   support

    Blowfish       0.50      0.50      0.50        14
         DES       0.67      0.83      0.74        12
         AES       0.55      

In [14]:
import numpy as np
import pandas as pd
from scipy.stats import chisquare
from collections import Counter

# English letter frequencies (relative frequencies)
english_freq = {
    'a': 0.08167, 'b': 0.01492, 'c': 0.02782, 'd': 0.04253,
    'e': 0.12702, 'f': 0.02228, 'g': 0.02015, 'h': 0.06094,
    'i': 0.06966, 'j': 0.00153, 'k': 0.00772, 'l': 0.04025,
    'm': 0.02406, 'n': 0.06749, 'o': 0.07507, 'p': 0.01929,
    'q': 0.00095, 'r': 0.05987, 's': 0.06327, 't': 0.09056,
    'u': 0.02758, 'v': 0.00978, 'w': 0.02360, 'x': 0.00150,
    'y': 0.01974, 'z': 0.00074
}

# Load ciphertexts from CSV file
def load_ciphertexts_from_csv(filename):
    return pd.read_csv(filename)

# Frequency distribution of characters
def frequency_distribution(text):
    text = text.lower()
    freq = Counter(text)
    total = sum(freq.values())
    return {char: count / total for char, count in freq.items()}

# Chi-squared statistic
def chi_squared_statistic(freq_dist):
    # Filter out English letters that are not in the text
    filtered_english_freq = {char: freq for char, freq in english_freq.items() if char in freq_dist}
    observed = np.array([freq_dist.get(char, 0) for char in filtered_english_freq])
    expected = np.array([filtered_english_freq[char] for char in filtered_english_freq])

    # Check if both distributions are non-empty
    if observed.sum() == 0 or expected.sum() == 0:
        return float('nan')  # Return NaN if the data is not valid

    # Normalize observed and expected frequencies
    observed = observed / observed.sum()
    expected = expected / expected.sum()

    return chisquare(observed, expected)[0]

# Index of Coincidence
def index_of_coincidence(text):
    text = text.lower()
    length = len(text)
    freq = Counter(text)
    return sum(count * (count - 1) for count in freq.values()) / (length * (length - 1))

# Max IC for periods 1-15
def max_ic_for_periods(ciphertext, max_period=15):
    def split_text(text, period):
        return [text[i::period] for i in range(period)]

    def avg_ic(text_list):
        return np.mean([index_of_coincidence(text) for text in text_list])

    ics = []
    for period in range(1, max_period + 1):
        split_texts = split_text(ciphertext, period)
        ics.append(avg_ic(split_texts))
    return max(ics)

# Max Kappa for periods 1-15
def max_kappa_for_periods(ciphertext, max_period=15):
    def kappa(text, period):
        shifted_text = text[period:] + text[:period]
        return np.mean([text[i] == shifted_text[i] for i in range(len(text))])

    kappa_values = []
    for period in range(1, max_period + 1):
        kappa_values.append(kappa(ciphertext, period))
    return max(kappa_values)

# Digraphic Index of Coincidence
def digraphic_index_of_coincidence(text):
    pairs = [text[i:i+2] for i in range(len(text) - 1)]
    freq = Counter(pairs)
    total = sum(freq.values())
    return sum(count * (count - 1) for count in freq.values()) / (total * (total - 1))

# DIC for even-numbered pairs
def dic_for_even_numbered_pairs(text):
    pairs = [text[i:i+2] for i in range(0, len(text) - 1, 2)]
    freq = Counter(pairs)
    total = sum(freq.values())
    return sum(count * (count - 1) for count in freq.values()) / (total * (total - 1))

# Long repeat
def long_repeat(text):
    repeats = [text[i:i+3] for i in range(len(text) - 2)]
    freq = Counter(repeats)
    long_repeats = [repeat for repeat, count in freq.items() if count > 1]
    return np.sqrt(len(long_repeats) / len(repeats)) if repeats else 0

# Percentage of odd-spaced repeats
def percentage_of_odd_spaced_repeats(text):
    repeats = [text[i:i+3] for i in range(len(text) - 2)]
    odd_spaced_repeats = [repeat for i, repeat in enumerate(repeats) if repeat in repeats[:i]]
    return len(odd_spaced_repeats) / len(repeats) * 100 if repeats else 0

# Log digraph score (example placeholder data)
def log_digraph_score(text, log_digraph_scores):
    pairs = [text[i:i+2] for i in range(len(text) - 1)]
    total_score = sum(log_digraph_scores.get(pair, 0) for pair in pairs)
    return total_score / len(pairs) if pairs else 0

# Single letter-digraph discrepancy score (example placeholder data)
def single_letter_digraph_discrepancy(text, single_letter_scores, digraph_scores):
    single_scores = [single_letter_scores.get(char, 0) for char in text]
    digraph_scores_list = [digraph_scores.get(text[i:i+2], 0) for i in range(len(text) - 1)]
    return np.mean(single_scores) + np.mean(digraph_scores_list) if text else 0

# Define log digraph scores and single letter scores (example data)
log_digraph_scores = {'ex': 0.1, 'xa': 0.05}  # Example data
single_letter_scores = {'e': 0.2, 'x': 0.1}  # Example data
digraph_scores = {'ex': 0.15, 'xa': 0.1}  # Example data

# Extract features from multiple files
def extract_features_from_files(file_list):
    all_features = []
    for filename in file_list:
        df = load_ciphertexts_from_csv(filename)
        for index, row in df.iterrows():
            ciphertext = row['Ciphertext']  # Adjust column name if necessary
            features = {
                'Algorithm': row['Algorithm'],  # Include algorithm for reference
                'Number of Unique Characters': len(set(ciphertext)),
                'Chi-Squared Statistic': chi_squared_statistic(frequency_distribution(ciphertext)),
                'Index of Coincidence': index_of_coincidence(ciphertext),
                'Max IC for Periods 1-15': max_ic_for_periods(ciphertext),
                'Max Kappa for Periods 1-15': max_kappa_for_periods(ciphertext),
                'Digraphic Index of Coincidence': digraphic_index_of_coincidence(ciphertext),
                'DIC for Even-Numbered Pairs': dic_for_even_numbered_pairs(ciphertext),
                'Long Repeat': long_repeat(ciphertext),
                'Percentage of Odd-Spaced Repeats': percentage_of_odd_spaced_repeats(ciphertext),
                'Log Digraph Score': log_digraph_score(ciphertext, log_digraph_scores),
                'Single Letter-Digraph Discrepancy Score': single_letter_digraph_discrepancy(ciphertext, single_letter_scores, digraph_scores)
            }
            all_features.append(features)

    # Convert features to DataFrame and save to CSV
    features_df = pd.DataFrame(all_features)
    features_df.to_csv('ciphertext_features_combined.csv', index=False)
    print("Feature extraction complete. Results saved to 'ciphertext_features_combined.csv'")

# List of files
file_list = [f'testing_group_{i}.csv' for i in range(1, 10)]

# Extract features from all files
extract_features_from_files(file_list)


Feature extraction complete. Results saved to 'ciphertext_features_combined.csv'


In [23]:
import pandas as pd
import joblib

# Load the features from the CSV file
features_df = pd.read_csv('ciphertext_features_combined.csv')

# Load the trained SVM model
svm_classifier = joblib.load('rf_classifier_model.joblib')

# Define the mapping from class labels to algorithm names
# Update this mapping based on how your model was trained
class_labels = {
    0: 'aes',
    1: 'des',
    2: 'blowfish',
    3: 'rc4',
    4: '3des'
}

# Ensure that the columns in the test set match the training set
# Drop any columns that are not used during training
columns_to_keep = ['Number of Unique Characters', 'Chi-Squared Statistic', 'Index of Coincidence',
                    'Max IC for Periods 1-15', 'Max Kappa for Periods 1-15', 'Digraphic Index of Coincidence',
                    'DIC for Even-Numbered Pairs', 'Long Repeat', 'Percentage of Odd-Spaced Repeats',
                    'Log Digraph Score', 'Single Letter-Digraph Discrepancy Score']
features_df = features_df[columns_to_keep]

# Check if there are any missing columns in the test set compared to the training set
missing_cols = [col for col in columns_to_keep if col not in features_df.columns]
if missing_cols:
    raise ValueError(f"Missing columns in test data: {', '.join(missing_cols)}")

# Predict the class labels for the testing data
predictions = svm_classifier.predict(features_df)

# Map the predictions to algorithm names
predicted_algorithm_names = [class_labels[label] for label in predictions]

# Add predictions and algorithm names to the features DataFrame
features_df['Predicted_Label'] = predictions
features_df['Predicted_Algorithm'] = predicted_algorithm_names

# Save the results to a new CSV file
features_df.to_csv('ciphertext_predictions.csv', index=False)

print("Predictions and algorithm names have been saved to 'ciphertext_predictions.csv'")


Predictions and algorithm names have been saved to 'ciphertext_predictions.csv'


In [32]:
from Crypto.Cipher import DES, ARC4
from Crypto.Random import get_random_bytes
from Crypto.Util.Padding import pad, unpad

# Function to generate DES ciphertext
def generate_des_ciphertext(plaintext, key):
    # Ensure the key is 8 bytes (64 bits) for DES
    if len(key) != 8:
        raise ValueError("DES key must be 8 bytes long.")

    des = DES.new(key, DES.MODE_ECB)
    padded_plaintext = pad(plaintext, DES.block_size)
    ciphertext = des.encrypt(padded_plaintext)
    return ciphertext



# Example usage
if __name__ == "__main__":
    # Sample plaintext
    plaintext = b'This is a secret message.'

    # Generate random keys for DES and RC4
    des_key = get_random_bytes(8)  # DES requires 8-byte key

    # Generate DES ciphertext
    des_ciphertext = generate_des_ciphertext(plaintext, des_key)
    print(f"DES Ciphertext (hex): {des_ciphertext.hex()}")


    des = DES.new(des_key, DES.MODE_ECB)
    des_decrypted = unpad(des.decrypt(des_ciphertext), DES.block_size)
    print(f"DES Decrypted: {des_decrypted.decode()}")



DES Ciphertext (hex): 95eeee249ceee76a4ec0d5396ba215cadc1b58015f6f62670b3db70c62b1a830
DES Decrypted: This is a secret message.


In [33]:
import pandas as pd
import numpy as np
import joblib

# Load the trained model
svm_classifier = joblib.load('svm_classifier_model.joblib')

# Define class labels (these should match the labels used during training)
class_labels = ['AES', 'DES', 'Blowfish', 'RC4', '3DES']  # Replace with actual class labels if different

# Define feature extraction functions (omitting for brevity; assume they are defined)

# Define a function to classify a ciphertext
def classify_ciphertext(ciphertext):
    features = {
        'Number of Unique Characters': len(set(ciphertext)),
        'Chi-Squared Statistic': chi_squared_statistic(frequency_distribution(ciphertext)),
        'Index of Coincidence': index_of_coincidence(ciphertext),
        'Max IC for Periods 1-15': max_ic_for_periods(ciphertext),
        'Max Kappa for Periods 1-15': max_kappa_for_periods(ciphertext),
        'Digraphic Index of Coincidence': digraphic_index_of_coincidence(ciphertext),
        'DIC for Even-Numbered Pairs': dic_for_even_numbered_pairs(ciphertext),
        'Long Repeat': long_repeat(ciphertext),
        'Percentage of Odd-Spaced Repeats': percentage_of_odd_spaced_repeats(ciphertext),
        'Log Digraph Score': log_digraph_score(ciphertext, log_digraph_scores),
        'Single Letter-Digraph Discrepancy Score': single_letter_digraph_discrepancy(ciphertext, single_letter_scores, digraph_scores)
    }

    features_df = pd.DataFrame([features])

    # Predict using the classifier
    prediction_index = svm_classifier.predict(features_df)[0]

    # Map the prediction index to the class label
    predicted_algorithm = class_labels[prediction_index]

    return predicted_algorithm

# Input function to get ciphertext from user
def input_and_classify():
    ciphertext = input("Enter the ciphertext to classify: ")
    predicted_algorithm = classify_ciphertext(ciphertext)
    print(f"The predicted algorithm for the given ciphertext is: {predicted_algorithm}")

# Call the input and classify function
input_and_classify()


Enter the ciphertext to classify: 95eeee249ceee76a4ec0d5396ba215cadc1b58015f6f62670b3db70c62b1a830
The predicted algorithm for the given ciphertext is: DES
