In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [4]:

# Load dataset
data = pd.read_csv('C://Users//Jayashrinidhi V//OneDrive//Documents//VScode//AlgoMaster//encrypted_messages_dataset12.csv')


In [6]:

# Feature and target extraction
X = data['encrypted_message']
y = data['algorithm']

# Vectorization of encrypted messages (hex to numerical)
vectorizer = CountVectorizer(analyzer=lambda x: [x[i:i+2] for i in range(0, len(x), 2)])
X_vectorized = vectorizer.fit_transform(X)


In [7]:

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestClassifier()
model.fit(X_train, y_train)



In [8]:
# Prediction
y_pred = model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

        3DES       0.13      0.20      0.16        49
         AES       0.23      0.27      0.25        56
    Blowfish       0.23      0.15      0.18        66
    ChaCha20       0.28      0.27      0.27        64
         DES       0.22      0.18      0.20        65

    accuracy                           0.21       300
   macro avg       0.22      0.21      0.21       300
weighted avg       0.22      0.21      0.21       300



In [9]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


# Convert encrypted message from hexadecimal to numerical feature
vectorizer = CountVectorizer(analyzer=lambda x: [x[i:i+2] for i in range(0, len(x), 2)])
X = vectorizer.fit_transform(data['encrypted_message'])

# Extract target
y = data['algorithm']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestClassifier(n_estimators=100, class_weight='balanced')
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


              precision    recall  f1-score   support

        3DES       0.16      0.27      0.20        49
         AES       0.14      0.21      0.17        56
    Blowfish       0.23      0.14      0.17        66
    ChaCha20       0.29      0.28      0.28        64
         DES       0.19      0.09      0.12        65

    accuracy                           0.19       300
   macro avg       0.20      0.20      0.19       300
weighted avg       0.20      0.19      0.19       300

[[13 16  6 13  1]
 [13 12 10 10 11]
 [23 12  9 14  8]
 [15 20  5 18  6]
 [19 23  9  8  6]]


In [31]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


# Convert encrypted message from hexadecimal to a format usable for ML models
def hex_to_bytes(hex_string):
    return bytes.fromhex(hex_string)

data['encrypted_message_bytes'] = data['encrypted_message'].apply(hex_to_bytes)

# Create features (e.g., byte frequency histogram)
def create_features(byte_string):
    histogram = np.zeros(256, dtype=int)
    for byte in byte_string:
        histogram[byte] += 1
    return histogram

X = np.array([create_features(message) for message in data['encrypted_message_bytes']])
y = data['algorithm']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestClassifier(n_estimators=100, class_weight='balanced')
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


              precision    recall  f1-score   support

        3DES       0.16      0.27      0.20        49
         AES       0.17      0.27      0.21        56
    Blowfish       0.13      0.09      0.11        66
    ChaCha20       0.22      0.19      0.20        64
         DES       0.33      0.15      0.21        65

    accuracy                           0.19       300
   macro avg       0.20      0.19      0.19       300
weighted avg       0.21      0.19      0.18       300

[[13  9 11 11  5]
 [16 15  9 10  6]
 [20 23  6 14  3]
 [20 21  5 12  6]
 [14 19 15  7 10]]


In [39]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

# Load dataset and prepare features
# Assuming features are already processed into X and y
X = np.array([create_features(message) for message in data['encrypted_message_bytes']])
y = data['algorithm']

# Handle class imbalance
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Model training with hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'class_weight': ['balanced', None]
}
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_

# Prediction
y_pred = best_model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


              precision    recall  f1-score   support

        3DES       0.16      0.27      0.20        49
         AES       0.14      0.18      0.16        56
    Blowfish       0.21      0.14      0.17        66
    ChaCha20       0.27      0.27      0.27        64
         DES       0.18      0.12      0.15        65

    accuracy                           0.19       300
   macro avg       0.19      0.19      0.19       300
weighted avg       0.20      0.19      0.19       300

[[13  9  8 15  4]
 [14 10  4 14 14]
 [22 14  9 12  9]
 [14 16  8 17  9]
 [19 20 13  5  8]]
