In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

# Linear-kernel SVM baseline: load the dataset, validate the target, split,
# standardise, oversample the training split with SMOTE, fit, and score.

# Load your dataset (replace with your actual file path)
df = pd.read_csv('D:/Ransomware_Detect2/Ransomware_Detect2/dataset/data_file.csv')

# Check the distribution of the 'Benign' column
print("Class distribution in target variable (Benign):")
print(df['Benign'].value_counts())

# Fail fast unless the target is strictly binary. nunique() ignores missing
# values, so this also rejects an empty or all-missing target (0 classes), and
# it rejects 3+ classes, for which the binary precision/recall/F1 calls below
# would otherwise raise only after the model has already been trained.
n_classes = df['Benign'].nunique()
if n_classes != 2:
    raise ValueError(
        "The target variable 'Benign' must have both 0 and 1 values. "
        f"Currently, it has {n_classes} distinct class(es)."
    )

# Separate features (X) and target variable (y)
X = df.drop(columns=['Benign', 'FileName', 'md5Hash'])  # Drop unnecessary columns (if any)
y = df['Benign']

# Split the data into train and test sets (80% training, 20% testing),
# stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling for better performance with SVM.
# The scaler is fitted on the training split only and then reused on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE (Synthetic Minority Over-sampling Technique).
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Create and train the SVM model with a linear kernel
svm_model = SVC(kernel='linear', C=1, random_state=42)
svm_model.fit(X_res, y_res)

# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Calculate evaluation metrics.
# precision/recall/F1 are called with scikit-learn's defaults, i.e. they score
# the class labelled 1 in 'Benign'.
# NOTE(review): confirm label 1 is the class you intend to report on.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")


Class distribution in target variable (Benign):
Benign
0    35367
1    27118
Name: count, dtype: int64
Accuracy: 0.9221
Precision: 0.8778
Recall: 0.9534
F1-Score: 0.9140


In [11]:
import joblib

# Persist the trained SVM classifier and the fitted scaler with joblib.
# Both objects come from an earlier cell; other cells in this notebook also
# write to 'scaler.pkl', so the last cell executed determines that file.
for artifact, destination in (
    (svm_model, 'svm_model.pkl'),
    (scaler, 'scaler.pkl'),
):
    joblib.dump(artifact, destination)

print("Model and scaler saved successfully!")


Model and scaler saved successfully!


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import joblib

# K-nearest-neighbours classifier: load the dataset, validate the target,
# split, standardise, oversample the training split with SMOTE, fit, score,
# and persist the model together with the scaler.

# Load your dataset (replace with your actual file path)
df = pd.read_csv('D:/Ransomware_Detect2/Ransomware_Detect2/dataset/data_file.csv')

# Check the distribution of the 'Benign' column
print("Class distribution in target variable (Benign):")
print(df['Benign'].value_counts())

# Fail fast unless the target is strictly binary. nunique() ignores missing
# values, so this also rejects an empty or all-missing target (0 classes), and
# it rejects 3+ classes, for which the binary precision/recall/F1 calls below
# would otherwise raise only after the model has already been trained.
n_classes = df['Benign'].nunique()
if n_classes != 2:
    raise ValueError(
        "The target variable 'Benign' must have both 0 and 1 values. "
        f"Currently, it has {n_classes} distinct class(es)."
    )

# Separate features (X) and target variable (y)
X = df.drop(columns=['Benign', 'FileName', 'md5Hash'])  # Drop unnecessary columns (if any)
y = df['Benign']

# Split the data into train and test sets (80% training, 20% testing),
# stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling for better performance with KNN.
# The scaler is fitted on the training split only and then reused on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE (Synthetic Minority Over-sampling Technique).
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Create and train the KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)  # You can adjust n_neighbors for better performance
knn_model.fit(X_res, y_res)

# Make predictions on the test set
y_pred = knn_model.predict(X_test)

# Calculate evaluation metrics.
# precision/recall/F1 are called with scikit-learn's defaults, i.e. they score
# the class labelled 1 in 'Benign'.
# NOTE(review): confirm label 1 is the class you intend to report on.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Save the KNN model
joblib.dump(knn_model, 'knn_model.pkl')

# Save the scaler.
# Other cells in this notebook write to the same 'scaler.pkl' path, so the
# last cell executed determines the file's contents.
joblib.dump(scaler, 'scaler.pkl')

print("Model and scaler saved successfully!")


Class distribution in target variable (Benign):
Benign
0    35367
1    27118
Name: count, dtype: int64
Accuracy: 0.9920
Precision: 0.9908
Recall: 0.9908
F1-Score: 0.9908
Model and scaler saved successfully!


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import joblib

# Gaussian Naive Bayes classifier: load the dataset, validate the target,
# split, standardise, oversample the training split with SMOTE, fit, score,
# and persist the model together with the scaler.

# Load your dataset (replace with your actual file path)
df = pd.read_csv('D:/Ransomware_Detect2/Ransomware_Detect2/dataset/data_file.csv')

# Check the distribution of the 'Benign' column
print("Class distribution in target variable (Benign):")
print(df['Benign'].value_counts())

# Fail fast unless the target is strictly binary. nunique() ignores missing
# values, so this also rejects an empty or all-missing target (0 classes), and
# it rejects 3+ classes, for which the binary precision/recall/F1 calls below
# would otherwise raise only after the model has already been trained.
n_classes = df['Benign'].nunique()
if n_classes != 2:
    raise ValueError(
        "The target variable 'Benign' must have both 0 and 1 values. "
        f"Currently, it has {n_classes} distinct class(es)."
    )

# Separate features (X) and target variable (y)
X = df.drop(columns=['Benign', 'FileName', 'md5Hash'])  # Drop unnecessary columns (if any)
y = df['Benign']

# Split the data into train and test sets (80% training, 20% testing),
# stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling for better performance.
# The scaler is fitted on the training split only and then reused on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE (Synthetic Minority Over-sampling Technique).
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Create and train the Naive Bayes model
nb_model = GaussianNB()
nb_model.fit(X_res, y_res)

# Make predictions on the test set
y_pred = nb_model.predict(X_test)

# Calculate evaluation metrics.
# precision/recall/F1 are called with scikit-learn's defaults, i.e. they score
# the class labelled 1 in 'Benign'.
# NOTE(review): confirm label 1 is the class you intend to report on.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Save the Naive Bayes model
joblib.dump(nb_model, 'naive_bayes_model.pkl')

# Save the scaler.
# Other cells in this notebook write to the same 'scaler.pkl' path, so the
# last cell executed determines the file's contents.
joblib.dump(scaler, 'scaler.pkl')

print("Model and scaler saved successfully!")


Class distribution in target variable (Benign):
Benign
0    35367
1    27118
Name: count, dtype: int64
Accuracy: 0.5486
Precision: 0.4901
Recall: 0.9923
F1-Score: 0.6561
Model and scaler saved successfully!


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import joblib

# Random Forest classifier: load the dataset, validate the target, split,
# standardise, oversample the training split with SMOTE, fit, score, and
# persist the model together with the scaler.

# Load your dataset (replace with your actual file path)
df = pd.read_csv('D:/Ransomware_Detect2/Ransomware_Detect2/dataset/data_file.csv')

# Check the distribution of the 'Benign' column
print("Class distribution in target variable (Benign):")
print(df['Benign'].value_counts())

# Fail fast unless the target is strictly binary. nunique() ignores missing
# values, so this also rejects an empty or all-missing target (0 classes), and
# it rejects 3+ classes, for which the binary precision/recall/F1 calls below
# would otherwise raise only after the model has already been trained.
n_classes = df['Benign'].nunique()
if n_classes != 2:
    raise ValueError(
        "The target variable 'Benign' must have both 0 and 1 values. "
        f"Currently, it has {n_classes} distinct class(es)."
    )

# Separate features (X) and target variable (y)
X = df.drop(columns=['Benign', 'FileName', 'md5Hash'])  # Drop unnecessary columns (if any)
y = df['Benign']

# Split the data into train and test sets (80% training, 20% testing),
# stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling for better performance.
# The scaler is fitted on the training split only and then reused on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE (Synthetic Minority Over-sampling Technique).
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Create and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_res, y_res)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Calculate evaluation metrics.
# precision/recall/F1 are called with scikit-learn's defaults, i.e. they score
# the class labelled 1 in 'Benign'.
# NOTE(review): confirm label 1 is the class you intend to report on.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Save the Random Forest model
joblib.dump(rf_model, 'random_forest_model.pkl')

# Save the scaler.
# Other cells in this notebook write to the same 'scaler.pkl' path, so the
# last cell executed determines the file's contents.
joblib.dump(scaler, 'scaler.pkl')

print("Model and scaler saved successfully!")


Class distribution in target variable (Benign):
Benign
0    35367
1    27118
Name: count, dtype: int64
Accuracy: 0.9966
Precision: 0.9965
Recall: 0.9956
F1-Score: 0.9960
Model and scaler saved successfully!


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import joblib

# Decision Tree classifier: load the dataset, validate the target, split,
# standardise, oversample the training split with SMOTE, fit, score, and
# persist the model together with the scaler.

# Load your dataset (replace with your actual file path)
df = pd.read_csv('D:/Ransomware_Detect2/Ransomware_Detect2/dataset/data_file.csv')

# Check the distribution of the 'Benign' column
print("Class distribution in target variable (Benign):")
print(df['Benign'].value_counts())

# Fail fast unless the target is strictly binary. nunique() ignores missing
# values, so this also rejects an empty or all-missing target (0 classes), and
# it rejects 3+ classes, for which the binary precision/recall/F1 calls below
# would otherwise raise only after the model has already been trained.
n_classes = df['Benign'].nunique()
if n_classes != 2:
    raise ValueError(
        "The target variable 'Benign' must have both 0 and 1 values. "
        f"Currently, it has {n_classes} distinct class(es)."
    )

# Separate features (X) and target variable (y)
X = df.drop(columns=['Benign', 'FileName', 'md5Hash'])  # Drop unnecessary columns (if any)
y = df['Benign']

# Split the data into train and test sets (80% training, 20% testing),
# stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling for better performance.
# The scaler is fitted on the training split only and then reused on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE (Synthetic Minority Over-sampling Technique).
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Create and train the Decision Tree model
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_res, y_res)

# Make predictions on the test set
y_pred = dt_model.predict(X_test)

# Calculate evaluation metrics.
# precision/recall/F1 are called with scikit-learn's defaults, i.e. they score
# the class labelled 1 in 'Benign'.
# NOTE(review): confirm label 1 is the class you intend to report on.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Save the Decision Tree model
joblib.dump(dt_model, 'decision_tree_model.pkl')

# Save the scaler.
# Other cells in this notebook write to the same 'scaler.pkl' path, so the
# last cell executed determines the file's contents.
joblib.dump(scaler, 'scaler.pkl')

print("Model and scaler saved successfully!")


Class distribution in target variable (Benign):
Benign
0    35367
1    27118
Name: count, dtype: int64
Accuracy: 0.9944
Precision: 0.9928
Recall: 0.9943
F1-Score: 0.9936
Model and scaler saved successfully!


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import joblib

# Logistic Regression classifier: load the dataset, validate the target,
# split, standardise, oversample the training split with SMOTE, fit, score,
# and persist the model together with the scaler.

# Load your dataset (replace with your actual file path)
df = pd.read_csv('D:/Ransomware_Detect2/Ransomware_Detect2/dataset/data_file.csv')

# Check the distribution of the 'Benign' column
print("Class distribution in target variable (Benign):")
print(df['Benign'].value_counts())

# Fail fast unless the target is strictly binary. nunique() ignores missing
# values, so this also rejects an empty or all-missing target (0 classes), and
# it rejects 3+ classes, for which the binary precision/recall/F1 calls below
# would otherwise raise only after the model has already been trained.
n_classes = df['Benign'].nunique()
if n_classes != 2:
    raise ValueError(
        "The target variable 'Benign' must have both 0 and 1 values. "
        f"Currently, it has {n_classes} distinct class(es)."
    )

# Separate features (X) and target variable (y)
X = df.drop(columns=['Benign', 'FileName', 'md5Hash'])  # Drop unnecessary columns (if any)
y = df['Benign']

# Split the data into train and test sets (80% training, 20% testing),
# stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling for better performance.
# The scaler is fitted on the training split only and then reused on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE (Synthetic Minority Over-sampling Technique).
# Only the training split is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Create and train the Logistic Regression model.
# The variable and the output file are named 'linear_model' although the
# estimator is a LogisticRegression; the names are kept so existing consumers
# of 'linear_model.pkl' keep working.
linear_model = LogisticRegression(random_state=42)
linear_model.fit(X_res, y_res)

# Make predictions on the test set
y_pred = linear_model.predict(X_test)

# Calculate evaluation metrics.
# precision/recall/F1 are called with scikit-learn's defaults, i.e. they score
# the class labelled 1 in 'Benign'.
# NOTE(review): confirm label 1 is the class you intend to report on.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Save the Logistic Regression model
joblib.dump(linear_model, 'linear_model.pkl')

# Save the scaler.
# Other cells in this notebook write to the same 'scaler.pkl' path, so the
# last cell executed determines the file's contents.
joblib.dump(scaler, 'scaler.pkl')

print("Model and scaler saved successfully!")


Class distribution in target variable (Benign):
Benign
0    35367
1    27118
Name: count, dtype: int64
Accuracy: 0.9117
Precision: 0.9136
Recall: 0.8796
F1-Score: 0.8963
Model and scaler saved successfully!
