In [5]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

file_path = "BCCC-SCsVul-2024.csv"
df = pd.read_csv(file_path)

df = df.drop(columns=['ID'], errors='ignore')


target_columns = [col for col in df.columns if "Class" in col]
df['Target'] = df[target_columns].idxmax(axis=1)
df = df.drop(columns=target_columns)

le = LabelEncoder()
df['Target'] = le.fit_transform(df['Target'])


df = df.apply(pd.to_numeric, errors='coerce')


X = df.drop(columns=['Target'])
y = df['Target']


X = X.fillna(X.mean())

scaler = StandardScaler()
X = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


print("\nðŸ”¹ SVM Training Started...")
start_train_svm = time.time()
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
end_train_svm = time.time()
print(f"âœ… SVM Training Completed in {end_train_svm - start_train_svm:.2f} sec")


print("\nðŸ”¹ SGD SVM Training Started...")
start_train_sgd = time.time()
sgd_svm = SGDClassifier(loss="hinge", random_state=42)
sgd_svm.fit(X_train, y_train)
end_train_sgd = time.time()
print(f"âœ… SGD SVM Training Completed in {end_train_sgd - start_train_sgd:.2f} sec")


print("\nðŸ”¹ SVM Testing Started...")
start_test_svm = time.time()
y_pred_svm = svm_model.predict(X_test)
end_test_svm = time.time()
print(f"âœ… SVM Testing Completed in {end_test_svm - start_test_svm:.2f} sec")

print("\nðŸ”¹ SGD SVM Testing Started...")
start_test_sgd = time.time()
y_pred_sgd = sgd_svm.predict(X_test)
end_test_sgd = time.time()
print(f"âœ… SGD SVM Testing Completed in {end_test_sgd - start_test_sgd:.2f} sec")


print("\nðŸ”¹ Evaluating SVM Model...")
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Model Accuracy: {accuracy_svm:.4f}")
print("Classification Report (SVM):\n", classification_report(y_test, y_pred_svm))
print("Confusion Matrix (SVM):\n", confusion_matrix(y_test, y_pred_svm))

print("\nðŸ”¹ Evaluating SGD SVM Model...")
accuracy_sgd = accuracy_score(y_test, y_pred_sgd)
print(f"SGD SVM Model Accuracy: {accuracy_sgd:.4f}")
print("Classification Report (SGD SVM):\n", classification_report(y_test, y_pred_sgd))
print("Confusion Matrix (SGD SVM):\n", confusion_matrix(y_test, y_pred_sgd))


print("\nðŸ”¹ Execution Time Summary:")
print(f"SVM Training Time: {end_train_svm - start_train_svm:.2f} sec")
print(f"SVM Testing Time: {end_test_svm - start_test_svm:.2f} sec")
print(f"SGD SVM Training Time: {end_train_sgd - start_train_sgd:.2f} sec")
print(f"SGD SVM Testing Time: {end_test_sgd - start_test_sgd:.2f} sec")



ðŸ”¹ SVM Training Started...
âœ… SVM Training Completed in 172.47 sec

ðŸ”¹ SGD SVM Training Started...
âœ… SGD SVM Training Completed in 23.69 sec

ðŸ”¹ SVM Testing Started...
âœ… SVM Testing Completed in 36.73 sec

ðŸ”¹ SGD SVM Testing Started...
âœ… SGD SVM Testing Completed in 0.02 sec

ðŸ”¹ Evaluating SVM Model...
SVM Model Accuracy: 0.9786
Classification Report (SVM):
               precision    recall  f1-score   support

           0       0.96      0.97      0.97       743
           1       0.97      0.96      0.96      1384
           2       0.95      0.96      0.96      1039
           3       0.90      0.93      0.91       511
           4       0.95      0.93      0.94       732
           5       0.96      0.93      0.94       606
           6       0.89      0.96      0.92       376
           7       0.99      0.98      0.98      2253
           8       0.98      0.99      0.98      2509
           9       0.99      0.98      0.99      3384
          10       0.99   