In [2]:
#importing required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Load the raw access-trace dataset.
df = pd.read_csv("mainSimulationAccessTraces.csv")

# Balance the dataset: downsample the "normal" rows to the number of
# non-normal rows, then shuffle the combined frame.
normal_df = df[df['normality'] == "normal"]
anomaly_df = df[df['normality'] != "normal"]
# Cap the sample size at the rows actually available; DataFrame.sample
# (without replacement) raises ValueError when asked for more rows than exist.
n_normal = min(len(normal_df), len(anomaly_df))
balanced_normal_df = normal_df.sample(n=n_normal, random_state=42)
balanced_df = (
    pd.concat([balanced_normal_df, anomaly_df])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Drop 'timestamp': it does not provide direct predictive value without
# transformation and is not essential for the model's performance.
balanced_df.drop(columns=['timestamp'], inplace=True)

# Separate features and target.
X = balanced_df.drop(columns=['normality'])
y = balanced_df['normality']

# Label-encode every object-typed (categorical) feature column.
# NOTE(review): each encoder is discarded after use, so the string -> integer
# mapping cannot be re-applied to new data later; keep the encoders if this
# model is ever used outside this notebook.
for col in X.select_dtypes(include='object').columns:
    X[col] = LabelEncoder().fit_transform(X[col])

# Train/test split is done BEFORE scaling so that the scaler never sees the
# held-out rows (fitting it on the full data leaks test statistics into
# training and inflates the evaluation).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training portion only, then apply the same
# transformation to the test portion.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for compatibility with any later cell that expects `X_scaled`:
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Train an RBF-kernel SVM; class_weight='balanced' reweights classes
# inversely to their frequency in y_train.
svm = SVC(kernel='rbf', class_weight='balanced')
svm.fit(X_train, y_train)

# Evaluate on the held-out test set.
y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred))

                               precision    recall  f1-score   support

         anomalous(DoSattack)       0.98      1.00      0.99      1156
       anomalous(dataProbing)       1.00      1.00      1.00        68
  anomalous(malitiousControl)       1.00      1.00      1.00       178
anomalous(malitiousOperation)       0.99      1.00      1.00       161
              anomalous(scan)       1.00      1.00      1.00       310
            anomalous(spying)       0.95      1.00      0.98       106
        anomalous(wrongSetUp)       1.00      1.00      1.00        24
                       normal       1.00      0.98      0.99      2004

                     accuracy                           0.99      4007
                    macro avg       0.99      1.00      0.99      4007
                 weighted avg       0.99      0.99      0.99      4007

