In [1]:
import polars as pl
import numpy as np
from datetime import datetime
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, balanced_accuracy_score

In [2]:
# Load the full dataset from the Parquet file in the working directory.
df_polars = pl.read_parquet(
    'dataset.parquet'
)

In [3]:
# Keep a reproducible 1% random subsample (fixed seed) of the loaded frame.
# Note: this rebinds `df_polars`, so re-running this cell alone samples the
# already-sampled frame again.
df_polars = df_polars.sample(
    fraction=0.01,
    seed=42,
)

In [4]:
# Replace missing values with 0 in the three traffic-volume columns; every
# other column is left untouched.
df_polars = df_polars.with_columns(
    pl.col('duration', 'orig_bytes', 'resp_bytes').fill_null(0)
)

In [5]:
# Columns removed before modelling (everything that remains, except `label`,
# is later used as a feature).
columns_to_drop = [
    "ts",
    "uid",
    "id.orig_h",
    "id.resp_h",
    "local_orig",
    "local_resp",
    "missed_bytes",
    "tunnel_parents",
    "detailed-label",
    "__index_level_0__",
]
df_polars = df_polars.drop(columns_to_drop)

In [6]:
# Separate the target column from the feature columns.
y = df_polars.get_column('label')
X = df_polars.drop('label')

In [7]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Treinamento

In [9]:
# Train one SVC per kernel, evaluate it on the held-out split, and append one
# row of metrics per kernel to `results`.
kernels = ['rbf']
results = []

for kernel in kernels:
    # NOTE(review): max_iter=5000 caps the solver's iterations; if sklearn
    # emits a ConvergenceWarning here, the metrics below come from a model
    # that stopped early -- confirm before trusting them.
    svm = SVC(kernel=kernel, C=1, gamma='scale', tol=0.001, max_iter=5000)

    # Timestamps before/after fit and predict give a rough wall-clock cost.
    print(datetime.now())
    svm.fit(X_train, y_train)
    print(f"Treino Concluído para kernel={kernel}")
    print(datetime.now())

    y_pred = svm.predict(X_test)
    print(f"Predição Concluída para kernel={kernel}")
    print(datetime.now())

    # Unpacking four values from ravel() only works for a 2x2 (binary)
    # confusion matrix.
    confusion = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    accuracy = accuracy_score(y_test, y_pred)
    balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Specificity and false-alarm rate share the same denominator (all actual
    # negatives). Guard both the same way so an evaluation split with no
    # negatives yields 0 instead of a division-by-zero NaN.
    negatives = tn + fp
    specificity = tn / negatives if negatives > 0 else 0
    false_alarm_rate = fp / negatives if negatives > 0 else 0

    results.append([kernel, accuracy, balanced_accuracy, precision, recall, specificity, f1, false_alarm_rate, tn, fp, fn, tp])

2024-09-16 14:35:22.492069




Treino Concluído para kernel=rbf
2024-09-16 14:43:44.498681
Predição Concluída para kernel=rbf
2024-09-16 14:48:08.179185


In [10]:
# Build the summary table: each inner list in `results` is one row (one
# kernel). State the orientation explicitly so polars does not have to infer
# it from the data and warn about the inference.
metrics_df = pl.DataFrame(
    results,
    schema=['Kernel', 'Accuracy', 'Balanced Accuracy' , 'Precision', 'Recall', 'Specificity', 'F1-score', 'False Alarm Rate', 'tn', 'fp', 'fn', 'tp'],
    orient="row",
)
metrics_df

  return dispatch(args[0].__class__)(*args, **kw)


Kernel,Accuracy,Balanced Accuracy,Precision,Recall,Specificity,F1-score,False Alarm Rate,tn,fp,fn,tp
str,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64
"""rbf""",0.990088,0.991068,0.998557,0.989626,0.992511,0.994071,0.007489,26505,200,1451,138414
