In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score

In [2]:
# Load the source CSV into the working DataFrame.
csv_path = 'data/flow_ver1.csv'
data = pd.read_csv(csv_path)

In [74]:
# Binarize the target column: 'BENIGN' -> 0, any other value -> 1.
# Values that are already 0 or 1 are passed through unchanged so that
# re-running this cell is safe; without the guard a second run would map
# every previously encoded 0 to 1, because 0 != 'BENIGN'.
data['Label'] = data['Label'].map(
    lambda x: x if x in (0, 1) else (0 if x == 'BENIGN' else 1)
)

In [75]:
# Separate the target from the predictors: keep an independent copy of the
# 'Label' column, then rebind `data` to a frame that no longer contains it.
label = data.loc[:, 'Label'].copy()
data = data.drop('Label', axis=1)

In [76]:
# Hold out 10% of the rows for evaluation, stratified on the target and
# seeded so the partition is repeatable.
split_parts = train_test_split(
    data,
    label,
    test_size=0.1,
    stratify=label,
    random_state=42,
)
train_data, test_data, train_label, test_label = split_parts

In [77]:
# Fit a random forest on the columns from position 8 onward; the first eight
# columns are left out of the feature matrix.
# random_state is pinned (same seed as the split and sampling cells) so the
# fitted model and the reported metrics are reproducible across runs.
# NOTE(review): the slice is positional, so any extra column appended to `data`
# before this cell runs (e.g. a derived 'label' column) would be used as a
# feature — confirm the column layout before trusting the scores.
model = RandomForestClassifier(random_state=42)
model.fit(train_data.iloc[:, 8:], train_label)

In [79]:
# Predict on the held-out rows using the same positional column slice that
# was used for fitting.
test_features = test_data.iloc[:, 8:]
preds = model.predict(test_features)

In [80]:
def score(y_true, y_pred):
    """Compute accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        dict: Keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1_score'``, each holding the value returned by the corresponding
        scikit-learn metric called with its default options.
    """
    # (result key, metric function) pairs, evaluated in this order.
    metric_fns = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
    return {name: fn(y_true, y_pred) for name, fn in metric_fns}

In [81]:
# Evaluate the hold-out predictions; the resulting dict is the cell output.
score(y_true=test_label, y_pred=preds)

{'accuracy': 0.9998513352794897,
 'precision': 0.9998508575689784,
 'recall': 0.999896742809284,
 'f1_score': 0.999873799662701}

In [86]:
# Persist the fitted model. The target directory is created first so the cell
# also works on a fresh checkout where 'model/' does not exist yet
# (joblib.dump would otherwise fail with FileNotFoundError).
model_path = 'model/RF.model'
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)

['model/RF.model']

In [4]:
# Reload the raw CSV before exploring it: earlier in the notebook `data` has
# its 'Label' column binarized and then dropped, so filtering on the original
# string labels would raise KeyError when the notebook is run top to bottom.
data = pd.read_csv('data/flow_ver1.csv')

# Partition rows by their original string label.
safe = data[data['Label'] == 'BENIGN']
non_safe = data[data['Label'] == 'APT']

In [10]:
# Draw 10 rows from each class with a fixed seed, replacing the full subsets.
sample_kwargs = {'n': 10, 'random_state': 42}
safe = safe.sample(**sample_kwargs)
non_safe = non_safe.sample(**sample_kwargs)

In [51]:
# Combine both samples, shuffle, order by 'publicIP' and renumber the rows.
# The shuffle is seeded (same seed as the sampling cells): the default sort is
# not stable, so an unseeded shuffle would let the order of rows sharing a
# publicIP vary from run to run.
final = (
    pd.concat([safe, non_safe])
    .sample(frac=1.0, random_state=42)
    .sort_values(by='publicIP')
    .reset_index(drop=True)
)

In [15]:
# Add a numeric companion to the string 'Label' column:
# 0 for 'BENIGN', 1 for every other value.
data['label'] = data['Label'].map(lambda tag: 0 if tag == 'BENIGN' else 1)

In [40]:
# Per publicIP: 'Label' holds the number of non-null Label entries and
# 'label' holds the sum of the 0/1 flags (i.e. the count of non-BENIGN rows).
temp = data.groupby('publicIP').agg({'Label': 'count', 'label': 'sum'})

# Flag IPs that have at least one non-benign row but are not entirely
# non-benign. Computed as a vectorized mask instead of a row-wise apply.
is_mixed = (temp['Label'] != temp['label']) & (temp['label'] != 0)
temp['x'] = np.where(is_mixed, 'yes', 'no')