In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import pickle

def hex_to_log(x):
    """Convert a hexadecimal string to ``log1p`` of its integer value.

    Parameters
    ----------
    x : object
        Value to convert. Only ``str`` inputs are parsed, using base 16 as
        accepted by ``int(x, 16)``.

    Returns
    -------
    float
        ``np.log1p(float(int(x, 16)))`` for a parseable, non-negative hex
        string; ``0.0`` for non-string input, unparseable strings, and
        negative values. The fallback is always a float so the result type
        does not depend on which branch was taken.
    """
    # Anything that is not a string (such as NaN) falls back to zero.
    if not isinstance(x, str):
        return 0.0
    try:
        value = int(x, 16)
    except ValueError:
        # Not a valid base-16 literal.
        return 0.0
    # A signed literal such as "-1" parses, but log1p would yield -inf or NaN;
    # treat it like any other invalid field.
    if value < 0:
        return 0.0
    return np.log1p(float(value))


# Location of the input CSV, given as a relative path.
file_path = './datasets/1029_labeled_re.csv'
data = pd.read_csv(file_path)

# Print the data type pandas inferred for each column of the DataFrame `data`.
print(data.dtypes)

Time                 float64
Source                 int64
Destination            int64
Protocol              object
Length                 int64
ID                    object
Data                  object
Same Data              int64
Strange Data           int64
Entropy              float64
Dos Attack            object
Fuzzing Attack        object
Replaying Attack      object
label                  int64
IAT                  float64
IAT_Anomaly            int64
Message_Frequency      int64
Frequency_Anomaly      int64
dtype: object


In [2]:
# Train a RandomForest model, evaluate it, and round-trip it through pickle.

# Convert the hex-string columns into numeric features. The columns are
# overwritten in place, so skip any column that is already numeric: on a
# second run of this cell the values would be floats, hex_to_log maps every
# non-string value to zero, and the feature would be silently wiped.
for hex_col in ('ID', 'Data'):
    if not pd.api.types.is_numeric_dtype(data[hex_col]):
        data[hex_col] = data[hex_col].apply(hex_to_log)

features = ['Time', 'Length', 'ID', 'Data', 'Same Data', 'Entropy','IAT_Anomaly','Frequency_Anomaly']  
X = data[features]
y = data['label']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is not stratified; consider stratify=y if the
# per-class scores of the rare labels matter.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))

model_filename = 'rf_model.pkl'

# Persist the fitted model to disk.
with open(model_filename, 'wb') as file:
    pickle.dump(model, file)
print(f"Model saved to {model_filename}")

# Reload the model to confirm the saved file is usable.
# pickle.load can execute arbitrary code from the file; this is acceptable
# here only because the file was written by this cell a moment ago.
with open(model_filename, 'rb') as file:
    loaded_model = pickle.load(file)
print("Model loaded successfully")

# Score the reloaded model on the same hold-out set so its report can be
# compared with the one printed above.
loaded_y_pred = loaded_model.predict(X_test)
print("Loaded model prediction complete")

print(classification_report(y_test, loaded_y_pred))


              precision    recall  f1-score   support

           0       0.99      0.99      0.99     39906
           1       1.00      1.00      1.00      7059
           2       0.80      0.77      0.79      2065
           3       0.64      0.69      0.66       159

    accuracy                           0.98     49189
   macro avg       0.86      0.86      0.86     49189
weighted avg       0.98      0.98      0.98     49189

Model saved to rf_model.pkl
Model loaded successfully
Loaded model prediction complete
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     39906
           1       1.00      1.00      1.00      7059
           2       0.80      0.77      0.79      2065
           3       0.64      0.69      0.66       159

    accuracy                           0.98     49189
   macro avg       0.86      0.86      0.86     49189
weighted avg       0.98      0.98      0.98     49189

