# In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report
import joblib

def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame.

    ``file_path`` is handed straight to ``pd.read_csv``, so anything that
    function accepts (a path string, a file-like object, ...) works here.
    """
    return pd.read_csv(file_path)

def preprocess_data(data):
    """Clean, encode and split the raw dataset for model training.

    The frame must contain a ``label`` column (the target) and the
    categorical columns ``src``, ``dst`` and ``Protocol``; every other
    column is passed through as a feature unchanged.

    Steps:
      1. Drop every row that has a missing value in any column.
      2. Integer-encode ``label`` with ``LabelEncoder``.
      3. One-hot encode ``src``, ``dst`` and ``Protocol``.
      4. Split 70/30 into train/test sets with ``random_state=42``.

    The caller's DataFrame is not modified.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # Remove incomplete rows *before* any encoding: otherwise a missing label
    # can be encoded as if it were a real class, and a missing categorical
    # value silently turns into an all-zero dummy row that dropna() can no
    # longer see.
    cleaned = data.dropna()

    label_encoder = LabelEncoder()
    # assign() returns a new frame, so the DataFrame passed in by the caller
    # is left untouched (the column keeps its original position).
    cleaned = cleaned.assign(
        label=label_encoder.fit_transform(cleaned['label'])
    )

    cleaned = pd.get_dummies(cleaned, columns=['src', 'dst', 'Protocol'])

    X = cleaned.drop('label', axis=1)
    y = cleaned['label']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )
    return X_train, X_test, y_train, y_test

def train_model(X_train, y_train):
    """Fit a random forest on the training data and return the fitted model.

    The classifier uses 100 trees and a fixed ``random_state`` of 42 so that
    repeated runs on the same data build the same forest.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    # fit() returns the estimator itself, so the fitted model can be
    # returned directly from the call.
    return forest.fit(X_train, y_train)

def save_model(model, model_filename='OC_compare_model.pkl'):
    """Persist ``model`` to ``model_filename`` via joblib and print the path.

    When no filename is given the model is written to
    ``OC_compare_model.pkl``.
    """
    joblib.dump(model, filename=model_filename)
    print(f"Model saved as {model_filename}")

def load_model(model_filename='OC_compare_model.pkl'):
    """Return the object stored in ``model_filename`` by ``joblib.dump``.

    When no filename is given, ``OC_compare_model.pkl`` is read.

    NOTE(review): joblib files are pickle-based, so only load files that
    come from a trusted source.
    """
    return joblib.load(model_filename)


# Load the raw CSV and turn it into encoded train/test splits.
# NOTE(review): the dataset path is hard-coded to /content/ (presumably a
# Colab working directory) -- confirm before running elsewhere.
data = load_data("/content/dataset_sdn.csv")
X_train,X_test,y_train,y_test = preprocess_data(data)

# Train the model with a RandomForest
ddos_model = train_model(X_train, y_train)

# Evaluation is already done in best_selection_method.ipynb, so X_test and
# y_test are not used in this cell.

# Save the model under an explicit name instead of save_model's default
# 'OC_compare_model.pkl'.
save_model(ddos_model, 'ddos_model.pkl')

# Output: Model saved as ddos_model.pkl
