In [36]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report
import joblib

def load_data(file_path):
    """Read the CSV file at ``file_path`` and return it as a DataFrame."""
    return pd.read_csv(file_path)

def preprocess_data(data):
    """Encode, clean and split the raw frame into train/test sets.

    The ``label`` column is integer-encoded with ``LabelEncoder``, the
    ``src``, ``dst`` and ``Protocol`` columns are one-hot encoded, rows that
    still contain missing values are dropped, and the result is split 70/30
    with a fixed seed (``random_state=42``).

    Args:
        data: DataFrame containing at least the columns ``label``, ``src``,
            ``dst`` and ``Protocol``. It is left unmodified.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    required = ['label', 'src', 'dst', 'Protocol']
    missing = [column for column in required if column not in data.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Work on a copy: the original assigned the encoded labels straight into
    # the caller's DataFrame, silently overwriting its raw label values.
    data = data.copy()

    label_encoder = LabelEncoder()
    data['label'] = label_encoder.fit_transform(data['label'])

    data = pd.get_dummies(data, columns=['src', 'dst', 'Protocol'])

    # Drop rows that still have missing values in the remaining columns.
    data = data.dropna()

    X = data.drop('label', axis=1)
    y = data['label']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    return X_train, X_test, y_train, y_test

def train_model(X_train, y_train):
    """Fit a 100-tree random forest (seed 42) on the training split.

    Returns the fitted ``RandomForestClassifier``.
    """
    forest = RandomForestClassifier(random_state=42, n_estimators=100)
    forest.fit(X_train, y_train)
    return forest

def evaluate_model(model, X_test, y_test):
    """Print the accuracy and the classification report of ``model``.

    Predictions are made on ``X_test`` and compared against ``y_test``.
    Nothing is returned; the metrics are only printed.
    """
    predicted = model.predict(X_test)
    print(f"Accuracy: {accuracy_score(y_test, predicted)}")
    report = classification_report(y_test, predicted)
    print("Classification Report:\n", report)

def save_model(model, model_filename='OC_compare_model.pkl'):
    """Serialize ``model`` to ``model_filename`` with joblib and report the path."""
    joblib.dump(model, model_filename)
    confirmation = f"Model saved as {model_filename}"
    print(confirmation)

def load_model(model_filename='OC_compare_model.pkl'):
    """Return the object stored in ``model_filename`` by ``joblib.dump``.

    NOTE(review): joblib files are pickle-based, so only load files that
    come from a trusted source.
    """
    return joblib.load(model_filename)

def predict_instance(model, input_instance, train_features=None, train_labels=None):
    """Print a table comparing several estimators' predictions for one instance.

    The supplied ``model`` is used as-is; a set of baseline estimators (SVM,
    decision tree, logistic regression, KMeans, PCA) is freshly fitted on the
    training data and asked for a prediction on ``input_instance``.

    Args:
        model: Already-fitted estimator. Its prediction is listed first under
            its class name. Pass ``None`` to leave it out.
        input_instance: 2-D array-like / DataFrame holding the instance to
            classify, with the same columns as the training features. Only
            the first row's prediction is reported.
        train_features: Training feature matrix. Defaults to the module-level
            ``X_train`` so existing two-argument calls keep working.
        train_labels: Training labels. Defaults to the module-level
            ``y_train``.

    Raises:
        NameError: If the training data is not passed and the module-level
            ``X_train`` / ``y_train`` do not exist.
    """
    # Fall back to the globals the original implementation relied on.
    if train_features is None:
        train_features = X_train
    if train_labels is None:
        train_labels = y_train

    methods = {
        'SVM': SVC(),
        # Seeded like the rest of the file so repeated runs agree.
        'DecisionTree': DecisionTreeClassifier(random_state=42),
        'LogisticRegression': LogisticRegression(),
        # KMeans is unsupervised: it ignores the labels during fit and its
        # "prediction" is a cluster index, not a class label.
        'KMeans': KMeans(n_clusters=2, random_state=42),
        'PCA': PCA(n_components=2),
    }

    predictions_table = {}

    # The supplied model was previously ignored; report its prediction too.
    if model is not None and hasattr(model, 'predict'):
        predictions_table[type(model).__name__] = model.predict(input_instance)[0]

    for method, method_model in methods.items():
        # Transformers such as PCA have no predict(); calling it used to
        # abort the whole comparison with AttributeError.
        if not hasattr(method_model, 'predict'):
            predictions_table[method] = 'N/A (no predict method)'
            continue
        method_model.fit(train_features, train_labels)
        prediction = method_model.predict(input_instance)
        predictions_table[method] = prediction[0]

    # Print the predictions table
    print("Predictions for the given instance:")
    print(pd.DataFrame(list(predictions_table.items()), columns=['Method', 'Predicted Class']))

# Read the CSV file and build the train/test split
data = load_data(file_path="/content/testdata.csv")
X_train, X_test, y_train, y_test = preprocess_data(data)

# Fit the random-forest classifier on the training split
ddos_model = train_model(X_train, y_train)

# Print accuracy and the per-class report for the held-out split
evaluate_model(ddos_model, X_test, y_test)

# Persist the fitted model to disk
save_model(ddos_model, model_filename='ddos_model.pkl')

# Reload the persisted model for later predictions
loaded_model = load_model(model_filename='ddos_model.pkl')

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5

Model saved as ddos_model.pkl
