<a href="https://colab.research.google.com/github/Raziasultan-786/machine-learning-01/blob/main/CICIDS_2017_ML_Analysis_Part3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CMP7239 Applied Machine Learning Assignment - Part 3
## Machine Learning Models Implementation and Evaluation

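**Note:** This notebook continues the analysis from Parts 1 and 2. The cells below rely on objects that are not created here — the train/test split (`X_train`, `y_train`, `X_test`, `y_test`), the `XGBOOST_AVAILABLE` flag, and the scikit-learn/XGBoost imports — so these must already be defined in the current runtime before running this notebook.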


## 6. Machine Learning Models Implementation

In [None]:
def train_random_forest(X_train, y_train, X_test, y_test):
    """
    Tune a Random Forest by grid search and predict on the test features.

    Every combination in the hyperparameter grid is scored with 3-fold
    cross-validation using macro-averaged F1. The estimator exposed by the
    search as ``best_estimator_`` then produces label and probability
    predictions for ``X_test``.

    Args:
        X_train: Training features handed to ``GridSearchCV.fit``.
        y_train: Training labels handed to ``GridSearchCV.fit``.
        X_test: Features to generate predictions for.
        y_test: Accepted but not used inside this function.

    Returns:
        A tuple ``(best_rf, y_pred, y_pred_proba)``: the selected estimator,
        its ``predict`` output on ``X_test`` and its ``predict_proba`` output
        on ``X_test``.
    """
    print("=== RANDOM FOREST CLASSIFIER ===")

    # Fixed seed so repeated runs build the same forests.
    base_forest = RandomForestClassifier(random_state=42)

    # 2 * 3 * 2 * 2 = 24 candidate settings.
    search_space = dict(
        n_estimators=[100, 200],
        max_depth=[10, 20, None],
        min_samples_split=[2, 5],
        min_samples_leaf=[1, 2],
    )

    print("Performing hyperparameter tuning...")
    tuner = GridSearchCV(
        estimator=base_forest,
        param_grid=search_space,
        scoring='f1_macro',
        cv=3,
        n_jobs=-1,
        verbose=1,
    )
    tuner.fit(X_train, y_train)

    # Report the winning configuration and its mean CV score.
    print(f"\nBest parameters: {tuner.best_params_}")
    print(f"Best cross-validation score: {tuner.best_score_:.4f}")

    best_rf = tuner.best_estimator_
    return best_rf, best_rf.predict(X_test), best_rf.predict_proba(X_test)

# Run the Random Forest pipeline only when X_train is defined in this namespace
if 'X_train' not in locals():
    print("Cannot train Random Forest - training data not available")
else:
    rf_model, rf_pred, rf_pred_proba = train_random_forest(X_train, y_train, X_test, y_test)

In [None]:
def train_svm(X_train, y_train, X_test, y_test):
    """
    Train and evaluate SVM classifier with hyperparameter tuning

    The grid is searched with 3-fold cross-validation and macro-averaged F1
    as the selection metric. The estimator exposed by the search as
    ``best_estimator_`` is used to predict labels and class probabilities
    for ``X_test``.

    Args:
        X_train: Training features handed to ``GridSearchCV.fit``.
        y_train: Training labels handed to ``GridSearchCV.fit``.
        X_test: Features to generate predictions for.
        y_test: Accepted but not used inside this function.

    Returns:
        A tuple ``(best_svm, y_pred, y_pred_proba)``: the selected estimator,
        its ``predict`` output on ``X_test`` and its ``predict_proba`` output
        on ``X_test``.
    """
    print("\n=== SUPPORT VECTOR MACHINE ===")

    # Define parameter grid for hyperparameter tuning.
    # `gamma` only affects the RBF kernel; the linear kernel ignores it. A
    # single dict would cross gamma with kernel='linear' and fit every
    # linear/C pair twice with identical results, so the kernels get separate
    # sub-grids (9 distinct candidates instead of 12).
    param_grid = [
        {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']},
        {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    ]

    # Create SVM classifier; probability=True is required for predict_proba
    svm = SVC(random_state=42, probability=True)

    # Perform Grid Search
    print("Performing hyperparameter tuning...")
    grid_search = GridSearchCV(
        svm, param_grid, cv=3, scoring='f1_macro', n_jobs=-1, verbose=1
    )

    grid_search.fit(X_train, y_train)

    # Best parameters (a linear-kernel winner reports no 'gamma' entry)
    print(f"\nBest parameters: {grid_search.best_params_}")
    print(f"Best cross-validation score: {grid_search.best_score_:.4f}")

    # Estimator selected by the search
    best_svm = grid_search.best_estimator_

    # Make predictions
    y_pred = best_svm.predict(X_test)
    y_pred_proba = best_svm.predict_proba(X_test)

    return best_svm, y_pred, y_pred_proba

# Run the SVM pipeline only when X_train is defined in this namespace
if 'X_train' not in locals():
    print("Cannot train SVM - training data not available")
else:
    svm_model, svm_pred, svm_pred_proba = train_svm(X_train, y_train, X_test, y_test)

In [None]:
def train_third_model(X_train, y_train, X_test, y_test):
    """
    Tune and evaluate the third classifier: XGBoost, else Logistic Regression.

    The module-level ``XGBOOST_AVAILABLE`` flag decides which estimator and
    hyperparameter grid are used. Either way the grid is searched with 3-fold
    cross-validation ranked by macro-averaged F1, and the estimator exposed
    by the search as ``best_estimator_`` predicts on ``X_test``.

    Args:
        X_train: Training features handed to ``GridSearchCV.fit``.
        y_train: Training labels handed to ``GridSearchCV.fit``.
        X_test: Features to generate predictions for.
        y_test: Accepted but not used inside this function.

    Returns:
        A tuple ``(best_model, y_pred, y_pred_proba)``: the selected
        estimator, its ``predict`` output on ``X_test`` and its
        ``predict_proba`` output on ``X_test``.
    """
    # The branches differ only in heading, grid and estimator; the search,
    # reporting and prediction steps below are shared.
    if XGBOOST_AVAILABLE:
        print("\n=== XGBOOST CLASSIFIER ===")
        search_space = {
            'n_estimators': [100, 200],
            'max_depth': [3, 6],
            'learning_rate': [0.1, 0.2],
            'subsample': [0.8, 1.0],
        }
        candidate = xgb.XGBClassifier(random_state=42, eval_metric='mlogloss')
    else:
        print("\n=== LOGISTIC REGRESSION ===")
        search_space = {
            'C': [0.1, 1, 10, 100],
            'solver': ['liblinear', 'lbfgs'],
            'max_iter': [1000, 2000],
        }
        candidate = LogisticRegression(random_state=42)

    print("Performing hyperparameter tuning...")
    tuner = GridSearchCV(
        candidate, search_space, cv=3, scoring='f1_macro', n_jobs=-1, verbose=1
    )
    tuner.fit(X_train, y_train)

    # Report the winning configuration and its mean CV score.
    print(f"\nBest parameters: {tuner.best_params_}")
    print(f"Best cross-validation score: {tuner.best_score_:.4f}")

    best_model = tuner.best_estimator_

    y_pred = best_model.predict(X_test)
    y_pred_proba = best_model.predict_proba(X_test)
    return best_model, y_pred, y_pred_proba

# Run the third-model pipeline only when X_train is defined in this namespace
if 'X_train' not in locals():
    print("Cannot train third model - training data not available")
else:
    third_model, third_pred, third_pred_proba = train_third_model(X_train, y_train, X_test, y_test)