In [None]:
def binary_classifier(clf, X, y, test_size, n_repeats):
    """
    Repeatedly train and evaluate a binary classifier on random stratified splits.

    For each repetition the data is split into a train and a test set
    (stratified on ``y``), ``clf`` is fitted on the train set and accuracy,
    recall and F1 are computed on the test set. After the last repetition,
    its confusion matrix and metrics are handed to
    ``plot_decision_tree_results``.

    :param clf: Classifier model exposing ``fit`` and ``predict``
    :param X: Features used by the classifier
    :param y: Target to predict
    :param test_size: Share of the data held out for test (forwarded to ``train_test_split``)
    :param n_repeats: Number of times the split/fit/evaluate cycle is run
    :return: Three lists (accuracies, recalls, F1 measures), one value per repetition.
             The lists are empty, and nothing is plotted, when ``n_repeats`` < 1.
    """

    # Metrics collected over the n_repeats repetitions
    accuracies = []
    recalls = []
    Fmeasures = []

    # Without at least one repetition there is no split, prediction or
    # confusion matrix to plot, so return the empty results right away.
    if n_repeats < 1:
        return accuracies, recalls, Fmeasures

    # range(n_repeats) so that exactly n_repeats repetitions are executed
    for _ in range(n_repeats):

        # Split dataset into train and test, keeping the class balance of y
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=test_size
        )

        # Train model
        clf.fit(X_train, y_train)

        # Predict target based on test data
        y_pred = clf.predict(X_test)

        # Store the metrics of this repetition
        accuracies.append(accuracy_score(y_test, y_pred))
        recalls.append(recall_score(y_test, y_pred))
        Fmeasures.append(f1_score(y_test, y_pred))

    # Only the last repetition is plotted, so its confusion matrix is built
    # once here instead of on every iteration.
    cm = confusion_matrix(y_test, y_pred)
    plot_decision_tree_results(
        cm, clf, X_train, y_train, X_test, y_test,
        accuracies[-1], recalls[-1], Fmeasures[-1],
    )

    return accuracies, recalls, Fmeasures

In [None]:
def plot_decision_tree_results(cm, clf, X_train, y_train, X_test, y_test, accuracy, recall, Fmeasure):
    """
    Plot tree, confusion matrix and accuracy, recall, F1 measure from a decision tree model.

    The confusion matrix is displayed first, the three metrics are printed,
    then the fitted tree is drawn in a second figure.

    :param cm: Confusion matrix
    :param clf: Classifier model (expected to be a decision tree that ``tree.plot_tree`` can draw)
    :param X_train: Data features to train the model; must expose ``.columns``
                    (presumably a pandas DataFrame - confirm against callers)
    :param y_train: Data target to train the model
    :param X_test: Data features to test the model (kept for interface compatibility, not used here)
    :param y_test: Data target to test the model (kept for interface compatibility, not used here)
    :param accuracy, recall, Fmeasure: Metrics to evaluate the model
    """

    # Fit on the training split. Fitting on the test split would draw a tree
    # unrelated to the model the metrics describe, and would leave the
    # caller's classifier trained on test data.
    clf.fit(X_train, y_train)

    # Plot confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                  display_labels=clf.classes_)
    disp.plot()
    plt.title("Confusion Matrix", fontsize=20)
    plt.show()
    print("accuracy :", accuracy)
    print("recall :", recall)
    print("Fmeasure :", Fmeasure)

    # Labels are passed as plain lists of str: the column *labels* are
    # converted (not the data values), so non-string column names also work.
    feature_names = [str(column) for column in X_train.columns]
    class_names = [str(label) for label in clf.classes_]

    # Plot decision tree
    plt.figure(figsize=(25, 10))
    tree.plot_tree(clf,
                   feature_names=feature_names,
                   class_names=class_names,
                   fontsize=25,
                   filled=True)
    plt.title("Decision Tree", fontsize=35)

    plt.show()

In [None]:
def cross_validation_binary_classifier(clf, X, y, test_size, n_repeats, n_splits=None):
    """
    Repeated k-fold cross-validation of a classifier clf.

    :param clf: Classifier model
    :param X: Features
    :param y: Target to predict
    :param test_size: Fraction of the data used as test in each fold, strictly
                      between 0 and 1. Only used when ``n_splits`` is not given:
                      the number of folds is then ``round(1 / test_size)``
                      (at least 2), since each of k folds holds out 1/k of the data.
    :param n_repeats: Number of times the k-fold cross-validation is repeated
    :param n_splits: Optional explicit number of folds; overrides ``test_size``
    :return: Accuracy, recall and F1 scores as returned by ``cross_val_score``,
             computed on the same folds for the three metrics
    :raises ValueError: If ``n_splits`` is not given and ``test_size`` is not
                        strictly between 0 and 1
    """

    # Derive the number of folds from the requested test share when the
    # caller did not fix it explicitly.
    if n_splits is None:
        if not 0 < test_size < 1:
            raise ValueError(
                "test_size must be strictly between 0 and 1 when n_splits is not given, "
                f"got {test_size!r}"
            )
        n_splits = max(2, round(1 / test_size))

    # Execute Cross-Validation
    cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)

    # Materialise the folds once: the splitter is not seeded, so handing it to
    # each cross_val_score call would score every metric on different folds.
    folds = list(cv.split(X, y))

    # Getting metrics values on identical folds
    accuracies = cross_val_score(clf, X, y, scoring='accuracy', cv=folds, error_score="raise")
    recalls = cross_val_score(clf, X, y, scoring='recall', cv=folds, error_score="raise")
    Fmeasures = cross_val_score(clf, X, y, scoring='f1', cv=folds, error_score="raise")

    return accuracies, recalls, Fmeasures