# Training Part 

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer,classification_report, matthews_corrcoef, accuracy_score, average_precision_score, roc_auc_score

#### Read the training data and split it into features and target

In [None]:
# Load the training set; the 'Class' column is the target and every other
# column is used as a feature.
transactions = pd.read_csv('train.csv')
y_train = transactions['Class']
X_train = transactions.drop(columns='Class')

In [None]:
# Intended number of cross-validation folds for hyper-parameter tuning.
num_folds = 5
# Optional Matthews-correlation scorer, currently disabled; without it the
# grid search falls back to the estimator's default scoring.
# MCC_scorer = make_scorer(matthews_corrcoef)

#### Tuning parameters

In [None]:
# Forest sizes (number of trees) to try during the grid search.
n_estimators = [700, 800, 1000, 1500]

# Search space: only the number of trees varies; out-of-bag scoring is
# switched on for every candidate.
param_grid_rf = dict(
    n_estimators=n_estimators,
    oob_score=[True],
)

# Base estimator; a fixed seed keeps runs reproducible and all cores are used.
rf = RandomForestClassifier(random_state=1, n_jobs=-1)

In [None]:
# Exhaustive search over param_grid_rf. No `scoring` argument is given, so
# each candidate is ranked with the estimator's default scorer.
grid_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid_rf,
    cv=num_folds,  # reuse the shared fold count instead of repeating the literal 5
    n_jobs=-1,
    pre_dispatch='2*n_jobs',
    verbose=1,
    return_train_score=False,
)


In [None]:
# Run the cross-validated grid search on the training data. `refit` is not
# overridden, so the best configuration is refit on the full training set.
grid_rf.fit(X_train, y_train)

#### The best score and the best parameters

In [None]:
# Mean cross-validated score of the best parameter combination (default
# scorer, since no `scoring` was passed to GridSearchCV).
grid_rf.best_score_

In [None]:
# Parameter combination from param_grid_rf that achieved the best score.
grid_rf.best_params_

# Evaluation Part 

In [None]:
# Load the held-out validation set and split it the same way as the training
# data: 'Class' is the target, the remaining columns are features.
evaluation = pd.read_csv('validation.csv')
y_eval = evaluation['Class']
X_eval = evaluation.drop(columns='Class')

In [None]:
def Random_Forest_eval(estimator, X_test, y_test):
    """Print evaluation metrics for a fitted classifier on a test set.

    Always prints the classification report. When the target has at most two
    distinct values, also prints the area under the precision-recall curve
    (AUPRC) and the area under the ROC curve (AUROC).

    Parameters
    ----------
    estimator
        Fitted object exposing ``predict`` and either ``predict_proba`` or
        ``decision_function``.
    X_test
        Feature data passed to the estimator.
    y_test
        True labels; must provide ``nunique()`` (e.g. a pandas Series).

    Returns
    -------
    None
        All results are written to standard output.
    """
    y_pred = estimator.predict(X_test)

    print('Classification Report')
    print(classification_report(y_test, y_pred))

    # Ranking metrics are only reported for (at most) binary targets.
    if y_test.nunique() <= 2:
        try:
            # Score = predicted probability of the second class column.
            y_score = estimator.predict_proba(X_test)[:, 1]
        except (AttributeError, NotImplementedError):
            # Only fall back when probabilities are genuinely unavailable;
            # any other failure (or Ctrl-C) must surface instead of being
            # silently masked as it was with a bare ``except:``.
            y_score = estimator.decision_function(X_test)
        print('AUPRC', average_precision_score(y_test, y_score))
        print('AUROC', roc_auc_score(y_test, y_score))
    

In [None]:
# Evaluate on the validation set. The fitted GridSearchCV object is passed
# directly; with the default refit it predicts using the best estimator found.
Random_Forest_eval(grid_rf, X_eval, y_eval)