In [None]:
%run metrics.ipynb

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

In [None]:
def ForestModel(X_train, X_test, y_train, y_test):
    """
    Run the Random Forest pipeline that predicts the next event.

    param:  X_train, X_test  feature tables; must contain the columns
                             "event concept:name", "prev_event",
                             "event lifecycle:transition" and "prev_lifecycle"
                             (the docstring this replaces stated that the
                             inputs have to be already encoded)
            y_train, y_test  target tables holding a "next_event" column
    return: y_test, with "next_event" replaced by its encoded values and a new
            "forest_pred" column holding the forest's predictions

    NOTE: y_train and y_test are modified in place. The metrics are reported
    through event_metrics, which is defined outside this cell (presumably
    loaded from metrics.ipynb by the %run cell -- confirm).
    """
    # Overwrite the target column of both sets with its encoded form.
    encoded_train, encoded_test = pred_encoder_forest(y_train, y_test)
    y_train["next_event"] = encoded_train
    y_test["next_event"] = encoded_test

    feature_columns = [
        "event concept:name",
        "prev_event",
        "event lifecycle:transition",
        "prev_lifecycle",
    ]
    train_features, test_features, train_target, test_target = feature_selection_forest(
        X_train, X_test, y_train, y_test, feature_columns
    )

    predictions = train_forest(train_features, test_features, train_target)
    event_metrics(test_target, predictions, model="Random Forest")

    # Keep the predictions next to the true labels for the caller.
    y_test["forest_pred"] = predictions
    return y_test


def train_forest(X_train, X_test, y_train):
    """
    Train the forest and predict outcomes

    param:  X_train  feature rows used to fit the classifier
            X_test   feature rows to predict outcomes for
            y_train  target labels for X_train; a 2-D input with a single
                     column (e.g. a one-column DataFrame) is flattened to 1-D
                     before fitting
    return: the fitted forest's predictions for X_test
    """
    # Local import keeps this cell self-contained; numpy is a hard dependency
    # of scikit-learn, so it is always available here.
    import numpy as np

    # scikit-learn expects a 1-D target for single-output problems and emits a
    # DataConversionWarning for an (n_samples, 1) column vector. Flattening it
    # ourselves yields the same fitted model without the warning. Targets with
    # several columns (multi-output) are passed through untouched.
    target = y_train
    if getattr(y_train, "ndim", None) == 2 and y_train.shape[1] == 1:
        target = np.asarray(y_train).ravel()

    forest_clf = RandomForestClassifier(
        n_estimators=10,
        max_depth=160,
        bootstrap=True,
        criterion="entropy",
        random_state=42,  # fixed seed so repeated runs give the same forest
    )
    forest_clf.fit(X_train, target)
    return forest_clf.predict(X_test)

def feature_selection_forest(X_train, X_test, y_train, y_test, labels_x):
    """
    Pick the columns used to train and evaluate the model.

    param:  X_train, X_test  feature tables
            y_train, y_test  target tables holding a "next_event" column
            labels_x         list of feature column names to keep
    return: (train features, test features, train target, test target);
            both targets are one-column tables holding only "next_event"
    """
    # Selecting with a list keeps the targets two-dimensional (one column).
    target_column = ["next_event"]
    return (
        X_train[labels_x],
        X_test[labels_x],
        y_train[target_column],
        y_test[target_column],
    )

def pred_encoder_forest(y_train, y_test):
    """
    Encodes the "next_event" labels as integers so that they can be used for
    prediction calculations

    param:  y_train, y_test  tables that both hold a "next_event" column
    return: (encoded train labels, encoded test labels), each a 1-D array

    The encoder is fitted on the labels of BOTH sets. When every test label
    also occurs in the train set the resulting codes are the same as with a
    train-only fit; when the test set holds extra labels they get codes of
    their own instead of making the transform fail.
    """
    # Local import keeps this cell self-contained; numpy is a hard dependency
    # of scikit-learn, so it is always available here.
    import numpy as np

    train_events = y_train["next_event"]
    test_events = y_test["next_event"]

    # LabelEncoder.transform raises ValueError for labels it has not seen
    # during fit, so a label that only occurs in the test set would abort the
    # whole pipeline if the encoder were fitted on the train labels alone.
    labels = np.concatenate(
        [np.asarray(train_events.unique()), np.asarray(test_events.unique())]
    )
    event_encoder = LabelEncoder()
    event_encoder.fit(labels)

    # Pass the 1-D columns (not one-column tables) so scikit-learn does not
    # have to flatten them and warn about it.
    y_train_x = event_encoder.transform(train_events)
    y_test_x = event_encoder.transform(test_events)
    return y_train_x, y_test_x