In [None]:
import numpy as np
import preprocessing as pp
import evaluator as ev

# Seed value shared by the notebook's models (e.g. passed as LightGBM's 'seed').
RANDOM_STATE = 42

In [None]:
import data_loader as dl
import importlib
# Reload data_loader so edits to the module take effect without restarting the kernel.
importlib.reload(dl)
# Load the dataset through the project loader; its structure is defined by dl.load_data.
m = dl.load_data()
# Bare trailing expression: the notebook renders the loaded object as the cell output.
m

In [None]:
from sklearn.ensemble import RandomForestClassifier
def predict_random_forest(df):
    """Train a random forest on the prepared data and evaluate it.

    Args:
        df: Dataset handed unchanged to ``pp.prepare_data``, which returns the
            train/test features and labels, the test frame, the label map and
            the feature list.

    Returns:
        None. All reporting is delegated to ``ev.evaluate``.
    """
    x_train, y_train, x_test, y_test, test_df, label_map, features = pp.prepare_data(df)

    rf_model = RandomForestClassifier(
        n_estimators=500,
        max_depth=None,
        # Use the notebook-wide seed (same constant predict_lightgbm uses)
        # instead of a separate literal, so all models are seeded from one place.
        random_state=RANDOM_STATE,
        class_weight='balanced',
    )
    rf_model.fit(x_train, y_train)

    # Per-class probabilities for the test rows (the classes are referred to
    # as H_or_D and A in this notebook); the predicted label is the column
    # index with the highest probability.
    prob = rf_model.predict_proba(x_test)
    pred_labels = np.argmax(prob, axis=1)

    ev.evaluate(y_test, prob, pred_labels, test_df, label_map, features, rf_model.feature_importances_)

In [None]:
# Reload the project helper modules so their latest edits are used, then run the random forest.
importlib.reload(pp)
importlib.reload(ev)
predict_random_forest(m)

In [None]:
import lightgbm as lgb
def predict_lightgbm(df):
    """Fit a LightGBM booster on the prepared split and pass its output to the evaluator.

    Args:
        df: Dataset forwarded to ``pp.prepare_data``, which supplies the
            train/test features and labels, the test frame, the label map and
            the feature list.

    Returns:
        None. Results are reported via ``ev.evaluate``.
    """
    train_x, train_y, test_x, test_y, test_df, label_map, features = pp.prepare_data(df)

    # NOTE(review): 'multiclassova' trains one-vs-all binary classifiers, so the
    # per-class scores may not sum to 1 per row - confirm ev.evaluate tolerates that.
    booster_params = dict(
        objective='multiclassova',
        num_class=2,
        metric='multi_logloss',
        verbosity=-1,
        seed=RANDOM_STATE,
        learning_rate=0.05,
        num_leaves=31,
        min_data_in_leaf=20,
    )
    booster = lgb.train(
        booster_params,
        lgb.Dataset(train_x, label=train_y),
        num_boost_round=1000,
    )

    # One score column per class; the predicted label is the best-scoring column.
    class_scores = booster.predict(test_x)
    predicted = np.argmax(class_scores, axis=1)
    gain_importance = booster.feature_importance(importance_type='gain')

    ev.evaluate(test_y, class_scores, predicted, test_df, label_map, features, gain_importance)

In [None]:
# Reload the project helper modules so their latest edits are used, then run the LightGBM model.
importlib.reload(pp)
importlib.reload(ev)
predict_lightgbm(m)

In [None]:
from xgboost import XGBClassifier

def predict_xgboost(df):
    """Train an XGBoost classifier on the prepared data and evaluate it.

    Args:
        df: Dataset handed unchanged to ``pp.prepare_data``, which returns the
            train/test features and labels, the test frame, the label map and
            the feature list.

    Returns:
        None. All reporting is delegated to ``ev.evaluate``.
    """
    X_train, y_train, X_test, y_test, test_df, label_map, features = pp.prepare_data(df)

    xgb_model = XGBClassifier(
        n_estimators=500,
        num_class=2,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        # Notebook-wide seed, consistent with the other model functions.
        random_state=RANDOM_STATE,
        objective='multi:softprob',  # for multiclass probability output
        eval_metric='mlogloss'       # multi-class log loss
    )

    # Fit model
    xgb_model.fit(X_train, y_train)

    # Predict probabilities; the predicted label is the most probable class column.
    prob = xgb_model.predict_proba(X_test)
    pred_labels = np.argmax(prob, axis=1)

    # get_score() returns a {feature_name: gain} dict that leaves out features
    # never used in a split. The sibling model functions pass the evaluator an
    # array with one entry per training column, so build the same shape here,
    # filling unused features with 0.0.
    booster = xgb_model.get_booster()
    gain_by_feature = booster.get_score(importance_type='gain')
    feature_names = booster.feature_names
    if feature_names is None:
        # Without explicit column names the booster keys features as f0, f1, ...
        feature_names = ['f{}'.format(i) for i in range(booster.num_features())]
    feature_importances = np.array(
        [gain_by_feature.get(name, 0.0) for name in feature_names]
    )

    # Evaluate model
    ev.evaluate(y_test, prob, pred_labels, test_df, label_map, features, feature_importances)

In [None]:
# Reload the project helper modules so their latest edits are used, then run the XGBoost model.
importlib.reload(pp)
importlib.reload(ev)
predict_xgboost(m)