In [7]:
import mlflow
import pandas as pd
from xgboost import XGBClassifier, plot_importance
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split


In [3]:
# Notebook-wide configuration, read by the cells further down.
seed = 42        # random_state handed to train_test_split
test_size = 0.3  # value passed as train_test_split's test_size

In [4]:
# Read the Feather file under data/processed into the raw, unfiltered frame.
df_raw = pd.read_feather(
    "../../data/processed/data-set.ftr",
)


In [5]:
# Columns kept for modelling. "product02_bin" is the column split_df uses as
# the target; every other entry ends up in the feature matrix.
features = [
    "income",
    "age",
    "var1",
    "lastVisit_year",
    "lastVisit_days",
    "product02_bin",
]

# Restrict the raw frame to the selected columns only.
df = df_raw[features]


In [8]:
def split_df(df, target='product02_bin'):
    """Separate features from the target and create a train/test split.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the feature columns and the ``target`` column.
    target : str, default 'product02_bin'
        Name of the column to predict. It is removed from the feature matrix
        and returned as the label series.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, X, y)`` where ``X`` / ``y`` are
        the full (unsplit) feature matrix and target.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.

    Notes
    -----
    The hold-out fraction and the random seed are read from the notebook-level
    ``test_size`` and ``seed`` variables defined in the parameters cell.
    """
    X = df.drop(columns=[target])
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )
    return X_train, X_test, y_train, y_test, X, y

# Materialize the train/test partitions together with the full X and y.
(X_train, X_test, y_train, y_test, X, y) = split_df(df)


In [11]:
def scoring(y_test, y_pred):
    """Print accuracy, precision, recall and F1 for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_test``.

    Returns
    -------
    None
        The four metrics are only printed, one per line.
    """
    # All four metrics are computed before anything is printed, so a failing
    # metric call produces no partial report.
    accuracy, precision, recall, f1 = (
        metric(y_test, y_pred)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )

    # Accuracy:  (TP + TN) / all predictions
    # Precision: TP / (TP + FP) -- share of predicted positives that are correct
    # Recall:    TP / (TP + FN) -- share of actual positives that were found
    # F1-Score:  harmonic mean of precision and recall
    print(
        f'Accuracy: {accuracy}',
        f'Precision: {precision}',
        f'Recall: {recall}',
        f'F1-Score: {f1}',
        sep='\n',
    )
    

In [12]:
# Fit a gradient-boosted classifier on the training split and predict the
# held-out test split.
# The model reuses the notebook-wide `seed` rather than a second hard-coded 42,
# so the data split and the model stay in sync when the seed is changed.
xgb_model = XGBClassifier(random_state=seed)
xgb_model.fit(X_train, y_train)
xgb_y_pred = xgb_model.predict(X_test)

# Report accuracy / precision / recall / F1 on the test split.
scoring(y_test, xgb_y_pred)

Accuracy: 0.6746499631540163
Precision: 0.6437346437346437
Recall: 0.4691136974037601
F1-Score: 0.5427239772138789
