In [1]:
import numpy as np  
import pandas as pd  
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
!pip install xgboost
from xgboost import XGBClassifier
import pyarrow.feather as feather
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# Suppress warnings 
# (sometimes you might want to ignore warnings, that's how you can achieve this)
import warnings
import shap
warnings.filterwarnings('ignore')
RSEED= 42



In [2]:
export_df = feather.read_feather("../data/cleaned_data.feather")

In [3]:
# Function to split the dataset 
def splitdataset(df):
    y=export_df["interesting_message"]
    X=export_df.drop("interesting_message",axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X,y, stratify = y, test_size = 0.2,random_state = 42)
    print(y_train.dtypes)
    return X_train, X_test, y_train, y_test

In [4]:
def train_xgb(X_train, X_test, y_train): 
    # Creating the classifier object 
    xgb_class = XGBClassifier(use_label_encoder=False,eval_metric= "logloss",enable_categorical=True)
    xgb_parametering= {'scale_pos_weight': [1, 2, 4]}
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
    xgb_grid = GridSearchCV(estimator=xgb_class, param_grid=xgb_parametering, cv=cv,scoring='balanced_accuracy',
    verbose=10, n_jobs=-1)
    grid_result=xgb_grid.fit(X_train,y_train)
    best_model=grid_result.best_estimator_
    # report the best configuration
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    return best_model
    #TODO ADD SHAP

In [5]:
# Function to make predictions 
def prediction(X_test, model): 
    y_pred = model.predict(X_test)
    return y_pred 

In [6]:
# Function to calculate accuracy 
def class_metrics(y_test, y_pred): 
    accuracy = balanced_accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred).round()
    print("Predicted values:\n", y_pred) 
    print("Confusion Matrix: \n", cm) 
    print("Balanced Accuracy: %.4f%%" % (accuracy * 100.0))
    print("Report : \n", classification_report(y_test, y_pred))
    return cm, accuracy
    #cm = confusion_matrix(y_test, y_pred).round()
    #print("Predicted values:\n", y_pred) 
    #print("Confusion Matrix: \n", cm) 
    #scores = cross_val_score(xgb_class, y_test, y_pred, scoring='balanced_accuracy', cv=cv, n_jobs=-1)
    #print("Report : \n", scores)

In [7]:
# Driver code 
def main(): 
    # Building Phase 
    X_train, X_test, y_train, y_test = splitdataset(export_df) 
    basemodel = train_xgb(X_train, X_test, y_train)
    # Operational Phase 
    print("-----"*15)
    print("Results:\n")
    # Prediction
    y_pred = prediction(X_test, basemodel) 
    cm,accuracy=class_metrics(y_test, y_pred)
    return basemodel,y_pred,cm,accuracy

In [8]:
basemodel,y_pred,cm,accuracy=main()

int64
Fitting 15 folds for each of 3 candidates, totalling 45 fits
[CV 1/15; 1/3] START scale_pos_weight=1.........................................
[CV 2/15; 1/3] START scale_pos_weight=1.........................................
[CV 5/15; 1/3] START scale_pos_weight=1.........................................
[CV 4/15; 1/3] START scale_pos_weight=1.........................................
[CV 3/15; 1/3] START scale_pos_weight=1.........................................
[CV 8/15; 1/3] START scale_pos_weight=1.........................................
[CV 6/15; 1/3] START scale_pos_weight=1.........................................
[CV 7/15; 1/3] START scale_pos_weight=1.........................................
[CV 1/15; 1/3] END ............scale_pos_weight=1;, score=nan total time=   0.9s
[CV 4/15; 1/3] END ............scale_pos_weight=1;, score=nan total time=   1.0s
[CV 6/15; 1/3] END ............scale_pos_weight=1;, score=nan total time=   1.0s
[CV 8/15; 1/3] END ............scale_pos_w

Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 5/15; 1/3] END ............scale_pos_weight=1;, score=nan total time=   1.1s
[CV 9/15; 1/3] START scale_pos_weight=1.........................................
[CV 10/15; 1/3] START scale_pos_weight=1........................................
[CV 11/15; 1/3] START scale_pos_weight=1........................................
[CV 12/15; 1/3] START scale_pos_weight=1........................................
[CV 9/15; 1/3] END ............scale_pos_weight=1;, score=nan total time=   0.3s
[CV 13/15; 1/3] START scale_pos_weight=1........................................
[CV 10/15; 1/3] END ...........scale_pos_weight=1;, score=nan total time=   0.3s
[CV 11/15; 1/3] END ...........scale_pos_weight=1;, score=nan total time=   0.3s
[CV 14/15; 1/3] START scale_pos_weight=1........................................


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 15/15; 1/3] START scale_pos_weight=1........................................
[CV 12/15; 1/3] END ...........scale_pos_weight=1;, score=nan total time=   0.3s
[CV 13/15; 1/3] END ...........scale_pos_weight=1;, score=nan total time=   0.3s
[CV 1/15; 2/3] START scale_pos_weight=2.........................................
[CV 2/15; 2/3] START scale_pos_weight=2.........................................
[CV 14/15; 1/3] END ...........scale_pos_weight=1;, score=nan total time=   0.3s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 3/15; 2/3] START scale_pos_weight=2.........................................
[CV 4/15; 2/3] START scale_pos_weight=2.........................................
[CV 15/15; 1/3] END ...........scale_pos_weight=1;, score=nan total time=   0.3s
[CV 5/15; 2/3] START scale_pos_weight=2.........................................


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 1/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.4s
[CV 6/15; 2/3] START scale_pos_weight=2.........................................
[CV 7/15; 2/3] START scale_pos_weight=2.........................................
[CV 8/15; 2/3] START scale_pos_weight=2.........................................
[CV 2/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.4s
[CV 9/15; 2/3] START scale_pos_weight=2.........................................


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 3/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.5s
[CV 10/15; 2/3] START scale_pos_weight=2........................................
[CV 11/15; 2/3] START scale_pos_weight=2........................................
[CV 4/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.6s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 5/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.6s
[CV 6/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.6s
[CV 8/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.5s
[CV 7/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.5s
[CV 9/15; 2/3] END ............scale_pos_weight=2;, score=nan total time=   0.6s
[CV 12/15; 2/3] START scale_pos_weight=2........................................


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 10/15; 2/3] END ...........scale_pos_weight=2;, score=nan total time=   0.5s
[CV 11/15; 2/3] END ...........scale_pos_weight=2;, score=nan total time=   0.4s
[CV 13/15; 2/3] START scale_pos_weight=2........................................
[CV 14/15; 2/3] START scale_pos_weight=2........................................
[CV 15/15; 2/3] START scale_pos_weight=2........................................
[CV 1/15; 3/3] START scale_pos_weight=4.........................................
[CV 2/15; 3/3] START scale_pos_weight=4.........................................
[CV 12/15; 2/3] END ...........scale_pos_weight=2;, score=nan total time=   0.3s
[CV 3/15; 3/3] START scale_pos_weight=4.........................................


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 4/15; 3/3] START scale_pos_weight=4.........................................
[CV 5/15; 3/3] START scale_pos_weight=4.........................................
[CV 13/15; 2/3] END ...........scale_pos_weight=2;, score=nan total time=   0.4s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 6/15; 3/3] START scale_pos_weight=4.........................................
[CV 14/15; 2/3] END ...........scale_pos_weight=2;, score=nan total time=   0.4s
[CV 15/15; 2/3] END ...........scale_pos_weight=2;, score=nan total time=   0.5s
[CV 2/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s
[CV 1/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 3/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s
[CV 4/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s
[CV 5/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s
[CV 7/15; 3/3] START scale_pos_weight=4.........................................
[CV 8/15; 3/3] START scale_pos_weight=4.........................................


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 6/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s
[CV 9/15; 3/3] START scale_pos_weight=4.........................................
[CV 10/15; 3/3] START scale_pos_weight=4........................................
[CV 11/15; 3/3] START scale_pos_weight=4........................................
[CV 12/15; 3/3] START scale_pos_weight=4........................................
[CV 14/15; 3/3] START scale_pos_weight=4........................................
[CV 13/15; 3/3] START scale_pos_weight=4........................................
[CV 7/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 8/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.5s
[CV 15/15; 3/3] START scale_pos_weight=4........................................
[CV 9/15; 3/3] END ............scale_pos_weight=4;, score=nan total time=   0.6s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 10/15; 3/3] END ...........scale_pos_weight=4;, score=nan total time=   0.6s
[CV 12/15; 3/3] END ...........scale_pos_weight=4;, score=nan total time=   0.6s
[CV 13/15; 3/3] END ...........scale_pos_weight=4;, score=nan total time=   0.6s
[CV 14/15; 3/3] END ...........scale_pos_weight=4;, score=nan total time=   0.6s
[CV 11/15; 3/3] END ...........scale_pos_weight=4;, score=nan total time=   0.7s


Traceback (most recent call last):
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/core.py", line 436, in inner_f
    return f(**kwargs)
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1158, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 236, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/site-packages/xgboost/sklearn.py", line 1172, in <lambda>
    create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
  File "/Users/fortes/neuefische/Capstone-WIMT/.venv/lib/python3.8/sit

[CV 15/15; 3/3] END ...........scale_pos_weight=4;, score=nan total time=   0.5s


ValueError: DataFrame.dtypes for data must be int, float, bool or categorical.  When
                categorical type is supplied, DMatrix parameter
                `enable_categorical` must be set to `True`.lenght

In [None]:
f = open('ml-log_xgboost.txt', 'a')
f.write('Base model: XGBoost\n Predicted values:\n {}\n Confusion Matrix:\n {}\n Balanced Accuracy:\n {} \n Model: {}'.format(y_pred,cm,accuracy,basemodel))
f.close()