In [2]:
from sklearn.model_selection import train_test_split, GridSearchCV
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score, classification_report, confusion_matrix
import numpy as np
import json
import pandas as pd

In [3]:
def _read_json(path):
    """Open ``path`` for reading, parse it as JSON and return the decoded object."""
    with open(path, 'r') as handle:
        return json.load(handle)


# x_vector holds the rows later used as model inputs; y_vector holds the
# values later used as targets (paired with x_vector by position).
x_vector = _read_json('x_vector.json')
y_vector = _read_json('y_vector.json')


# Largest finite float32 magnitude, held as a plain Python float so that the
# comparison below also works for arbitrarily large JSON integers.
# NOTE(review): presumably float32 is the bound because the downstream model
# stores features as 32-bit floats — confirm.
FLOAT32_MAX = float(np.finfo(np.float32).max)


def _is_invalid_feature(value):
    """Return True when ``value`` should disqualify the row that contains it.

    A value is invalid when it is:
      * the string ``'nan'`` (it would turn into a real NaN once the rows are
        cast to float), or
      * a number that is NaN, infinite, or larger in magnitude than the
        float32 maximum.

    Any other value (including other strings and ``None``) is left alone,
    matching the original filter.
    """
    if isinstance(value, str):
        # The original test for 'nan' sat behind an isinstance(float, int)
        # guard and therefore could never match; check strings explicitly.
        return value == 'nan'
    if isinstance(value, (float, int)):
        # abs(inf) exceeds the bound, so infinities are caught by the first
        # clause; NaN compares False there and is caught by np.isnan.
        return bool(abs(value) > FLOAT32_MAX or np.isnan(value))
    return False


# Labels are selected by row position below, so a length mismatch would
# silently pair rows with the wrong labels (or raise IndexError part-way).
assert len(x_vector) == len(y_vector), (
    f"x_vector has {len(x_vector)} rows but y_vector has {len(y_vector)} labels"
)

# Positions of the rows in which every value passed the validity check.
filtered_indices = [
    i for i, sublist in enumerate(x_vector)
    if not any(_is_invalid_feature(value) for value in sublist)
]

x_vector_filtered = [x_vector[i] for i in filtered_indices]
y_vector_filtered = [y_vector[i] for i in filtered_indices]


In [4]:
# Hold out 20% of the filtered rows for evaluation; the fixed seed keeps the
# partition the same on every run.
split_parts = train_test_split(
    x_vector_filtered,
    y_vector_filtered,
    test_size=0.20,
    random_state=42,
)
x_train, x_test, y_train, y_test = split_parts

In [5]:
# Turn each split from nested Python lists into a float NumPy array.
# The right-hand side reads all four original values before any name is
# rebound, so the result matches converting them one statement at a time.
x_train, y_train, x_test, y_test = (
    np.array(part).astype(float)
    for part in (x_train, y_train, x_test, y_test)
)

In [6]:
# Hyper-parameters for the gradient-boosted classifier, collected in one place
# so they are easy to read and tweak.
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}

xgb_clf = xgb.XGBClassifier(**xgb_params)
xgb_clf.fit(x_train, y_train)

# Score the held-out split; compute every metric first, then print them.
y_pred = xgb_clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Test Accuracy:", accuracy)

print("\nClassification Report:")
print(report_text)

print("Confusion Matrix:")
print(conf_matrix)


Test Accuracy: 0.7439024390243902

Classification Report:
              precision    recall  f1-score   support

         0.0       0.80      0.82      0.81       164
         1.0       0.62      0.59      0.60        82

    accuracy                           0.74       246
   macro avg       0.71      0.70      0.71       246
weighted avg       0.74      0.74      0.74       246

Confusion Matrix:
[[135  29]
 [ 34  48]]
