In [1]:
import joblib 
import sys 
import os
import pandas as pd
import numpy as np

notebook_dir = os.getcwd() 
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(project_root)

from sklearn.metrics import accuracy_score, classification_report

from config.config import PREDICTOR_MODEL_PATH, TEST_DATA_PATH

In [2]:
test_data = joblib.load(TEST_DATA_PATH)
bushy_model = joblib.load(PREDICTOR_MODEL_PATH)

In [3]:
X_test = test_data['X_test']
y_test = test_data['y_test']

print(X_test)
print(y_test)

      experiance_diff  win_proportion_diff  loss_proportion_diff  \
5705              117             0.584711             -0.592975   
1779                6            -0.009868              0.041118   
7561                1            -0.057143              0.057143   
330                 4            -0.057143              0.057143   
6829              -11            -0.021830              0.021830   
...               ...                  ...                   ...   
558               -21            -0.019886              0.019886   
6699                0            -0.100000              0.100000   
5085               -9            -0.050000              0.050000   
3541               11            -0.124704              0.124704   
3133               -5            -0.051136              0.051136   

      height_diff_cm  reach_diff_cm  age_diff  prime_score_diff    elo_diff  \
5705            2.54           0.00  0.000000         -0.235429  346.001709   
1779          -10.16     

In [4]:
bad_entries = {'hello': 1}
for col in X_test.columns:
    mask = ~pd.to_numeric(X_test[col], errors="coerce").notna()
    if mask.any():
        bad_entries[col] = X_test.loc[mask, col].unique()

print("Bad entries found:", bad_entries)

Bad entries found: {'hello': 1}


In [5]:

trees_with_errors = []

for i in range(100):
    print(i)
    try:
        predictions = bushy_model.trees[i].predict(X_test.values[:])
    except Exception as e:
        print('error at tree', i)
        trees_with_errors.append(i)
        continue
    print(predictions)
    
print(trees_with_errors)


0
[1 1 1 ... 0 0 0]
1
[1 1 0 ... 0 1 0]
2
[1 0 0 ... 0 0 0]
3
[1 0 0 ... 0 0 0]
4
[1 1 1 ... 0 0 0]
5
[1 0 0 ... 0 1 0]
6
[0 1 1 ... 0 1 1]
7
[1 0 0 ... 0 1 0]
8
[1 0 0 ... 0 0 1]
9
[1 0 1 ... 0 1 1]
10
[1 0 0 ... 0 1 0]
11
[1 0 0 ... 0 1 0]
12
[1 1 0 ... 0 1 0]
13
[1 1 0 ... 0 1 1]
14
[1 0 1 ... 0 0 1]
15
[1 0 0 ... 1 0 1]
16
[1 0 0 ... 0 0 0]
17
[1 1 0 ... 0 0 0]
18
[1 1 1 ... 0 0 1]
19
[1 0 0 ... 0 1 0]
20
[1 1 1 ... 0 0 0]
21
[1 0 0 ... 0 1 0]
22
[1 1 0 ... 0 1 0]
23
[1 0 0 ... 0 0 0]
24
[1 0 0 ... 0 0 0]
25
[1 0 1 ... 0 1 0]
26
[1 0 0 ... 0 0 0]
27
[1 0 0 ... 0 1 0]
28
[1 1 0 ... 0 1 0]
29
[1 1 0 ... 0 0 1]
30
[1 1 1 ... 0 0 0]
31
[1 0 0 ... 0 1 0]
32
[0 0 1 ... 0 0 0]
33
[1 0 0 ... 0 1 0]
34
[1 1 0 ... 0 1 0]
35
[1 1 1 ... 0 0 0]
36
[1 1 0 ... 0 0 0]
37
[1 0 0 ... 0 1 0]
38
[1 1 1 ... 0 1 0]
39
[1 0 0 ... 0 1 1]
40
[1 0 0 ... 0 1 0]
41
[1 1 0 ... 0 1 0]
42
[1 0 0 ... 0 1 0]
43
[1 0 0 ... 0 0 0]
44
[1 0 0 ... 0 1 0]
45
[1 0 0 ... 0 1 0]
46
[1 0 0 ... 0 0 0]
47
[1 0 0 ... 0 1 0]
48

In [6]:
predictions = bushy_model.predict(X_test.values)

In [7]:
count =bushy_model.trees[2].debug_tree()
print(count)

Feature 20 <= 0.0 (type=<class 'numpy.float64'>)
  Feature 12 <= -0.11944444444444441 (type=<class 'numpy.float64'>)
    Feature 25 <= 0.0 (type=<class 'numpy.float64'>)
      Feature 0 <= -1.0 (type=<class 'numpy.float64'>)
        Feature 0 <= -6.0 (type=<class 'numpy.float64'>)
          Feature 9 <= 11.806455035633041 (type=<class 'numpy.float64'>)
            Feature 13 <= -0.14478764478764639 (type=<class 'numpy.float64'>)
              Feature 7 <= -9.02889057306038 (type=<class 'numpy.float64'>)
                Feature 8 <= 36.82973770019112 (type=<class 'numpy.float64'>)
                  Feature 12 <= -0.13095238095238096 (type=<class 'numpy.float64'>)
                    Feature 41 <= 0.0 (type=<class 'numpy.float64'>)
                      Leaf: value=1
                      Feature 21 <= 0.0 (type=<class 'numpy.float64'>)
                        Leaf: value=0
                        Leaf: value=1
                    Leaf: value=0
                  Feature 24 <= 0.0 (type=<

In [8]:
accuracy = accuracy_score(y_test, predictions)
print(f"accuracy: {accuracy}")

accuracy: 0.8031784841075794


In [9]:
report = classification_report(y_test, predictions, target_names=['Blue Wins', 'Red Wins'])
print(report)

              precision    recall  f1-score   support

   Blue Wins       0.80      0.81      0.80       807
    Red Wins       0.81      0.80      0.80       829

    accuracy                           0.80      1636
   macro avg       0.80      0.80      0.80      1636
weighted avg       0.80      0.80      0.80      1636

