In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [9]:
# Relative locations of the two input CSV files and of the serialized model.
PATH, PATH_OVERSAMPLED = '../data/data.csv', '../data/data_oversampled.csv'
MODEL_PATH = '../decision_tree.pkl'

# Integer marker codes (presumably the X / O / empty values found in the
# feature columns -- confirm against the dataset).
# NOTE(review): a later cell rebinds `X` to the feature DataFrame, so the
# marker value 1 is no longer reachable through this name after that point.
X, O, BLANK = 1, -1, 0

# Outcome codes 0..3 (presumably the values of the `category` column -- confirm).
O_WIN, DRAW, ONGOING, X_WIN = range(4)

In [10]:
# Read the dataset from the CSV file at PATH.
df = pd.read_csv(PATH)

# Preview ten random rows. The fixed seed keeps the displayed sample identical
# across "Restart & Run All" executions.
df.sample(10, random_state=42)

Unnamed: 0,0,1,2,3,4,5,6,7,8,category
301,-1,1,1,-1,1,-1,0,1,0,3
950,-1,1,1,1,1,-1,-1,-1,1,1
1662,-1,0,-1,-1,1,-1,1,1,1,3
1988,0,1,0,1,0,0,0,0,-1,2
1899,-1,-1,1,1,1,-1,-1,-1,1,1
977,-1,-1,-1,1,1,-1,-1,1,1,0
581,0,-1,0,1,-1,-1,1,1,1,3
678,1,-1,1,-1,-1,1,1,-1,0,0
1321,1,1,-1,-1,-1,-1,-1,1,1,0
190,1,-1,0,1,1,-1,1,0,-1,3


In [11]:
# The nine feature columns are labelled with the strings "0" .. "8".
feature_cols = list(map(str, range(9)))

# Feature matrix and target vector taken from the loaded frame.
# NOTE(review): this rebinds `X`, which the config cell defined as the
# integer marker 1; from here on `X` is the feature DataFrame.
X, y = df[feature_cols], df['category']

In [12]:
# Stage 1: keep 60 % of the rows for training and hold out 40 %,
# stratified on the target so class proportions are preserved.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    stratify=y,
    random_state=42,
)

# Stage 2: cut the held-out 40 % in half, giving validation and test
# splits of equal size, again stratified on the target.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

In [13]:
# Decision tree using Gini impurity, limited to a depth of 10; the fixed
# random_state makes the fitted tree reproducible.
dt = DecisionTreeClassifier(
    random_state=42,
    criterion='gini',
    min_samples_split=2,
    max_depth=10,
)

In [15]:
# Fit on the DataFrame / Series directly instead of `.to_numpy()` arrays.
# The later `dt.predict(X_val)` and `dt.predict(X_test)` calls pass
# DataFrames; fitting on bare arrays makes scikit-learn warn that X has
# feature names but the estimator was fitted without them. This also matches
# how the final model is fitted before being saved.
dt.fit(X_train, y_train)

In [16]:
# Predict on the validation split with the fitted tree.
y_pred_val = dt.predict(X_val)

# Precision, recall and F1 use the 'weighted' average across classes.
val_metrics = [
    ("Validation Accuracy :", accuracy_score(y_val, y_pred_val)),
    ("Validation Precision:", precision_score(y_val, y_pred_val, average='weighted')),
    ("Validation Recall   :", recall_score(y_val, y_pred_val, average='weighted')),
    ("Validation F1-score :", f1_score(y_val, y_pred_val, average='weighted')),
]
for metric_label, metric_value in val_metrics:
    print(metric_label, metric_value)

Validation Accuracy : 0.9338235294117647
Validation Precision: 0.9283132473160108
Validation Recall   : 0.9338235294117647
Validation F1-score : 0.9293263870582925




In [17]:
# Per-class precision / recall / F1 on the validation split, 4 decimal places.
val_report = classification_report(y_val, y_pred_val, digits=4)
print("\nClassification Report (Validation):\n", val_report)


Classification Report (Validation):
               precision    recall  f1-score   support

           0     0.9526    0.9628    0.9577       188
           1     0.5000    0.1429    0.2222         7
           2     0.7200    0.7200    0.7200        25
           3     0.9476    0.9628    0.9551       188

    accuracy                         0.9338       408
   macro avg     0.7801    0.6971    0.7138       408
weighted avg     0.9283    0.9338    0.9293       408



In [18]:
# Render the fitted tree's decision rules as indented text, labelling each
# split with the corresponding feature column name.
tree_rules = export_text(dt, feature_names=feature_cols)
print("\nÁrvore de Decisão (texto):\n", tree_rules)


Árvore de Decisão (texto):
 |--- 4 <= -0.50
|   |--- 8 <= 0.50
|   |   |--- 0 <= 0.50
|   |   |   |--- 0 <= -0.50
|   |   |   |   |--- class: 0
|   |   |   |--- 0 >  -0.50
|   |   |   |   |--- 6 <= 0.50
|   |   |   |   |   |--- 6 <= -0.50
|   |   |   |   |   |   |--- 3 <= -0.50
|   |   |   |   |   |   |   |--- 1 <= 0.00
|   |   |   |   |   |   |   |   |--- class: 2
|   |   |   |   |   |   |   |--- 1 >  0.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- 3 >  -0.50
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- 6 >  -0.50
|   |   |   |   |   |   |--- 2 <= 0.50
|   |   |   |   |   |   |   |--- 8 <= -0.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- 8 >  -0.50
|   |   |   |   |   |   |   |   |--- 3 <= -0.50
|   |   |   |   |   |   |   |   |   |--- 1 <= 0.00
|   |   |   |   |   |   |   |   |   |   |--- class: 2
|   |   |   |   |   |   |   |   |   |--- 1 >  0.00
|   |   |   |   |   |   |   |   |   |   |--- cl

In [19]:
# Predict on the held-out test split with the fitted tree.
y_pred_test = dt.predict(X_test)

# Same metrics as for validation; precision, recall and F1 use the
# 'weighted' average across classes.
test_metrics = [
    ("Test  Accuracy :", accuracy_score(y_test, y_pred_test)),
    ("Test  Precision:", precision_score(y_test, y_pred_test, average='weighted')),
    ("Test  Recall   :", recall_score(y_test, y_pred_test, average='weighted')),
    ("Test  F1-score :", f1_score(y_test, y_pred_test, average='weighted')),
]
for metric_label, metric_value in test_metrics:
    print(metric_label, metric_value)

Test  Accuracy : 0.9144254278728606
Test  Precision: 0.9104622468358601
Test  Recall   : 0.9144254278728606
Test  F1-score : 0.9112934920567067




In [20]:
# Build the model to persist with exactly the hyperparameters of the tree
# evaluated in the cells above (read back via `get_params()`), so that the
# validation / test metrics reported in this notebook describe the saved
# artifact rather than an unevaluated default-configured tree.
dt = DecisionTreeClassifier(**dt.get_params())

# Fit on the training split as a DataFrame so feature names are stored
# with the estimator.
dt.fit(X_train, y_train)

# Serialize the fitted estimator to MODEL_PATH and confirm to the user.
joblib.dump(dt, MODEL_PATH)
print(f'Modelo salvo em {MODEL_PATH}')


Modelo salvo em ../decision_tree.pkl
