In [1]:
import pandas as pd

# Load the recorded data and keep only the rows where num == 3.
df = pd.read_csv("data.csv").query("num == 3")

# Rows with turn == 0 whose final black score exceeds the final white score
# (presumably black's moves in games black went on to win -- confirm against
# the data schema).
df_b = df.query("turn == 0 and last_black_score > last_white_score")

# Rows with turn == 1 whose final white score exceeds the final black score.
# This must be filtered from `df`, not from `df_b`: every row of `df_b` already
# satisfies last_black_score > last_white_score, so filtering it for the
# opposite condition always produced an empty frame.
df_w = df.query("turn == 1 and last_white_score > last_black_score")

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Target: the value of the put_place column for each row.
y = df_b["put_place"]

# Features: every remaining column except the target and the columns that were
# only used for filtering.
x = df_b.drop(
    columns=["turn", "num", "put_place", "last_black_score", "last_white_score"]
)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

In [3]:
import matplotlib.pyplot as plt

def plot(x, y_train, y_test, xlabel, ylabel, title, save_dir):
    """Draw train and test score curves on one figure and save it to disk.

    The figure is closed after saving, so nothing stays open when this is
    called repeatedly.

    Args:
        x: Values for the horizontal axis, shared by both curves.
        y_train: Scores drawn with the legend label "train score".
        y_test: Scores drawn with the legend label "test score".
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        title: Figure title; also substituted into ``save_dir`` to build the
            output path.
        save_dir: %-style template with one ``%s`` placeholder for the title,
            e.g. ``"fig/%s"``.

    Returns:
        None.
    """
    import os

    out_path = save_dir % title

    # savefig does not create missing directories, so make sure the target
    # directory exists before drawing anything.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.plot(x, y_train, label="train score")
    ax.plot(x, y_test, label="test score")
    ax.legend()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    fig.savefig(out_path)
    # Close this specific figure rather than whatever pyplot considers current.
    plt.close(fig)

In [None]:
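# NOTE(review): disabled exhaustive grid search over seven hyperparameters.
# The nested loops below cover roughly 2 * 2 * 10 * 40 * 9 * 100 * 499
# (about 7.2e8) combinations and fit one tree for each, which is presumably
# why this cell was never executed.
# with open("learn_data_black.csv", "w") as file: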
#     file.write("criterion,splitter,max_features,max_depth,min_samples_split,")
#     file.write("min_samples_leaf,max_leaf_nodes,train_score,test_score\n")
#     for criterion in ["entropy", "gini"]:
#         for splitter in ["best", "random"]:
#             for max_features in [i for i in range(0, 9 + 1)]:
#                 for max_depth in [i for i in range(1, 41)]:
#                     for min_samples_split in np.arange(0.001, 0.01, 0.001):
#                         for min_samples_leaf in [i for i in range(1, 101)]:
#                             for max_leaf_nodes in [i for i in range(2, 501)]:
#                                 if max_features == 0:
#                                     max_features = None
#                                 model = DecisionTreeClassifier(\
#                                     criterion=criterion,
#                                     splitter=splitter,
#                                     max_features=max_features,
#                                     max_depth=max_depth,
#                                     min_samples_split=min_samples_split,
#                                     min_samples_leaf=min_samples_leaf,
#                                     max_leaf_nodes=max_leaf_nodes,
#                                     random_state=0
#                                 )
#                                 model.fit(x_train, y_train)
#                                 train_score = model.score(x_train, y_train)
#                                 test_score = model.score(x_test, y_test)
#                                 file.write("%s,%s," % (criterion, splitter))
#                                 if max_features:
#                                     file.write("%d," % max_features)
#                                 else:
#                                     file.write("None,")
#                                 file.write(\
#                                     "%d,%f,%d,%d,%f,%f\n" % (\
#                                         max_depth,
#                                         min_samples_split,
#                                         min_samples_leaf,
#                                         max_leaf_nodes,
#                                         train_score,
#                                         test_score
#                                     )
#                                 )

In [25]:
# Compare the split-quality criteria, leaving every other hyperparameter at
# its default value.
for criterion in ("entropy", "gini"):
    model = DecisionTreeClassifier(criterion=criterion, random_state=0)
    model.fit(x_train, y_train)
    print(
        "%s:" % criterion,
        "train score:\t%f" % model.score(x_train, y_train),
        "test score:\t%f" % model.score(x_test, y_test),
        sep="\n",
    )

entropy:
train score:	0.831784
test score:	0.341225
gini:
train score:	0.831784
test score:	0.344908


In [26]:
# Compare the split-selection strategies, leaving every other hyperparameter
# at its default value.
for splitter in ("best", "random"):
    model = DecisionTreeClassifier(splitter=splitter, random_state=0)
    model.fit(x_train, y_train)
    print(
        "%s:" % splitter,
        "train score:\t%f" % model.score(x_train, y_train),
        "test score:\t%f" % model.score(x_test, y_test),
        sep="\n",
    )

best:
train score:	0.831784
test score:	0.344908
random:
train score:	0.831784
test score:	0.340056


In [10]:
# Sweep max_features over 0..9 and record the train/test score for each value.
x_data = list(range(10))
y_train_data = []
y_test_data = []

for max_features in x_data:
    # 0 stands in for "no limit" on the plot's x axis; the model is given
    # None for that case.
    max_features = max_features or None
    model = DecisionTreeClassifier(max_features=max_features, random_state=0)
    model.fit(x_train, y_train)
    y_train_data.append(model.score(x_train, y_train))
    y_test_data.append(model.score(x_test, y_test))

plot(
    x_data,
    y_train_data,
    y_test_data,
    "max_features",
    "accuracy",
    "accuracy for each max_features",
    "fig/%s",
)

In [4]:
# Sweep max_depth over 1..100 and record the train/test score for each value.
x_data = list(range(1, 101))
y_train_data = []
y_test_data = []

for max_depth in x_data:
    # Fix the seed, as the criterion/splitter/max_features experiments in this
    # notebook do: tree fitting is randomized, so without it the curve can
    # change between runs.
    model = DecisionTreeClassifier(max_depth=max_depth, random_state=0)
    model.fit(x_train, y_train)
    y_train_data.append(model.score(x_train, y_train))
    y_test_data.append(model.score(x_test, y_test))

plot(
    x_data,
    y_train_data,
    y_test_data,
    "max_depth",
    "accuracy",
    "accuracy for each max_depth",
    "fig/%s",
)

In [5]:
# Sweep min_samples_split over the fractions 0.001, 0.002, ... (step 0.001,
# stopping before 0.01) and record the train/test score for each value.
x_data = list(np.arange(0.001, 0.01, 0.001))
y_train_data = []
y_test_data = []

for min_samples_split in x_data:
    # Fix the seed, as the criterion/splitter/max_features experiments in this
    # notebook do: tree fitting is randomized, so without it the curve can
    # change between runs.
    model = DecisionTreeClassifier(
        min_samples_split=min_samples_split, random_state=0
    )
    model.fit(x_train, y_train)
    y_train_data.append(model.score(x_train, y_train))
    y_test_data.append(model.score(x_test, y_test))

plot(
    x_data,
    y_train_data,
    y_test_data,
    "min_samples_split",
    "accuracy",
    "accuracy for each min_samples_split",
    "fig/%s",
)

In [6]:
# Sweep min_samples_leaf over 1..100 and record the train/test score for each
# value.
x_data = list(range(1, 101))
y_train_data = []
y_test_data = []

for min_samples_leaf in x_data:
    # Fix the seed, as the criterion/splitter/max_features experiments in this
    # notebook do: tree fitting is randomized, so without it the curve can
    # change between runs.
    model = DecisionTreeClassifier(
        min_samples_leaf=min_samples_leaf, random_state=0
    )
    model.fit(x_train, y_train)
    y_train_data.append(model.score(x_train, y_train))
    y_test_data.append(model.score(x_test, y_test))

plot(
    x_data,
    y_train_data,
    y_test_data,
    "min_samples_leaf",
    "accuracy",
    "accuracy for each min_samples_leaf",
    "fig/%s",
)

In [7]:
# Sweep max_leaf_nodes over 2..500 and record the train/test score for each
# value.
x_data = list(range(2, 501))
y_train_data = []
y_test_data = []

for max_leaf_nodes in x_data:
    # Fix the seed, as the criterion/splitter/max_features experiments in this
    # notebook do: tree fitting is randomized, so without it the curve can
    # change between runs.
    model = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes, random_state=0)
    model.fit(x_train, y_train)
    y_train_data.append(model.score(x_train, y_train))
    y_test_data.append(model.score(x_test, y_test))

plot(
    x_data,
    y_train_data,
    y_test_data,
    "max_leaf_nodes",
    "accuracy",
    "accuracy for each max_leaf_nodes",
    "fig/%s",
)