# Training on 80% data and then subsequent cross-validation

In [3]:
import warnings
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.linear_model import LinearRegression


def warn(*args, **kwargs):
    """Accept any positional/keyword arguments and do nothing.

    Assigned to ``warnings.warn`` right below so that warnings raised through
    that attribute are silently discarded.
    """
    return None


# Swap the stdlib entry point for the no-op above; only calls that look up
# ``warnings.warn`` after this assignment are affected.
warnings.warn = warn


class Classifier():
    """Evaluate a classifier on a hold-out split and with k-fold cross-validation."""

    def func(self, clf, X, y, test_size=0.20, cv=10, random_state=42):
        """Fit ``clf`` on a train/test split, then print cross-validation metrics.

        The cross-validation part runs on the full ``X``/``y`` and is therefore
        independent of the hold-out split made at the start.

        Args:
            clf: Estimator exposing ``fit``, ``predict`` and ``score``.
            X: Feature matrix.
            y: Target labels. A 2-D input with exactly one column is
                flattened to 1-D before use.
            test_size: Fraction of samples held out for the initial test set.
            cv: Passed unchanged as ``cv`` to ``cross_validate`` and
                ``cross_val_predict``.
            random_state: Seed for the shuffled hold-out split.

        Returns:
            None. All results are written to stdout.
        """
        # A single-column DataFrame/array is a column vector; estimators expect
        # a 1-D target, so flatten it explicitly instead of relying on the
        # library's implicit conversion (which emits a warning).
        y_arr = np.asarray(y)
        if y_arr.ndim == 2 and y_arr.shape[1] == 1:
            y = y_arr.ravel()

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, shuffle=True, test_size=test_size, random_state=random_state)
        clf.fit(X_train, y_train)
        y_predicted = clf.predict(X_test)
        print("Confusion_matrix for initial training-test set")
        print(confusion_matrix(y_test, y_predicted))
        print("Classification_report for initial training-test set")
        print(classification_report(y_test, y_predicted))
        print("Training score for initial training-test set")
        print(clf.score(X_train, y_train))
        print("Testing score for initial training-test set")
        print(clf.score(X_test, y_test))

        # One cross_validate run yields both the per-fold test scores (what
        # cross_val_score would return) and the train scores, so the folds are
        # not refitted a second time just to obtain the accuracies.
        cv_results = cross_validate(clf, X, y, cv=cv, return_train_score=True)
        accuracies = cv_results['test_score']
        print("Cross-validation Accuracies:")
        print(accuracies)
        print("Cross-validation Accuracies Mean:")
        print(accuracies.mean())
        print("Cross-validation Accuracies Standard Deviation Mean:")
        print(accuracies.std())

        # Out-of-fold predictions for every sample, combined into one report.
        y_pred_cross_val = cross_val_predict(clf, X, y, cv=cv)
        print("Confusion matrix of combined cross-validation data predicted results")
        print(confusion_matrix(y, y_pred_cross_val))
        print("Classification_report of combined cross-validation data predicted results")
        print(classification_report(y, y_pred_cross_val))

        print("Cross-validation testing scores:")
        print(cv_results['test_score'])
        print("Cross-validation training scores")
        print(cv_results['train_score'])


if __name__ == '__main__':
    obj = Classifier()

    # Both data files are read as space-separated, header-less tables;
    # columns 0-8 are used as features and column 9 as the label.
    col_X = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    col_y = [9]

    data = pd.read_csv('tictac_final.txt', sep=" ", header=None)
    X_final = data[col_X]
    y = data[col_y]

    print("For final boards classification dataset:")
    print("Linear SVM Classifier:")
    # NOTE: per the scikit-learn docs, degree/gamma/coef0 are not used by the
    # linear kernel; they are kept only to leave the configuration unchanged.
    clf_svm = svm.SVC(kernel='linear', degree=2, gamma='auto',
                      C=1.2, coef0=0.2, probability=True, random_state=42)
    # The feature frame is X_final; the previous code passed an undefined
    # name `X`, which raises NameError when the script runs on its own.
    obj.func(clf_svm, X_final, y)

    print("MLP Classifier:")
    # NOTE: per the scikit-learn docs, early_stopping is only effective for
    # the 'sgd' and 'adam' solvers, so it has no effect with 'lbfgs'.
    clf_mlp = MLPClassifier(random_state=42, max_iter=1000,
                            solver='lbfgs', activation='tanh', early_stopping=True)
    obj.func(clf_mlp, X_final, y)

    print("KNeighborsClassifier:")
    clf_knn = KNeighborsClassifier(n_neighbors=3, weights='distance',
                                   algorithm='kd_tree', p=2, leaf_size=3, n_jobs=-1)
    obj.func(clf_knn, X_final, y)

    data_single = pd.read_csv('tictac_single.txt', sep=" ", header=None)
    X_single = data_single[col_X]
    y_single = data_single[col_y]

    print("For intermediate boards optimal play(single label) dataset:")
    print("KNeighborsClassifier:")
    clf_knn = KNeighborsClassifier(n_neighbors=9, weights='distance',
                                   algorithm='kd_tree', p=2, leaf_size=9, n_jobs=-1)
    obj.func(clf_knn, X_single, y_single)

    print("Linear SVM Classifier:")
    # NOTE: degree/gamma/coef0 are not used by the linear kernel (see above).
    clf_svm = svm.SVC(kernel='linear', degree=9, gamma='auto', C=9, coef0=0.11,
                      probability=True, random_state=42, class_weight='balanced')
    obj.func(clf_svm, X_single, y_single)

    print("MLP Classifier:")
    # NOTE: early_stopping has no effect with the 'lbfgs' solver (see above).
    clf_mlp = MLPClassifier(random_state=42, max_iter=1000,
                            solver='lbfgs', activation='tanh', early_stopping=True)
    obj.func(clf_mlp, X_single, y_single)


For final boards classification dataset:
Linear SVM Classifier:
Confusion_matrix for initial training-test set
[[ 61   6]
 [  0 125]]
Classification_report for initial training-test set
              precision    recall  f1-score   support

          -1       1.00      0.91      0.95        67
           1       0.95      1.00      0.98       125

    accuracy                           0.97       192
   macro avg       0.98      0.96      0.96       192
weighted avg       0.97      0.97      0.97       192

Training score for initial training-test set
0.9869451697127938
Testing score for initial training-test set
0.96875
Cross-validation Accuracies:
[1.         1.         1.         1.         1.         1.
 1.         1.         1.         0.83157895]
Cross-validation Accuracies Mean:
0.983157894736842
Cross-validation Accuracies Standard Deviation Mean:
0.050526315789473676
Confusion matrix of combined cross-validation data predicted results
[[316  16]
 [  0 626]]
Classification_repo

0.1648854961832061
Testing score for initial training-test set
0.16552250190694126
Cross-validation Accuracies:
[0.13719512 0.14961832 0.1648855  0.17557252 0.15725191 0.18320611
 0.20458015 0.15267176 0.18167939 0.17251908]
Cross-validation Accuracies Mean:
0.1679179854775647
Cross-validation Accuracies Standard Deviation Mean:
0.01865786744900777
Confusion matrix of combined cross-validation data predicted results
[[176 728  52  85   4 302  16 182  19]
 [ 25 556   3   3   0 206  10  21  21]
 [ 92 408 100   8   4 275   1  64  21]
 [ 29 189  30  46   0 148   5  62   4]
 [ 93 434  39  69  15 294  13  85   4]
 [ 34  96  11   5   0 161  14  18  14]
 [ 42 178  63  18   9 144  14  69   7]
 [ 54  95  25   1   0  43   4  25   7]
 [ 45 186  19  14   0 119  19  50   7]]
Classification_report of combined cross-validation data predicted results
              precision    recall  f1-score   support

           0       0.30      0.11      0.16      1564
           1       0.19      0.66      0.30  

# Training on only 10% data and then subsequent cross-validation

In [6]:
import warnings
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.linear_model import LinearRegression


def warn(*args, **kwargs):
    """Accept any positional/keyword arguments and do nothing.

    Assigned to ``warnings.warn`` right below so that warnings raised through
    that attribute are silently discarded.
    """
    return None


# Swap the stdlib entry point for the no-op above; only calls that look up
# ``warnings.warn`` after this assignment are affected.
warnings.warn = warn


class Classifier():
    """Evaluate a classifier on a small-train hold-out split and with k-fold cross-validation."""

    def func(self, clf, X, y, test_size=0.90, cv=10, random_state=42):
        """Fit ``clf`` on a train/test split, then print cross-validation metrics.

        With the default ``test_size`` only 10% of the samples are used for
        the initial fit. The cross-validation part runs on the full ``X``/``y``
        and is therefore independent of that hold-out split.

        Args:
            clf: Estimator exposing ``fit``, ``predict`` and ``score``.
            X: Feature matrix.
            y: Target labels. A 2-D input with exactly one column is
                flattened to 1-D before use.
            test_size: Fraction of samples held out for the initial test set.
            cv: Passed unchanged as ``cv`` to ``cross_validate`` and
                ``cross_val_predict``.
            random_state: Seed for the shuffled hold-out split.

        Returns:
            None. All results are written to stdout.
        """
        # A single-column DataFrame/array is a column vector; estimators expect
        # a 1-D target, so flatten it explicitly instead of relying on the
        # library's implicit conversion (which emits a warning).
        y_arr = np.asarray(y)
        if y_arr.ndim == 2 and y_arr.shape[1] == 1:
            y = y_arr.ravel()

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, shuffle=True, test_size=test_size, random_state=random_state)
        clf.fit(X_train, y_train)
        y_predicted = clf.predict(X_test)
        print("Confusion_matrix for initial training-test set")
        print(confusion_matrix(y_test, y_predicted))
        print("Classification_report for initial training-test set")
        print(classification_report(y_test, y_predicted))
        print("Training score for initial training-test set")
        print(clf.score(X_train, y_train))
        print("Testing score for initial training-test set")
        print(clf.score(X_test, y_test))

        # One cross_validate run yields both the per-fold test scores (what
        # cross_val_score would return) and the train scores, so the folds are
        # not refitted a second time just to obtain the accuracies.
        cv_results = cross_validate(clf, X, y, cv=cv, return_train_score=True)
        accuracies = cv_results['test_score']
        print("Cross-validation Accuracies:")
        print(accuracies)
        print("Cross-validation Accuracies Mean:")
        print(accuracies.mean())
        print("Cross-validation Accuracies Standard Deviation Mean:")
        print(accuracies.std())

        # Out-of-fold predictions for every sample, combined into one report.
        y_pred_cross_val = cross_val_predict(clf, X, y, cv=cv)
        print("Confusion matrix of combined cross-validation data predicted results")
        print(confusion_matrix(y, y_pred_cross_val))
        print("Classification_report of combined cross-validation data predicted results")
        print(classification_report(y, y_pred_cross_val))

        print("Cross-validation testing scores:")
        print(cv_results['test_score'])
        print("Cross-validation training scores")
        print(cv_results['train_score'])


if __name__ == '__main__':
    obj = Classifier()

    # Both data files are read as space-separated, header-less tables;
    # columns 0-8 are used as features and column 9 as the label.
    col_X = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    col_y = [9]

    data = pd.read_csv('tictac_final.txt', sep=" ", header=None)
    X_final = data[col_X]
    y = data[col_y]

    print("For final boards classification dataset:")
    print("Linear SVM Classifier:")
    # NOTE: per the scikit-learn docs, degree/gamma/coef0 are not used by the
    # linear kernel; they are kept only to leave the configuration unchanged.
    clf_svm = svm.SVC(kernel='linear', degree=2, gamma='auto',
                      C=1.2, coef0=0.2, probability=True, random_state=42)
    # The feature frame is X_final; the previous code passed an undefined
    # name `X`, which raises NameError when the script runs on its own.
    obj.func(clf_svm, X_final, y)

    print("MLP Classifier:")
    # NOTE: per the scikit-learn docs, early_stopping is only effective for
    # the 'sgd' and 'adam' solvers, so it has no effect with 'lbfgs'.
    clf_mlp = MLPClassifier(random_state=42, max_iter=1000,
                            solver='lbfgs', activation='tanh', early_stopping=True)
    obj.func(clf_mlp, X_final, y)

    print("KNeighborsClassifier:")
    clf_knn = KNeighborsClassifier(n_neighbors=3, weights='distance',
                                   algorithm='kd_tree', p=2, leaf_size=3, n_jobs=-1)
    obj.func(clf_knn, X_final, y)

    data_single = pd.read_csv('tictac_single.txt', sep=" ", header=None)
    X_single = data_single[col_X]
    y_single = data_single[col_y]

    print("For intermediate boards optimal play(single label) dataset:")
    print("KNeighborsClassifier:")
    clf_knn = KNeighborsClassifier(n_neighbors=9, weights='distance',
                                   algorithm='kd_tree', p=2, leaf_size=9, n_jobs=-1)
    obj.func(clf_knn, X_single, y_single)

    print("Linear SVM Classifier:")
    # NOTE: degree/gamma/coef0 are not used by the linear kernel (see above).
    clf_svm = svm.SVC(kernel='linear', degree=9, gamma='auto', C=9, coef0=0.11,
                      probability=True, random_state=42, class_weight='balanced')
    obj.func(clf_svm, X_single, y_single)

    print("MLP Classifier:")
    # NOTE: early_stopping has no effect with the 'lbfgs' solver (see above).
    # max_iter is deliberately left at 50 for this run, as in the original.
    clf_mlp = MLPClassifier(random_state=42, max_iter=50,
                            solver='lbfgs', activation='tanh', early_stopping=True)
    obj.func(clf_mlp, X_single, y_single)


For final boards classification dataset:
Linear SVM Classifier:
Confusion_matrix for initial training-test set
[[289  15]
 [  0 559]]
Classification_report for initial training-test set
              precision    recall  f1-score   support

          -1       1.00      0.95      0.97       304
           1       0.97      1.00      0.99       559

    accuracy                           0.98       863
   macro avg       0.99      0.98      0.98       863
weighted avg       0.98      0.98      0.98       863

Training score for initial training-test set
0.9894736842105263
Testing score for initial training-test set
0.9826187717265353
Cross-validation Accuracies:
[1.         1.         1.         1.         1.         1.
 1.         1.         1.         0.83157895]
Cross-validation Accuracies Mean:
0.983157894736842
Cross-validation Accuracies Standard Deviation Mean:
0.050526315789473676
Confusion matrix of combined cross-validation data predicted results
[[316  16]
 [  0 626]]
Classifi

Cross-validation Accuracies:
[0.13719512 0.14961832 0.1648855  0.17557252 0.15725191 0.18320611
 0.20458015 0.15267176 0.18167939 0.17251908]
Cross-validation Accuracies Mean:
0.1679179854775647
Cross-validation Accuracies Standard Deviation Mean:
0.01865786744900777
Confusion matrix of combined cross-validation data predicted results
[[176 728  52  85   4 302  16 182  19]
 [ 25 556   3   3   0 206  10  21  21]
 [ 92 408 100   8   4 275   1  64  21]
 [ 29 189  30  46   0 148   5  62   4]
 [ 93 434  39  69  15 294  13  85   4]
 [ 34  96  11   5   0 161  14  18  14]
 [ 42 178  63  18   9 144  14  69   7]
 [ 54  95  25   1   0  43   4  25   7]
 [ 45 186  19  14   0 119  19  50   7]]
Classification_report of combined cross-validation data predicted results
              precision    recall  f1-score   support

           0       0.30      0.11      0.16      1564
           1       0.19      0.66      0.30       845
           2       0.29      0.10      0.15       973
           3       0