In [1]:
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, mean_absolute_error, \
    mean_squared_error, r2_score
from sklearn.multioutput import MultiOutputRegressor
from math import floor, ceil

# Classifiers for the Single Label Optimal Play

In [49]:
# Setup to train learning models
A = np.loadtxt('tictac_single.txt')
X_single = A[:, :9]  # Input features
y_single = A[:, 9:].ravel()  # Output labels

# X is a 3x3 grid for tictactoe. It should go like this:
#   x0  |   x1  |   x2
#   x3  |   x4  |   x5
#   x6  |   x7  |   x8

# Y gives the output of the game (the winner)

X_train, X_test, y_train, y_test = train_test_split(X_single, y_single, 
                                                                    test_size=0.2, random_state=42, 
                                                                    stratify=y_single)

# LETS DOWNSIZE

downsampled_X_train, downsampled_y_train = resample(X_train, y_train, 
                                                                     replace=False, n_samples=2000, 
                                                                     random_state=42)

SVM Model

In [52]:
# **************SVM Model******************* single.txt
# scaler = StandardScaler()
# X_std = scaler.fit_transform(X_train)
# X_test_std = scaler.transform(X_test)

# svm_clf_single = LinearSVC(C=100.0, dual=True, max_iter=100000) # Accuracy of 25%
svm_clf_single = SVC(kernel='linear', C=100.0) # Accuracy of 46.52%
svm_clf_single.fit(downsampled_X_train, downsampled_y_train)

svm_y_pred = svm_clf_single.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_y_pred)
svm_cm = confusion_matrix(y_test, svm_y_pred)


print("*****************Linear SVM - Single*****************")
print(f'Accuracy: {svm_accuracy}')
print('Confusion Matrix:')
print(svm_cm)

svm_scores = cross_val_score(svm_clf_single, X_single, y_single, cv=10)
print(f'Cross-Validation Accuracy: {svm_scores.mean()}\n')

*****************Linear SVM - Single*****************
Accuracy: 0.4652936689549962
Confusion Matrix:
[[313   0   0   0   0   0   0   0   0]
 [ 25  40  47   0  55   0   2   0   0]
 [ 47   9  84   0  55   0   0   0   0]
 [ 28  19  21  11  24   0   0   0   0]
 [ 59   0   0   0 150   0   0   0   0]
 [ 30  10  16   0  12   0   2   0   0]
 [ 38  24  17   0  18   0  12   0   0]
 [ 18  14  13   0   6   0   0   0   0]
 [ 32  15  27   0  15   0   3   0   0]]
Cross-Validation Accuracy: 0.3651380096816235



Multilayer Perceptron Model

In [4]:
# ********************MLP Model**************************** single.txt

mlp_clf_single = MLPClassifier(hidden_layer_sizes=(1024, 512, 128, 64, 32), max_iter=1900, random_state=42)

# Train the model
mlp_clf_single.fit(X_train, y_train)

mlp_y_pred = mlp_clf_single.predict(X_test)
mlp_accuracy = accuracy_score(y_test, mlp_y_pred)
mlp_cm = confusion_matrix(y_test, mlp_y_pred)

print("*****************MLP Model - Single*****************")
print(f'Accuracy: {mlp_accuracy}')
print('Confusion Matrix:')
print(mlp_cm)

mlp_scores = cross_val_score(mlp_clf_single, X_single, y_single, cv=10)
print(f'Cross-Validation Accuracy: {mlp_scores.mean()}\n')

*****************MLP Model - Single*****************
Accuracy: 0.9519450800915332
Confusion Matrix:
[[307   0   1   1   3   0   0   1   0]
 [  0 162   2   2   0   2   0   0   1]
 [  2   2 183   4   3   0   1   0   0]
 [  3   2   0  94   2   0   1   1   0]
 [  2   2   0   2 202   0   0   0   1]
 [  1   1   1   2   0  65   0   0   0]
 [  6   0   2   1   0   0 100   0   0]
 [  1   0   0   1   0   0   0  49   0]
 [  4   1   0   1   0   0   0   0  86]]
Cross-Validation Accuracy: 0.9484157978030163



K-Nearest Neighbors Model

In [53]:
# *****************KNN Model************************* single.txt
# Instantiate the kNN classifier with a specific value of k
knn_clf_single = KNeighborsClassifier(n_neighbors=18, metric='euclidean', weights='distance')  # Adjust the value of n_neighbors and metric as needed
# Standardize the feature values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_single)

# Split the scaled data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_single, test_size=0.2, random_state=42)

# Train the Model
knn_clf_single.fit(X_train, y_train)
y_pred = knn_clf_single.predict(X_test)

print("*****************KNN Model - Single*****************")

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy}')

# Print classification report
#print(classification_report(y_test, y_pred))

# Print confusion matrix
print(confusion_matrix(y_test, y_pred))

print(f'Cross-validation Accuracy: {cross_val_score(knn_clf_single, X_single, y_single, cv=10).mean()}')

*****************KNN Model - Single*****************
Accuracy: 0.9092295957284515
[[311   1   2   0   8   0   0   0   1]
 [  4 145   3   3   6   0   2   0   5]
 [  4   0 175   4   2   0   1   1   0]
 [  5   1   0  98   7   1   2   1   2]
 [  8   0   0   0 192   2   0   0   0]
 [  4   3   3   0   0  64   1   0   1]
 [  3   0   1   1   2   0  92   0   0]
 [  0   6   0   2   1   0   0  40   1]
 [  7   3   2   0   2   0   0   0  75]]
Cross-validation Accuracy: 0.9291835784770062


# Classifiers for the Final Boards

In [6]:
# Setup to train learning models **************** 2nd dataset
A = np.loadtxt('tictac_final.txt')
X_final = A[:, :9]   # Input features
y_final = A[:, 9:].ravel()   # Output labels

X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.2, random_state=42, stratify=y_final)

SVM Model

In [7]:
# **************SVM Model******************* final.txt
svm_clf_final = SVC(kernel='linear', C=0.1)
svm_clf_final.fit(X_train, y_train)

svm_y_pred = svm_clf_final.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_y_pred)
svm_cm = confusion_matrix(y_test, svm_y_pred)

print("*****************SVM Model - Final*****************")
print(f'Accuracy: {svm_accuracy}')
print('Confusion Matrix:')
print(svm_cm)

svm_scores = cross_val_score(svm_clf_final, X_final, y_final, cv=10)
print(f'Cross-Validation Accuracy: {svm_scores.mean()}\n')

*****************SVM Model - Final*****************
Accuracy: 0.9739583333333334
Confusion Matrix:
[[ 62   5]
 [  0 125]]
Cross-Validation Accuracy: 0.983157894736842



Multilayer Perceptron Model

In [8]:
# ********************MLP Model**************************** final.txt
mlp_clf_final = MLPClassifier(hidden_layer_sizes=(1024, 512, 128, 64, 32), max_iter=2000, random_state=42)

# Train the model
mlp_clf_final.fit(X_train, y_train)

mlp_y_pred = mlp_clf_final.predict(X_test)
mlp_accuracy = accuracy_score(y_test, mlp_y_pred)
mlp_cm = confusion_matrix(y_test, mlp_y_pred)

print("*****************MLP Model - Final*****************")
print(f'Accuracy: {mlp_accuracy}')
print('Confusion Matrix:')
print(mlp_cm)

mlp_scores = cross_val_score(mlp_clf_final, X_final, y_final, cv=10)
print(f'Cross-Validation Accuracy: {mlp_scores.mean()}\n')

*****************MLP Model - Final*****************
Accuracy: 0.9947916666666666
Confusion Matrix:
[[ 66   1]
 [  0 125]]
Cross-Validation Accuracy: 0.983157894736842



K-Nearest Neighbors Model

In [9]:
# *****************KNN Model************************* final.txt
# Instantiate the kNN classifier with a specific value of k
knn_clf_final = KNeighborsClassifier(n_neighbors=4, metric='euclidean', weights='distance')  # Adjust the value of n_neighbors and metric as needed
# # Standardize the feature values
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_final)

# # Split the scaled data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_final, test_size=0.2, random_state=42)

# Train the Model
knn_clf_final.fit(X_train, y_train)
y_pred = knn_clf_final.predict(X_test)

print("*****************KNN Model - Final*****************")

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Print classification report
# print(classification_report(y_test, y_pred))

# Print confusion matrix
print(f'Confusion Matrix:\n {confusion_matrix(y_test, y_pred)}')

knn_scores = cross_val_score(knn_clf_final, X_final, y_final, cv=10)
print(f'Cross-Validation Accuracy: {knn_scores.mean()}\n')

*****************KNN Model - Final*****************
Accuracy: 1.0
Confusion Matrix:
 [[ 67   0]
 [  0 125]]
Cross-Validation Accuracy: 0.9885416666666667



# Regression Models for the Multi Label Optimal Play

In [27]:
# *****************Regression Models*****************

# Load tictac_multi.txt and put inputs and outputs into X and y
A = np.loadtxt('tictac_multi.txt')
X_multi = A[:, :9]   # Input features
y_multi = A[:, 9:]   # Output labels

#print(X_multi)
#print(y_multi)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_multi, y_multi, test_size=0.2, random_state=42)

Linear Regression

In [28]:
# *****************Linear Regression*****************

# Normal Equations
X_train_norm = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test_norm = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

# Compute the pseudoinverse of X_train
X_pseudo_train = np.linalg.pinv(X_train)

# Compute the parameters Theta using the training data
Theta = np.dot(X_pseudo_train, y_train)

y_pred = X_test.dot(Theta)

# Set threshold
threshold = 0.5

# Go through and change from continous to binary data
y_pred_binary = (y_pred >= threshold).astype(int) 

predicted_moves = np.argmax(y_pred_binary, axis=1)
# print(predicted_moves)
true_moves = np.argmax(y_test, axis=1)
# print(true_moves)

# Get accuracy score
accuracy = accuracy_score(true_moves, predicted_moves)

mse = mean_squared_error(y_test, y_pred)
print("Normal Equations (EC): ")
print(f"Mean Squared Error: {mse}")
print(f"Accuracy: {accuracy}")

# Select Linear Regression model
linReg_clf = [linear_model.LinearRegression().fit(X_train, y_train[:, i]) for i in range(9)]
    
# Predict outputs for the test set
y_pred = np.array([linReg.predict(X_test) for linReg in linReg_clf]).T

# Set threshold
threshold = 0.5

# Go through and change from continous to binary data
y_pred_binary = (y_pred >= threshold).astype(int) 

predicted_moves = np.argmax(y_pred_binary, axis=1)
# print(predicted_moves)
true_moves = np.argmax(y_test, axis=1)
# print(true_moves)

# Get accuracy score
accuracy = accuracy_score(true_moves, predicted_moves)

mae = mean_absolute_error(y_test, y_pred_binary)
mse = mean_squared_error(y_test, y_pred_binary)
r2 = r2_score(y_test, y_pred_binary)

# Print results
print("*****************Linear Regression Model*****************")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")
print(f"Accuracy: {accuracy}")
# print(f"Cross-Validation Accuracy: {multi_scores.mean()}")

Normal Equations (EC): 
Mean Squared Error: 0.1858164091641541
Accuracy: 0.2463768115942029
*****************Linear Regression Model*****************
Mean Absolute Error: 0.21756081023815577
Mean Squared Error: 0.21756081023815577
R-squared: -0.28111025788714833
Accuracy: 0.2463768115942029


Multilayer Perceptron Regression

In [45]:
# *****************MLP Regression Model*****************

# Select MLP Regressor
mlpr_clf = MLPRegressor(hidden_layer_sizes=(1024, 128, 64, 32), max_iter=1200, random_state=42, alpha=0.05)

# # Fit the model to training data
# mlpr_clf.fit(X_train, y_train)

# # Make prediction
# y_pred = mlpr_clf.predict(X_test)

# # Set threshold
# threshold = 0.5

# # Go through and change from continous to binary data
# y_pred_binary = (y_pred >= threshold).astype(int)
# y_binary = (y_test >= threshold).astype(int) 

# # Get accuracy score
# accuracy = accuracy_score(y_binary, y_pred_binary)

# # K-Fold setup
# kf = KFold(n_splits = 5, shuffle=True, random_state=42)

# # Initialize list to store mean squared errors for each fold
# mse_scores = []

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X_multi):
    mlpr_clf.fit(X_multi[train_index], y_multi[train_index])
    print(mlpr_clf.score(X_multi[test_index], y_multi[test_index]))

# # Evaluate
# mae = mean_absolute_error(y_test, y_pred)
# r2 = r2_score(y_test, y_pred)

# Print results
# print("*****************MLP Regression Model*****************")
# print(f"Mean Absolute Error: {mae}")
# print(f"Mean Squared Error: {np.mean(mse_scores)}")
# print(f"R-squared: {r2}\n")
# print(f"Accuracy: {accuracy}")



0.9115098938402648
0.8854904864604777
0.896459649218855
0.835111181500171
0.8636821554765849


K-Nearest Neighbors

In [56]:
# *****************K-Nearest Neighbors Regression*****************

# Select KNN Regression model
knnr_clf = KNeighborsRegressor(n_neighbors=17, weights='distance')

# Fit training data
knnr_clf.fit(X_train, y_train)

# Make a prediction
y_pred = knnr_clf.predict(X_test)

# Set threshold
threshold = 0.5

# Go through and change from continous to binary data
y_pred_binary = (y_pred >= threshold).astype(int)
y_binary = (y_test >= threshold).astype(int) 

# Get accuracy score
accuracy = accuracy_score(y_binary, y_pred_binary)

# Evaluate
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
knnr_scores = cross_val_score(knnr_clf, X_multi, y_multi, cv=10)

# Print results
print("*****************KNN Regression Model*****************")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}\n")
print(f"Cross-Validation Accuracy: {knnr_scores.mean()}")
print(f"Accuracy: {accuracy}")

*****************KNN Regression Model*****************
Mean Absolute Error: 0.4764713435025091
Mean Squared Error: 1.1287599745071686
R-squared: 0.8186224719041255

Cross-Validation Accuracy: 0.8543128266071245
Accuracy: 0.9374523264683448


# TicTacToe Game

In [31]:
class TicTacToe:
    def __init__(self, model):
        self.board = [[' ' for _ in range(3)] for _ in range(3)]
        self.current_player = 'X'
        self.model = model

    # Reset the board after game over. Board returns to 0 array
    def reset(self):
        self.board = [[' ' for _ in range(3)] for _ in range(3)]
        self.current_player = 'X'

    # Return true if (row, col) inputs are valid moves
    def is_valid_move(self, row, col):
        return 3 > row >= 0 and 0 <= col < 3 and self.board[row][col] == ' '

    # Print the current state of the board
    def print_board(self):
        print("  0   1   2")
        for i, row in enumerate(self.board):
            print(f'{i} {" | ".join(row)}')
            if i < 2:
                print(' ' + '-' * 11)

    # Check if move is valid, then input the move into the board
    def make_move(self, row, col):
        if self.is_valid_move(row, col):
            self.board[row][col] = self.current_player
            return True
        return False

    # If board is full return true, otherwise false
    def is_board_full(self):
        return all(all(cell != ' ' for cell in row) for row in self.board)

    # Switch to next player
    def switch_player(self):
        self.current_player = 'O' if self.current_player == 'X' else 'X'

    # Check to see who the winner is
    def check_winner(self):
        # Check rows, columns, and diagonals for a win
        for i in range(3):
            if self.board[i][0] == self.board[i][1] == self.board[i][2] != ' ':
                return self.board[i][0]
            if self.board[0][i] == self.board[1][i] == self.board[2][i] != ' ':
                return self.board[0][i]

        if self.board[0][0] == self.board[1][1] == self.board[2][2] != ' ':
            return self.board[0][0]
        if self.board[0][0] == self.board[1][1] == self.board[2][0] != ' ':
            return self.board[0][2]

        return None
    
    # Gameplay loop
    def play_game(self):
        while True:
            print(f"Player {self.current_player}'s turn")
            self.print_board()

            if self.current_player == 'X':
                while True:
                    row = int(input('Enter row (0-2): '))
                    col = int(input('Enter column (0-2): '))
                    if self.make_move(row, col):
                        break
                
                # Check for winner
                winner = self.check_winner()
                if winner:
                    print(f"Player {winner} wins!")
                    self.print_board()
                    break
                elif self.is_board_full():
                    print("It's a draw!")
                    self.print_board()
                    break
                else:
                    self.switch_player()
            else:
                while True:
                    # Use trained model to predict 0's move
                    board_state = np.array(self.board).flatten()
                    board_state = np.where(board_state == 'X', 1, board_state)
                    board_state = np.where(board_state == 'O', -1, board_state)
                    board_state = np.where(board_state == ' ', 0, board_state)
                    board_state = board_state.astype(int)
                    
                    # Try runs if the model is a classifier, otherwise except runs as regression
                    try:
                        move = int(self.model.predict([board_state])[0])
                    # move = int(self.model.predict([board_state])[0])])
                    except:  # we have an array and we want one prediction
                        arr = (self.model.predict([board_state])[0])  # should be an array
                        highestIndex = 0
                        highestVal = 0
                        for i in range(len(arr)):
                            if arr[i] > highestVal:
                                highestIndex = i
                                highestVal = arr[i]
                        print(highestIndex)
                        move = highestIndex
                    # move = self.model.predict( [ board_state[0], board_state[1] ] )
                    print(f"Board state: {board_state}")
                    print(f"Predicted move: {move}")

                    row, col = divmod(move, 3)
                    print(f"Row, Col: {row}, {col}")
                    if not self.make_move(row, col):
                        print("Unable to move")
                        continue
                    else: 
                        break

                # Check for winner
                winner = self.check_winner()
                if winner:
                    print(f"Player {winner} wins!")
                    self.print_board()
                    break
                elif self.is_board_full():
                    print("It's a draw!")
                    self.print_board()
                    break
                else:
                    self.switch_player()

Main Function

In [54]:
if __name__ == '__main__':
    while True:
        print("TicTacToe Game\n")
        print("Select Model")
        print("------------")
        print("1. Play\n2. Exit")
        res = input("What do you want to do? ")

        match res:
            case "1":
                game = TicTacToe(mlp_clf_single)
                game.play_game()
            case "2":
                break
        

TicTacToe Game

Select Model
------------
1. Play
2. Exit
What do you want to do? 1
Player X's turn
  0   1   2
0   |   |  
 -----------
1   |   |  
 -----------
2   |   |  
Enter row (0-2): 0
Enter column (0-2): 0
Player O's turn
  0   1   2
0 X |   |  
 -----------
1   |   |  
 -----------
2   |   |  
Board state: [1 0 0 0 0 0 0 0 0]
Predicted move: 4
Row, Col: 1, 1
Player X's turn
  0   1   2
0 X |   |  
 -----------
1   | O |  
 -----------
2   |   |  
Enter row (0-2): 1
Enter column (0-2): 0
Player O's turn
  0   1   2
0 X |   |  
 -----------
1 X | O |  
 -----------
2   |   |  
Board state: [ 1  0  0  1 -1  0  0  0  0]
Predicted move: 6
Row, Col: 2, 0
Player X's turn
  0   1   2
0 X |   |  
 -----------
1 X | O |  
 -----------
2 O |   |  
Enter row (0-2): 0
Enter column (0-2): 2
Player O's turn
  0   1   2
0 X |   | X
 -----------
1 X | O |  
 -----------
2 O |   |  
Board state: [ 1  0  1  1 -1  0 -1  0  0]
Predicted move: 1
Row, Col: 0, 1
Player X's turn
  0   1   2
0 X | O |

# Extra Credit

1. Implement Linear Regression using Normal equations. 10 points Look at Linear regression block

2. Train the models on 1/10th of the data and explain what happens. 5 points
For the linear SVM model, we used 1/10th of the data and the model got slightly more accurate. The reasoning for this is because using a smaller dataset means reduced noise, less overfitting, simpler decision boundaries, and better feature representation.

3. Investigate and report what happens when a substantial fraction of ground truth
values are corrupted by random noise. Explain why certain models scale better
to larger dataset than others. 5 points
	When ground truth values are corrupted by random noise, it can decrease the accuracy of the model because the model may be misled by the random noise or overfit to it. The inconsistencies and errors associated with the random noise requires the model to be more robust so it can handle the multiple variations in the data. Without this, the accuracy and generalizations of the model will become worse. 
Certain models scale better to larger datasets than others largely because these models have less computational and/or feature complexity than others. This is why linear SVM is bad with large data sets. While its complexity for training is linear, O(n), its complexity for features is quadratic, O(n2). It can also depend on the amount of memory a specific model needs, as more and more memory is needed for increasing data. Outliers are also more likely to be in a large amount of data, which may negatively impact certain models more than others.
