In [1]:
import numpy as np
import random
import copy


class ExperimentGenerator():
    ''' جهت تولید مسئله جدید استفاده میشود '''
    
    def __init__(self):
        self.initBoardState = [[' ',' ',' '],[' ',' ',' '],[' ',' ',' ']]

    def generateNewProblem(self):
        return(self.initBoardState)

    
class Player:
    ''' کلاس بازیکن شامل تمام توابع مورد نیاز '''

    def __init__(self,playerSymbol,playerTargetFunctionWeightVector):        
        self.playerSymbol = playerSymbol
        self.playerTargetFunctionWeightVector = playerTargetFunctionWeightVector

    def isGameOver(self,board,playerSymbol):
        ''' کنترل اتمام بازی '''
        
        flag = False
        
        # بازی تمام شده باشد
        if(board == -1):
            flag = True        
        
        # یکی از یازیکن ها برنده شده باشد
        elif((board[0][0] == board[0][1] == board[0][2] == playerSymbol)  or 
            (board[1][0] == board[1][1] == board[1][2] == playerSymbol) or
            (board[2][0] == board[2][1] == board[2][2] == playerSymbol) or
            (board[0][0] == board[1][0] == board[2][0] == playerSymbol) or
            (board[0][1] == board[1][1] == board[2][1] == playerSymbol) or
            (board[0][2] == board[1][2] == board[2][2] == playerSymbol) or
            (board[0][0] == board[1][1] == board[2][2] == playerSymbol) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol) ): 
                flag = True
        
        # بازی مساوی شده باشد و برنده نداشته باشد
        elif(' ' not in np.array(board).flatten()):
            flag = True
        
        return(flag)    

    def lookForLegalMoves(self,boardState,playerSymbol):
        ''' لیستی از حرکت های مجاز در هر وضعیت تخته بازی برمیگرداند '''
        legalMoves = []
        for i in range(len(boardState[0])):
            for j in range(len(boardState[0])):
                if(boardState[i][j] == ' '):
                    tempBoard = copy.deepcopy(boardState)
                    tempBoard[i][j]=playerSymbol
                    legalMoves.append(tempBoard)                    
        return(legalMoves)    

    def extractFeatures(self,board,playerSymbol1,playerSymbol2):
        ''' محاسبه فیچر ها در هر وضعیت تخته بازی '''
        
        x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12 = 1,0,0,0,0,0,0,0,0,0,0,0,0
        
        for i in range(3):
            
            # 1 Player-1's Symbol in a row in an open row
            if (((board[i][0] == playerSymbol1) and ( board[i][1] == board[i][2] == ' ')) or
                ((board[i][2] == playerSymbol1) and ( board[i][0] == board[i][1] == ' '))):
                x1 = x1 + 1
                
            # 1 Player-1's Symbol in a row in an open col
            if (((board[0][i] == playerSymbol1) and ( board[1][i] == board[2][i] == ' ')) or
                ((board[2][i] == playerSymbol1) and ( board[0][i] == board[1][i] == ' '))):
                x1 = x1 + 1 
                
            # 1 Player-2's Symbol in a row in an open row
            if (((board[i][0] == playerSymbol2) and ( board[i][1] == board[i][2] == ' ')) or
                ((board[i][2] == playerSymbol2) and ( board[i][0] == board[i][1] == ' '))):
                x2 = x2 + 1
                
            # 1 Player-2's Symbol in a row in an open col
            if (((board[0][i] == playerSymbol2) and ( board[1][i] == board[2][i] == ' ')) or
                ((board[2][i] == playerSymbol2) and ( board[0][i] == board[1][i] == ' '))):
                x2 = x2 + 1  
                
            # 2 Player-1's Symbols in a row
            if (((board[i][0] == board[i][1] == playerSymbol1) and ((board[i][2]) == ' ')) or
                ((board[i][1] == board[i][2] == playerSymbol1) and ((board[i][0]) == ' '))):
                x3 = x3 + 1 
                
            # 2 Player-2's Symbols in a row
            if (((board[i][0] == board[i][1] == playerSymbol2) and ((board[i][2]) == ' ')) or
                ((board[i][1] == board[i][2] == playerSymbol2) and ((board[i][0]) == ' '))):
                x4 = x4 + 1 
                
            # 3 Player-1's Symbols in a row with an open box
            if ((board[i][0] == board[i][1] == board[i][2] == playerSymbol1)):
                x5 = x5 + 1 
                
            # 3 Player-2's Symbols in a row with an open box
            if (board[i][0] == board[i][1] == board[i][2] == playerSymbol2 ):
                x6 = x6 + 1
        
        
        # 1 Player-1's Symbol in a diagonal
        if (((board[0][0] == playerSymbol1) and ( board[1][1] == board[2][2] == ' ')) or
            ((board[2][2] == playerSymbol1) and ( board[0][0] == board[1][1] == ' ')) or
            ((board[0][2] == playerSymbol1) and ( board[1][1] == board[2][0] == ' ')) or
            ((board[2][0] == playerSymbol1) and ( board[0][2] == board[1][1] == ' '))):
            x7 = x7 + 1

        # 1 Player-2's Symbol in a diagonal
        if (((board[0][0] == playerSymbol2) and ( board[1][1] == board[2][2] == ' ')) or
            ((board[2][2] == playerSymbol2) and ( board[0][0] == board[1][1] == ' ')) or
            ((board[0][2] == playerSymbol2) and ( board[1][1] == board[2][0] == ' ')) or
            ((board[2][0] == playerSymbol2) and ( board[0][2] == board[1][1] == ' '))):
            x8 = x8 + 1                

        # 2 Player-1's Symbols in a diagonal
        if (((board[0][0] == board[1][1] == playerSymbol1) and ((board[2][2]) == ' ')) or
            ((board[1][1] == board[2][2] == playerSymbol1) and ((board[0][0]) == ' ')) or
            ((board[0][2] == board[1][1] == playerSymbol1) and ((board[2][0]) == ' ')) or
            ((board[1][1] == board[2][0] == playerSymbol1) and ((board[0][2]) == ' '))):
            x9 = x9 + 1    

        # 2 Player-2's Symbols in a diagonal
        if (((board[0][0] == board[1][1] == playerSymbol2) and ((board[2][2]) == ' ')) or
            ((board[1][1] == board[2][2] == playerSymbol2) and ((board[0][0]) == ' ')) or
            ((board[0][2] == board[1][1] == playerSymbol2) and ((board[2][0]) == ' ')) or
            ((board[1][1] == board[2][0] == playerSymbol2) and ((board[0][2]) == ' '))):
            x10 = x10 + 1    


        # 3 Player-1's Symbols in a diagonal
        if ((board[0][0] == board[1][1] == board[2][2] == playerSymbol1) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol1)):
            x11 = x11 + 1   

        # 3 Player-2's Symbols in a diagonal
        if ((board[0][0] == board[1][1] == board[2][2] == playerSymbol2) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol2)):
            x12 = x12 + 1
        
        #Added 1 for bias
        x0 = 1
        feature_vector = [x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12]     
#         feature_vector = [x0, x1, x2, x3, x4, x5, x6]       
        
        return(feature_vector)
        
    def boardPrint(self,board):
        print('\n')
        print(board[0][0] + '|' + board[0][1] + '|' + board[0][2])
        print("-----")
        print(board[1][0] + '|' + board[1][1] + '|' + board[1][2])
        print("-----")
        print(board[2][0] + '|' + board[2][1] + '|' + board[2][2])
        print('\n')    

    def calculateNonFinalBoardScore(self,weight_vector,feature_vector):
        ''' محاسبه امتیاز هر وضعیت از صفحه بازی '''
        weight_vector = np.array(weight_vector).reshape((len(weight_vector),1))
        feature_vector = np.array(feature_vector).reshape((len(feature_vector),1))
        boardScore = np.dot(weight_vector.T,feature_vector)
        return(boardScore[0][0])
    
    def chooseMove(self,board,playerSymbol1,playerSymbol2):
        ''' یافتن بهترین حرکت از بین حرکت های مجاز در هر وضعیت از صفحه بازی '''
        legalMoves = self.lookForLegalMoves(board,playerSymbol1)
        legalMoveScores = [self.calculateNonFinalBoardScore(self.playerTargetFunctionWeightVector,
            self.extractFeatures(i,playerSymbol1,playerSymbol2)) for i in legalMoves]
        newBoard = legalMoves[np.argmax(legalMoveScores)]
        return(newBoard)        
                    
    def chooseRandomMove(self,board,playerSymbol):
        ''' انتخاب رندوم یک حرکت از بین حرکت های مجاز هر وضعیت از صفحه بازی '''
        legalMoves = self.lookForLegalMoves(board,playerSymbol)
        newBoard = random.choice(legalMoves)
        return(newBoard)            


class PerformanceSystem:
    ''' یافتن مسیر انتخاب تمام وضعیت های صفحه بازی داده شده تا اتمام بازی '''
    
    def __init__(self,initialBoard,playersTargetFunctionWeightVectors,playerSymbols):

        self.board = initialBoard
        self.playersTargetFunctionWeightVectors = playersTargetFunctionWeightVectors
        self.playerSymbols = playerSymbols
    
    def isGameOver(self,board,playerSymbol):
        ''' کنترل اتمام بازی '''
        
        flag = False
        
        # بازی تمام شده باشد
        if(board == -1):
            flag = True
            
        # یکی از یازیکن ها برنده شده باشد
        elif((board[0][0] == board[0][1] == board[0][2] == playerSymbol)  or 
            (board[1][0] == board[1][1] == board[1][2] == playerSymbol) or
            (board[2][0] == board[2][1] == board[2][2] == playerSymbol) or
            (board[0][0] == board[1][0] == board[2][0] == playerSymbol) or
            (board[0][1] == board[1][1] == board[2][1] == playerSymbol) or
            (board[0][2] == board[1][2] == board[2][2] == playerSymbol) or
            (board[0][0] == board[1][1] == board[2][2] == playerSymbol) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol) ): 
                flag = True
        
        # بازی مساوی شده باشد و برنده نداشته باشد
        elif(' ' not in np.array(board).flatten()):
            flag = True
            
        return(flag)
        
    def generateGameHistory(self, generateType='train'):
        ''' تولید مسیر انتخاب تمام وضعیت های صفحه بازی تا اتمام بازی '''
        firstMove = True
        gameHistory = []
        gameStatusFlag = True
        player1 = Player(self.playerSymbols[0],self.playersTargetFunctionWeightVectors[0])
        player2 = Player(self.playerSymbols[1],self.playersTargetFunctionWeightVectors[1])
        tempBoard = copy.deepcopy(self.board)

        while(gameStatusFlag):             
            if firstMove:
                tempBoard = player1.chooseRandomMove(tempBoard,player1.playerSymbol)
                firstMove = False
            else:
                tempBoard = player1.chooseMove(tempBoard,player1.playerSymbol,player2.playerSymbol)
            gameHistory.append(tempBoard)
            gameStatusFlag = not self.isGameOver(tempBoard,player1.playerSymbol)
            if(gameStatusFlag == False):
                break
            if generateType=='train':
                tempBoard = player2.chooseRandomMove(tempBoard,player2.playerSymbol)
            elif generateType=='game':
                tempBoard = player2.chooseMove(tempBoard,player2.playerSymbol,player1.playerSymbol)

            gameHistory.append(tempBoard)
            gameStatusFlag =  not self.isGameOver(tempBoard,player2.playerSymbol)                    
        
        return(gameHistory)    


class Critic:
    ''' کلاس تولید نمونه های آموزشی براساس سابقه بازی '''
    
    def __init__(self,gameHistory):
        self.gameHistory = gameHistory

    def extractFeatures(self,board,playerSymbol1,playerSymbol2):
        ''' محاسبه فیچر ها در هر وضعیت تخته بازی '''
        
        x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12 = 1,0,0,0,0,0,0,0,0,0,0,0,0
        
        for i in range(3):
            
            # 1 Player-1's Symbol in a row in an open row
            if (((board[i][0] == playerSymbol1) and ( board[i][1] == board[i][2] == ' ')) or
                ((board[i][2] == playerSymbol1) and ( board[i][0] == board[i][1] == ' '))):
                x1 = x1 + 1
                
            # 1 Player-1's Symbol in a row in an open col
            if (((board[0][i] == playerSymbol1) and ( board[1][i] == board[2][i] == ' ')) or
                ((board[2][i] == playerSymbol1) and ( board[0][i] == board[1][i] == ' '))):
                x1 = x1 + 1 
                
            # 1 Player-2's Symbol in a row in an open row
            if (((board[i][0] == playerSymbol2) and ( board[i][1] == board[i][2] == ' ')) or
                ((board[i][2] == playerSymbol2) and ( board[i][0] == board[i][1] == ' '))):
                x2 = x2 + 1
                
            # 1 Player-2's Symbol in a row in an open col
            if (((board[0][i] == playerSymbol2) and ( board[1][i] == board[2][i] == ' ')) or
                ((board[2][i] == playerSymbol2) and ( board[0][i] == board[1][i] == ' '))):
                x2 = x2 + 1  
                
            # 2 Player-1's Symbols in a row
            if (((board[i][0] == board[i][1] == playerSymbol1) and ((board[i][2]) == ' ')) or
                ((board[i][1] == board[i][2] == playerSymbol1) and ((board[i][0]) == ' '))):
                x3 = x3 + 1 
                
            # 2 Player-2's Symbols in a row
            if (((board[i][0] == board[i][1] == playerSymbol2) and ((board[i][2]) == ' ')) or
                ((board[i][1] == board[i][2] == playerSymbol2) and ((board[i][0]) == ' '))):
                x4 = x4 + 1   
                
            # 3 Player-1's Symbols in a row with an open box
            if ((board[i][0] == board[i][1] == board[i][2] == playerSymbol1)):
                x5 = x5 + 1 
                
            # 3 Player-2's Symbols in a row with an open box
            if (board[i][0] == board[i][1] == board[i][2] == playerSymbol2 ):
                x6 = x6 + 1
        
        
        # 1 Player-1's Symbol in a diagonal
        if (((board[0][0] == playerSymbol1) and ( board[1][1] == board[2][2] == ' ')) or
            ((board[2][2] == playerSymbol1) and ( board[0][0] == board[1][1] == ' ')) or
            ((board[0][2] == playerSymbol1) and ( board[1][1] == board[2][0] == ' ')) or
            ((board[2][0] == playerSymbol1) and ( board[0][2] == board[1][1] == ' '))):
            x7 = x7 + 1

        # 1 Player-2's Symbol in a diagonal
        if (((board[0][0] == playerSymbol2) and ( board[1][1] == board[2][2] == ' ')) or
            ((board[2][2] == playerSymbol2) and ( board[0][0] == board[1][1] == ' ')) or
            ((board[0][2] == playerSymbol2) and ( board[1][1] == board[2][0] == ' ')) or
            ((board[2][0] == playerSymbol2) and ( board[0][2] == board[1][1] == ' '))):
            x8 = x8 + 1                

        # 2 Player-1's Symbols in a diagonal
        if (((board[0][0] == board[1][1] == playerSymbol1) and ((board[2][2]) == ' ')) or
            ((board[1][1] == board[2][2] == playerSymbol1) and ((board[0][0]) == ' ')) or
            ((board[0][2] == board[1][1] == playerSymbol1) and ((board[2][0]) == ' ')) or
            ((board[1][1] == board[2][0] == playerSymbol1) and ((board[0][2]) == ' '))):
            x9 = x9 + 1    

        # 2 Player-2's Symbols in a diagonal
        if (((board[0][0] == board[1][1] == playerSymbol2) and ((board[2][2]) == ' ')) or
            ((board[1][1] == board[2][2] == playerSymbol2) and ((board[0][0]) == ' ')) or
            ((board[0][2] == board[1][1] == playerSymbol2) and ((board[2][0]) == ' ')) or
            ((board[1][1] == board[2][0] == playerSymbol2) and ((board[0][2]) == ' '))):
            x10 = x10 + 1    


        # 3 Player-1's Symbols in a diagonal
        if ((board[0][0] == board[1][1] == board[2][2] == playerSymbol1) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol1)):
            x11 = x11 + 1   

        # 3 Player-2's Symbols in a diagonal
        if ((board[0][0] == board[1][1] == board[2][2] == playerSymbol2) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol2)):
            x12 = x12 + 1
        
        #Added 1 for bias
        x0 = 1
        feature_vector = [x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12]        
#         feature_vector = [x0, x1, x2, x3, x4, x5, x6]        
        
        return(feature_vector)
        


    def calculateNonFinalBoardScore(self,weight_vector,feature_vector):
        ''' محاسبه امتیاز هر وضعیت از صفحه بازی '''
        weight_vector = np.array(weight_vector).reshape((len(weight_vector),1))
        feature_vector = np.array(feature_vector).reshape((len(feature_vector),1))
        boardScore = np.dot(weight_vector.T,feature_vector)
        return(boardScore[0][0])

    def calculateFinalBoardScore(self,board,playerSymbol1,playerSymbol2):
        ''' محاسبه امتیاز وضعیت پایانی صفحه بازی '''

        # اگر بازی مساوی شود
        score = 0
        
        # If player-1 (i.e self) wins
        if((board[0][0] == board[0][1] == board[0][2] == playerSymbol1)  or 
            (board[1][0] == board[1][1] == board[1][2] == playerSymbol1) or
            (board[2][0] == board[2][1] == board[2][2] == playerSymbol1) or
            (board[0][0] == board[1][0] == board[2][0] == playerSymbol1) or
            (board[0][1] == board[1][1] == board[2][1] == playerSymbol1) or
            (board[0][2] == board[1][2] == board[2][2] == playerSymbol1) or
            (board[0][0] == board[1][1] == board[2][2] == playerSymbol1) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol1) ):
            score = 100
        # If player-2 (i.e opponent) wins    
        elif((board[0][0] == board[0][1] == board[0][2] == playerSymbol2)  or 
            (board[1][0] == board[1][1] == board[1][2] == playerSymbol2) or
            (board[2][0] == board[2][1] == board[2][2] == playerSymbol2) or
            (board[0][0] == board[1][0] == board[2][0] == playerSymbol2) or
            (board[0][1] == board[1][1] == board[2][1] == playerSymbol2) or
            (board[0][2] == board[1][2] == board[2][2] == playerSymbol2) or
            (board[0][0] == board[1][1] == board[2][2] == playerSymbol2) or
            (board[0][2] == board[1][1] == board[2][0] == playerSymbol2) ):
            score = -100
        return(score)        
            

    def generateTrainingSamples(self,weight_vector,playerSymbol1,playerSymbol2):
        ''' تولید نمونه های آموزشی '''
        trainingExamples=[]
        for i in range(len(self.gameHistory)-1):
            feature_vector = self.extractFeatures(self.gameHistory[i+1],playerSymbol1,playerSymbol2)
            trainingExamples.append([feature_vector,self.calculateNonFinalBoardScore(weight_vector,feature_vector)])
        trainingExamples.append([self.extractFeatures(self.gameHistory[-1],playerSymbol1,playerSymbol2),
            self.calculateFinalBoardScore(self.gameHistory[-1],playerSymbol1,playerSymbol2)])
        return(trainingExamples)

    def arrayPrint(self,board):
        print('\n')
        print(board[0][0] + '|' + board[0][1] + '|' + board[0][2])
        print("-----")
        print(board[1][0] + '|' + board[1][1] + '|' + board[1][2])
        print("-----")
        print(board[2][0] + '|' + board[2][1] + '|' + board[2][2])
        print('\n')

    def boardDisplay(self,playerSymbol1,playerSymbol2,gameStatusCount, showBoard=True):
#         if showBoard:
#             for board in self.gameHistory:
#                 self.arrayPrint(board)        
                
        finalScore = self.calculateFinalBoardScore(self.gameHistory[-1],playerSymbol1,playerSymbol2)
        if(finalScore == 100):
            if showBoard:
                print(playerSymbol1 + " wins")
            gameStatusCount[0] = gameStatusCount[0] + 1
        elif(finalScore == -100):
            if showBoard:
                print(playerSymbol2 + " wins")
            gameStatusCount[1] = gameStatusCount[1] + 1
        else:
            if showBoard:
                print("Draw")
            gameStatusCount[2] = gameStatusCount[2] + 1    
        return(gameStatusCount)             
        

class Generalizer:
    ''' کلاس بهبود ضرایب تخمینی تارگت فانکشن '''
    
    def __init__(self,trainingExamples):
        self.trainingExamples = trainingExamples

    def calculateNonFinalBoardScore(self,weight_vector,feature_vector):
        ''' محاسبه امتیاز هر وضعیت از صفحه بازی '''
        weight_vector = np.array(weight_vector).reshape((len(weight_vector),1))
        feature_vector = np.array(feature_vector).reshape((len(feature_vector),1))
        boardScore = np.dot(weight_vector.T,feature_vector)
        return(boardScore[0][0])    

    def lmsWeightUpdate(self,weight_vector,alpha=0.4):
        ''' محاسبه ضرایب جدید به روش Least Mean Squares '''
        for trainingExample in self.trainingExamples:
            vTrainBoardState = trainingExample[1]
            vHatBoardState = self.calculateNonFinalBoardScore(weight_vector,trainingExample[0])
            weight_vector = weight_vector + (alpha * (vTrainBoardState - vHatBoardState) * np.array(trainingExample[0]))
        return (weight_vector)    
        


# def Game(GameType='CvH', player1_LearnedWeightVectors=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), player2_LearnedWeightVectors=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), numGames=0, _showBoard=False):
def Game(GameType='CvH', player1_LearnedWeightVectors=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), player2_LearnedWeightVectors=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), numGames=0, _showBoard=False):
    
    playerSymbols = ('X','O')
    gameStatusCount = [0,0,0]
    playersTargetFunctionWeightVectors = [copy.deepcopy(player1_LearnedWeightVectors), copy.deepcopy(player2_LearnedWeightVectors)]
    
    # فاز تست کامپیوتر با انسان
    if GameType == 'CvH':
        print("\nDo you want to play(y/n) v/s Computer AI")
        ans = input() 
        while(ans == "y"):
            
            learntWeight =  list(np.mean(np.array([playersTargetFunctionWeightVectors[0],
            playersTargetFunctionWeightVectors[1]]),axis = 0))
            
            experimentGenerator = ExperimentGenerator()
            boardState = experimentGenerator.generateNewProblem()
            gameStatusFlag = True
            computer = Player(playerSymbols[0], learntWeight)
            gameHistory = []          
            winner = ''
            
            print('\nBegin Computer(X) v/s Human(O) Tic-Tac-Toe\n')
            while(gameStatusFlag):

                boardState = computer.chooseMove(boardState,playerSymbols[0],playerSymbols[1])
                print('Computers\'s Turn:\n')
                computer.boardPrint(boardState)
                gameHistory.append(boardState)
                gameStatusFlag = not computer.isGameOver(boardState,playerSymbols[0])
                if(gameStatusFlag == False):
                    break

                print('Human\'s Turn:\n')    
                print('Enter X-coordinate(0-2):')
                x = int(input())
                print('Enter Y-coordinate(0-2):')
                y = int(input())

                boardState[x][y] = playerSymbols[1]
                
                computer.boardPrint(boardState)
                
                gameHistory.append(boardState)
                gameStatusFlag = not computer.isGameOver(boardState,playerSymbols[1])
                if(gameStatusFlag == False):
                    break
            
            ##################
            # Critic
            critic = Critic(gameHistory)
            trainingExamplesPlayer1 = critic.generateTrainingSamples(playersTargetFunctionWeightVectors[0],
                playerSymbols[0],playerSymbols[1])
            trainingExamplesPlayer2 = critic.generateTrainingSamples(playersTargetFunctionWeightVectors[1],
                playerSymbols[1],playerSymbols[0])
            # Display board states
            gameStatusCount = critic.boardDisplay(playerSymbols[0],playerSymbols[1],gameStatusCount, showBoard=True)
            
            # Generalizer
            generalizer = Generalizer(trainingExamplesPlayer1)
            playersTargetFunctionWeightVectors = [generalizer.lmsWeightUpdate(playersTargetFunctionWeightVectors[0]),
                generalizer.lmsWeightUpdate(playersTargetFunctionWeightVectors[1])]   
            ##################
            
            print(" ")
            print("Do you want to continue playing(y/n).")
            ans = input()
            if(ans != 'y'):
                break
                
        # گزارش وضعیت هر کدام از بازی ها طی آموزش  
        print("\nGames Results: (" + "Player-1 Wins = " + str(gameStatusCount[0]) +
                ", Player-2 Wins = " + str(gameStatusCount[1]) + ", Game Draws = " + str(gameStatusCount[2]) +
                ")\n")    

        # گزارش ضرایب آموزش دیده شده
        learntWeight =  list(np.mean(np.array([playersTargetFunctionWeightVectors[0],
            playersTargetFunctionWeightVectors[1]]),axis = 0))
        print("Early Learned Weight Vector For Player 1: \n"+ str(player1_LearnedWeightVectors))
        print(" ")
        print("Final Learned Weight Vector For Player 2: \n"+ str(learntWeight))

    return playersTargetFunctionWeightVectors[0]




In [6]:
P1_LearnedWeightVectors = [71.44560870863126, -245.17811003388516, 111.95797585816914, 10.734171255002673, 0.6862067591637899, 42.530228819088805, -132.06480838648295, -127.9042291708675, -15.22818111721062, 13.747848875988776, -57.90152128425417, 54.08550436965859, -119.14923430487816]
P2_LearnedWeightVectors = [6.9798029117994425, -208.70100142657418, 12.540660929730969, 28.112940430297865, -15.059685629438865, 97.78743124495205, -98.20712597701547, 7.9649989252153475, -75.19655283619173, 41.55345685130794, -22.905654125448994, 90.86416135654544, -73.84098057221422]

P11_LearnedWeightVectors = Game(GameType='CvH', 
                                player1_LearnedWeightVectors=P2_LearnedWeightVectors )



Do you want to play(y/n) v/s Computer AI
y

Begin Computer(X) v/s Human(O) Tic-Tac-Toe

Computers's Turn:



 | | 
-----
 |X| 
-----
 | | 


Human's Turn:

Enter X-coordinate(0-2):
0
Enter Y-coordinate(0-2):
0


O| | 
-----
 |X| 
-----
 | | 


Computers's Turn:



O| | 
-----
 |X|X
-----
 | | 


Human's Turn:

Enter X-coordinate(0-2):
0
Enter Y-coordinate(0-2):
1


O|O| 
-----
 |X|X
-----
 | | 


Computers's Turn:



O|O| 
-----
X|X|X
-----
 | | 


X wins
 
Do you want to continue playing(y/n).
n

Games Results: (Player-1 Wins = 1, Player-2 Wins = 0, Game Draws = 0)

Early Learned Weight Vector For Player 1: 
[6.9798029117994425, -208.70100142657418, 12.540660929730969, 28.112940430297865, -15.059685629438865, 97.78743124495205, -98.20712597701547, 7.9649989252153475, -75.19655283619173, 41.55345685130794, -22.905654125448994, 90.86416135654544, -73.84098057221422]
 
Final Learned Weight Vector For Player 2: 
[26.15049340304226, -104.35050071328709, 18.622809886720095, 13.584499728498