# Linear Regression Baseline

## Preprocess Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the training set; the 'Id' column becomes the DataFrame index.
train = pd.read_csv(
    '../Data/train.csv',
    index_col='Id',
)

In [3]:
def extract_features(agent_column):
    """Split MCTS agent description strings into their four components.

    Every value is matched against the pattern
    ``MCTS-<selection>-<exploration>-<playout>-<bounds>``.

    Parameters
    ----------
    agent_column : pandas.Series
        String column holding the agent descriptions (used through ``.str``).

    Returns
    -------
    tuple of pandas.Series
        ``(selection, exploration, playout, bounds)`` in that order.
        ``exploration`` is cast to float; the other three are cast to the
        ``category`` dtype. Rows that do not match the pattern come back as
        missing values from ``str.extract``.
    """
    # Run the regex once over the column and reuse the captured groups,
    # instead of repeating the same extraction for every component.
    parts = agent_column.str.extract(r'MCTS-(.*)-(.*)-(.*)-(.*)', expand=True)

    selection = parts[0].astype('category')
    exploration = parts[1].astype(float)
    playout = parts[2].astype('category')
    bounds = parts[3].astype('category')
    return selection, exploration, playout, bounds

# For each agent column, attach the four extracted components to `train`
# under the listed names (same order as extract_features returns them).
agent_feature_columns = {
    'agent1': ['p1_selection', 'p1_exploration', 'p1_playout', 'p1_bounds'],
    'agent2': ['p2_selection', 'p2_exploration', 'p2_playout', 'p2_bounds'],
}
for agent_name, new_columns in agent_feature_columns.items():
    for new_column, values in zip(new_columns, extract_features(train[agent_name])):
        train[new_column] = values

# The raw agent strings are no longer needed once they have been split.
train = train.drop(columns=list(agent_feature_columns))

In [4]:
# Keep only the reduced set of columns obtained in
# "Exploratory Data Analysis - Understanding Assignment and Features".
# The list ends with the four agent1 outcome columns (num_wins/num_draws/
# num_losses/utility) and the p1_*/p2_* agent features added to `train` earlier.
# Selecting with a list of labels fails if any listed column is missing from `train`.
# NOTE(review): "BranchingFactorChangeNumTimesn" carries a trailing "n" -
# presumably it matches the CSV header exactly; confirm before "fixing" it.
train = train[["Stochastic", "AsymmetricPiecesType", "Team", "Shape", "SquareShape", "HexShape", "TriangleShape", "DiamondShape", "RectangleShape", "StarShape", "RegularShape", "PolygonShape", "Tiling", "SquareTiling", "HexTiling", "TriangleTiling", "SemiRegularTiling", "MorrisTiling", "CircleTiling", "ConcentricTiling", "SpiralTiling", "AlquerqueTiling", "MancalaStores", "MancalaTwoRows", "MancalaThreeRows", "MancalaFourRows", "MancalaSixRows", "MancalaCircular", "AlquerqueBoard", "AlquerqueBoardWithOneTriangle", "AlquerqueBoardWithTwoTriangles", "AlquerqueBoardWithFourTriangles", "AlquerqueBoardWithEightTriangles", "ThreeMensMorrisBoard", "ThreeMensMorrisBoardWithTwoTriangles", "NineMensMorrisBoard", "StarBoard", "CrossBoard", "KintsBoard", "PachisiBoard", "FortyStonesWithFourGapsBoard", "Track", "TrackLoop", "TrackOwned", "Region", "Boardless", "Vertex", "Cell", "Edge", "NumPlayableSitesOnBoard", "NumColumns", "NumRows", "NumCorners", "NumDirections", "NumOrthogonalDirections", "NumDiagonalDirections", "NumAdjacentDirections", "NumOffDiagonalDirections", "NumInnerSites", "NumLayers", "NumEdges", "NumCells", "NumVertices", "NumPerimeterSites", "NumTopSites", "NumBottomSites", "NumRightSites", "NumLeftSites", "NumCentreSites", "NumConvexCorners", "NumConcaveCorners", "NumPhasesBoard", "Hand", "NumContainers", "NumPlayableSites", "Piece", "PieceValue", "PieceDirection", "DiceD2", "DiceD4", "DiceD6", "LargePiece", "Tile", "NumComponentsType", "NumComponentsTypePerPlayer", "NumDice", "Meta", "SwapOption", "Repetition", "TurnKo", "PositionalSuperko", "Start", "PiecesPlacedOnBoard", "PiecesPlacedOutsideBoard", "InitialRandomPlacement", "InitialScore", "InitialCost", "NumStartComponentsBoard", "NumStartComponentsHand", "NumStartComponents", "Moves", "MovesDecision", "NoSiteMoves", "VoteDecision", "SwapPlayersDecision", "SwapPlayersDecisionFrequency", "PassDecision", "PassDecisionFrequency", "ProposeDecision", "ProposeDecisionFrequency", "SingleSiteMoves", "AddDecision", 
"AddDecisionFrequency", "PromotionDecision", "PromotionDecisionFrequency", "RemoveDecision", "RemoveDecisionFrequency", "RotationDecision", "TwoSitesMoves", "StepDecision", "StepDecisionFrequency", "StepDecisionToEmpty", "StepDecisionToEmptyFrequency", "StepDecisionToFriend", "StepDecisionToFriendFrequency", "StepDecisionToEnemy", "StepDecisionToEnemyFrequency", "SlideDecision", "SlideDecisionFrequency", "SlideDecisionToEmpty", "SlideDecisionToEmptyFrequency", "SlideDecisionToEnemy", "SlideDecisionToEnemyFrequency", "SlideDecisionToFriend", "SlideDecisionToFriendFrequency", "LeapDecision", "LeapDecisionFrequency", "LeapDecisionToEmpty", "LeapDecisionToEmptyFrequency", "LeapDecisionToEnemy", "LeapDecisionToEnemyFrequency", "HopDecision", "HopDecisionFrequency", "HopDecisionMoreThanOne", "HopDecisionMoreThanOneFrequency", "HopDecisionEnemyToEmpty", "HopDecisionEnemyToEmptyFrequency", "HopDecisionFriendToEmpty", "HopDecisionFriendToEmptyFrequency", "HopDecisionFriendToFriendFrequency", "HopDecisionEnemyToEnemy", "HopDecisionEnemyToEnemyFrequency", "HopDecisionFriendToEnemy", "HopDecisionFriendToEnemyFrequency", "FromToDecision", "FromToDecisionFrequency", "FromToDecisionWithinBoardFrequency", "FromToDecisionBetweenContainersFrequency", "FromToDecisionEmpty", "FromToDecisionEmptyFrequency", "FromToDecisionEnemy", "FromToDecisionEnemyFrequency", "FromToDecisionFriend", "FromToDecisionFriendFrequency", "SwapPiecesDecision", "SwapPiecesDecisionFrequency", "ShootDecision", "MovesNonDecision", "MovesEffects", "VoteEffect", "SwapPlayersEffect", "PassEffect", "Roll", "RollFrequency", "ProposeEffect", "ProposeEffectFrequency", "AddEffect", "AddEffectFrequency", "SowFrequency", "SowWithEffect", "SowCapture", "SowCaptureFrequency", "SowRemove", "SowRemoveFrequency", "SowBacktracking", "SowBacktrackingFrequency", "SowSkip", "SowOriginFirst", "SowCW", "SowCCW", "PromotionEffect", "PromotionEffectFrequency", "RemoveEffect", "RemoveEffectFrequency", "PushEffect", 
"PushEffectFrequency", "Flip", "FlipFrequency", "SetMove", "SetNextPlayer", "SetNextPlayerFrequency", "MoveAgain", "MoveAgainFrequency", "SetValue", "SetValueFrequency", "SetCount", "SetCountFrequency", "SetRotation", "StepEffect", "SlideEffect", "LeapEffect", "HopEffect", "FromToEffect", "MovesOperators", "Priority", "ByDieMove", "MaxMovesInTurn", "MaxDistance", "Capture", "ReplacementCapture", "ReplacementCaptureFrequency", "HopCapture", "HopCaptureFrequency", "HopCaptureMoreThanOne", "HopCaptureMoreThanOneFrequency", "DirectionCapture", "DirectionCaptureFrequency", "EncloseCapture", "EncloseCaptureFrequency", "CustodialCapture", "CustodialCaptureFrequency", "InterveneCapture", "InterveneCaptureFrequency", "SurroundCapture", "SurroundCaptureFrequency", "CaptureSequence", "CaptureSequenceFrequency", "Conditions", "SpaceConditions", "Line", "Connection", "Group", "Contains", "Pattern", "Fill", "Distance", "MoveConditions", "NoMoves", "NoMovesMover", "NoMovesNext", "CanMove", "CanNotMove", "PieceConditions", "NoPiece", "NoPieceMover", "NoPieceNext", "NoTargetPiece", "Threat", "IsEmpty", "IsEnemy", "IsFriend", "IsPieceAt", "LineOfSight", "CountPiecesComparison", "CountPiecesMoverComparison", "CountPiecesNextComparison", "ProgressCheck", "Directions", "AbsoluteDirections", "AllDirections", "AdjacentDirection", "OrthogonalDirection", "DiagonalDirection", "RotationalDirection", "SameLayerDirection", "RelativeDirections", "ForwardDirection", "BackwardDirection", "ForwardsDirection", "BackwardsDirection", "LeftwardDirection", "LeftwardsDirection", "ForwardRightDirection", "BackwardRightDirection", "SameDirection", "OppositeDirection", "Phase", "NumPlayPhase", "Scoring", "PieceCount", "SumDice", "SpaceEnd", "LineEnd", "LineEndFrequency", "LineWin", "LineWinFrequency", "LineLoss", "LineLossFrequency", "LineDraw", "ConnectionEnd", "ConnectionEndFrequency", "ConnectionWin", "ConnectionWinFrequency", "ConnectionLoss", "ConnectionLossFrequency", "GroupEnd", "GroupEndFrequency", 
"GroupWin", "GroupWinFrequency", "GroupLoss", "GroupDraw", "LoopEnd", "LoopWin", "LoopWinFrequency", "PatternWin", "PatternWinFrequency", "PathExtentLoss", "TerritoryWin", "TerritoryWinFrequency", "CaptureEnd", "Checkmate", "CheckmateFrequency", "CheckmateWin", "CheckmateWinFrequency", "NoTargetPieceEnd", "NoTargetPieceEndFrequency", "NoTargetPieceWin", "NoTargetPieceWinFrequency", "EliminatePiecesEnd", "EliminatePiecesEndFrequency", "EliminatePiecesWin", "EliminatePiecesWinFrequency", "EliminatePiecesLoss", "EliminatePiecesLossFrequency", "EliminatePiecesDraw", "EliminatePiecesDrawFrequency", "RaceEnd", "NoOwnPiecesEnd", "NoOwnPiecesEndFrequency", "NoOwnPiecesWin", "NoOwnPiecesWinFrequency", "NoOwnPiecesLoss", "NoOwnPiecesLossFrequency", "FillEnd", "FillEndFrequency", "FillWin", "FillWinFrequency", "ReachEnd", "ReachEndFrequency", "ReachWin", "ReachWinFrequency", "ReachLoss", "ReachLossFrequency", "ReachDraw", "ReachDrawFrequency", "ScoringEnd", "ScoringEndFrequency", "ScoringWin", "ScoringWinFrequency", "ScoringLoss", "ScoringLossFrequency", "ScoringDraw", "NoMovesEnd", "NoMovesEndFrequency", "NoMovesWin", "NoMovesWinFrequency", "NoMovesLoss", "NoMovesLossFrequency", "NoMovesDraw", "NoMovesDrawFrequency", "NoProgressEnd", "NoProgressDraw", "NoProgressDrawFrequency", "Draw", "DrawFrequency", "Misere", "DurationActions", "DurationMoves", "DurationTurns", "DurationTurnsStdDev", "DurationTurnsNotTimeouts", "DecisionMoves", "GameTreeComplexity", "StateTreeComplexity", "BoardCoverageDefault", "BoardCoverageFull", "BoardCoverageUsed", "AdvantageP1", "Balance", "Completion", "Drawishness", "Timeouts", "OutcomeUniformity", "BoardSitesOccupiedAverage", "BoardSitesOccupiedMedian", "BoardSitesOccupiedMaximum", "BoardSitesOccupiedVariance", "BoardSitesOccupiedChangeAverage", "BoardSitesOccupiedChangeSign", "BoardSitesOccupiedChangeLineBestFit", "BoardSitesOccupiedChangeNumTimes", "BoardSitesOccupiedMaxIncrease", "BoardSitesOccupiedMaxDecrease", "BranchingFactorAverage", 
"BranchingFactorMedian", "BranchingFactorMaximum", "BranchingFactorVariance", "BranchingFactorChangeAverage", "BranchingFactorChangeSign", "BranchingFactorChangeLineBestFit", "BranchingFactorChangeNumTimesn", "BranchingFactorChangeMaxIncrease", "BranchingFactorChangeMaxDecrease", "DecisionFactorAverage", "DecisionFactorMedian", "DecisionFactorMaximum", "DecisionFactorVariance", "DecisionFactorChangeAverage", "DecisionFactorChangeSign", "DecisionFactorChangeLineBestFit", "DecisionFactorChangeNumTimes", "DecisionFactorMaxIncrease", "DecisionFactorMaxDecrease", "MoveDistanceAverage", "MoveDistanceMedian", "MoveDistanceMaximum", "MoveDistanceVariance", "MoveDistanceChangeAverage", "MoveDistanceChangeSign", "MoveDistanceChangeLineBestFit", "MoveDistanceChangeNumTimes", "MoveDistanceMaxIncrease", "MoveDistanceMaxDecrease", "PieceNumberAverage", "PieceNumberMedian", "PieceNumberMaximum", "PieceNumberVariance", "PieceNumberChangeAverage", "PieceNumberChangeSign", "PieceNumberChangeLineBestFit", "PieceNumberChangeNumTimes", "PieceNumberMaxIncrease", "PieceNumberMaxDecrease", "ScoreDifferenceAverage", "ScoreDifferenceMedian", "ScoreDifferenceMaximum", "ScoreDifferenceVariance", "ScoreDifferenceChangeAverage", "ScoreDifferenceChangeSign", "ScoreDifferenceChangeLineBestFit", "ScoreDifferenceMaxIncrease", "ScoreDifferenceMaxDecrease", "Math", "Arithmetic", "Operations", "Addition", "Subtraction", "Multiplication", "Division", "Modulo", "Absolute", "Exponentiation", "Minimum", "Maximum", "Comparison", "Equal", "NotEqual", "LesserThan", "LesserThanOrEqual", "GreaterThan", "GreaterThanOrEqual", "Parity", "Even", "Odd", "Logic", "Conjunction", "Disjunction", "Negation", "Set", "Union", "Intersection", "Complement", "Algorithmics", "ConditionalStatement", "ControlFlowStatement", "Visual", "Style", "BoardStyle", "GraphStyle", "ChessStyle", "GoStyle", "MancalaStyle", "PenAndPaperStyle", "ShibumiStyle", "BackgammonStyle", "JanggiStyle", "XiangqiStyle", "ShogiStyle", "TableStyle", 
"SurakartaStyle", "TaflStyle", "NoBoard", "ComponentStyle", "AnimalComponent", "ChessComponent", "KingComponent", "QueenComponent", "KnightComponent", "RookComponent", "BishopComponent", "PawnComponent", "FairyChessComponent", "PloyComponent", "ShogiComponent", "XiangqiComponent", "StrategoComponent", "JanggiComponent", "CheckersComponent", "BallComponent", "TaflComponent", "DiscComponent", "MarkerComponent", "StackType", "Stack", "Symbols", "ShowPieceValue", "ShowPieceState", "Implementation", "State", "StackState", "PieceState", "SiteState", "SetSiteState", "VisitedSites", "Variable", "SetVar", "RememberValues", "ForgetValues", "SetPending", "InternalCounter", "SetInternalCounter", "PlayerValue", "Efficiency", "CopyContext", "Then", "ForEachPiece", "DoLudeme", "Trigger", "PlayoutsPerSecond", "MovesPerSecond", "num_wins_agent1", "num_draws_agent1", "num_losses_agent1", "utility_agent1", "p1_selection", "p1_exploration", "p1_playout", "p1_bounds", "p2_selection", "p2_exploration", "p2_playout", "p2_bounds"]]

## 1- Linear Regression

In [5]:
import sys
import os
sys.path.append(os.path.abspath('../'))

from Models.LinearRegression import LinearRegression
from Utils.Preprocessor import Preprocessor
from Utils.Utils import root_mean_squared_error, train_test_split

In [6]:
# Target: the utility obtained by agent 1.
y = train["utility_agent1"]

# Predictors: everything except the target and the agent1 win/draw/loss counts
# (presumably excluded because they are match outcomes too - confirm).
X = train.drop(
    columns=[
        "num_wins_agent1",
        "num_draws_agent1",
        "num_losses_agent1",
        "utility_agent1",
    ]
)

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Configure the project preprocessor: normalisation and one-hot encoding on,
# standardisation off.
preprocessor = Preprocessor(
    normalize=True,
    standardize=False,
    one_hot_encode=True,
)

# Fit on the training split only, then apply the fitted preprocessor to the
# validation split.
X_train_p = preprocessor.fit_transform(X_train)
X_valid_p = preprocessor.transform(X_valid)

In [None]:
# Baseline model: project LinearRegression fitted with fit_method="ols".
lr_model = LinearRegression(fit_method="ols", loss_function="rmse")
# Alternative configuration (gradient descent), kept for reference:
#lr_model = LinearRegression(fit_method="gd", loss_function="rmse", learning_rate=0.01, epochs=10, min_step_size=0.001, gradient_descent='batch')

lr_model.fit(X_train_p, y_train)

train_pred = lr_model.predict(X_train_p)
# Predictions on the validation split; the name is kept as `test_pred` in case
# other cells refer to it.
test_pred = lr_model.predict(X_valid_p)

# The reported metric comes from root_mean_squared_error, so label it as RMSE
# rather than "mean squared error".
print("Linear Regression: ")
print("Train root mean squared error: ", root_mean_squared_error(y_train, train_pred))
print("Validation root mean squared error: ", root_mean_squared_error(y_valid, test_pred))

Train mean squared error:  0.5175135945021986
Test mean squared error:  0.51911678407925


## 2- Lasso Regression

## 3- Ridge Regression