# <font color= "magenta">  ****Explore Data.****</font>

**1. Import required libraries.**

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from Terminal_Version.Connect4 import Connect4
# from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    MaxAbsScaler,
    RobustScaler,
    QuantileTransformer,
    PowerTransformer,
)
import joblib
import os

**2. Create game moves function**

In [5]:
def convert_to_game_moves(flat_board):
    board = np.array(flat_board).reshape(6, 7)
    moves = []
    for col in range(7):
        col_vals = board[:, col]
        pieces = [val for val in col_vals if val != 0]
        for _ in pieces:
            moves.append(col)
    return moves

**3. Data preparation**

In [6]:
def data_set_prep():
    columns = [f'b.{i}' for i in range(42)] + ['outcome']
    df = pd.read_csv(r'Data\connect-4.data\connect-4.data', names=columns)

    mapping = {'x': 1, 'o': -1, 'b': 0}
    df.iloc[:, :-1] = df.iloc[:, :-1].map(mapping.get)

    X_data = []
    y_data = []

    for _, row in df.iterrows():
        board_vals = row[:-1].values
        move_sequence = convert_to_game_moves(board_vals)
        game = Connect4()
        player_map = {1: "●", -1: "○"}
        player_turns = [1 if i % 2 == 0 else -1 for i in range(len(move_sequence))]

        for i, (move, player_id) in enumerate(zip(move_sequence[:-1], player_turns[:-1])):
            flat_numeric_board = np.where(game.board == "●", 1,
                                  np.where(game.board == "○", -1, 0)).flatten()
            turn_count = np.count_nonzero(flat_numeric_board)
            X_data.append(np.append(flat_numeric_board, turn_count))
            y_data.append(move_sequence[i + 1])  # Next move is the target
            game.make_move(move, player_map[player_id])

    X_data = np.array(X_data)
    y_data = np.array(y_data)

    print("Dataset built:")
    print(f"Total training samples: {X_data.shape[0]}")
    # print("Inferred move for that board:", y_data[0])
    return X_data, y_data

In [8]:
def train_model(X, y, model_type=SVC, scaler_type='standard', cv_folds=5, **model_kwargs):
    # Select scaler
    if scaler_type == 'standard':
        scaler = StandardScaler()
    elif scaler_type == 'minmax':
        scaler = MinMaxScaler()
    elif scaler_type == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaler_type == 'robust':
        scaler = RobustScaler()
    elif scaler_type == 'quantile':
        scaler = QuantileTransformer(output_distribution='uniform')
    elif scaler_type == 'quantile-normal':
        scaler = QuantileTransformer(output_distribution='normal')
    elif scaler_type == 'power':
        scaler = PowerTransformer(method='yeo-johnson')
    else:
        raise ValueError(f"Unsupported scaler_type: {scaler_type}")

    # Normalize features
    X_scaled = scaler.fit_transform(X)

    # Split for hold-out test evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42, shuffle=True
    )

    # Instantiate model
    model = model_type(**model_kwargs)
    print(f"\nTraining {model_type.__name__} with {scaler_type} scaler...")

    # Cross-validation
    cv_scores = cross_val_score(model, X_train, y_train, cv=cv_folds)
    print(f"Cross-Validation (k={cv_folds}) Accuracy: {cv_scores.mean() * 100:.2f}% ± {cv_scores.std() * 100:.2f}%")

    # Final fit on full training set
    model.fit(X_train, y_train)

    # Hold-out test evaluation
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    report = classification_report(y_test, predictions, zero_division=0)

    print(f"Hold-out Test Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:\n", report)

    return model

In [9]:
def predict_move(board, model):
    board_arr = np.array(board).reshape(1, -1)
    predicted_move = int(round(model.predict(board_arr)[0]))
    return max(0, min(6, predicted_move))

In [10]:
def choose_move(game, model, player=1):
    opponent = -player
    player_map = {1: "●", -1: "○"}
    opponent_symbol = player_map[opponent]
    available_cols = game.get_available_moves(game.board)

    # Check if the AI can block the opponent's winning move
    for col in available_cols:
        temp_board = game.drop_piece(game.board.copy(), col, opponent_symbol)
        if temp_board is not None and game.check_winner(opponent_symbol, temp_board):
            return col  # Block opponent's winning move

    # Use the trained model to predict the next move
    flat_board = np.where(game.board == "●", 1,
                          np.where(game.board == "○", -1, 0)).flatten()
    turn_count = np.count_nonzero(flat_board)
    input_features = np.append(flat_board, turn_count)

    # Predict the move using the trained model
    predicted_move = predict_move(input_features, model)

    # Ensure the predicted move is valid
    if predicted_move in available_cols:
        return predicted_move
    else:
        # Default to a random valid move if the prediction is invalid
        return np.random.choice(available_cols) if available_cols else None

In [11]:
def play_game(model):
    game = Connect4()
    current_player = 1
    player_map = {1: "●", -1: "○"}

    while True:
        game.display_board()
        available_cols = game.get_available_moves(game.board)
        print(f"Available columns: {available_cols}")

        if current_player == 1:
            # Human player move
            while True:
                try:
                    col = int(input("Enter column (0-6): "))
                    if col in available_cols:
                        game.make_move(col, player_map[current_player])
                        break
                    else:
                        print("Column full or invalid.")
                except ValueError:
                    print("Invalid input.")
        else:
            # AI move
            print("AI's move:")
            col = choose_move(game, model, player=-1)
            if col is not None:
                print(f"AI chooses column {col}")
                game.make_move(col, player_map[current_player])
            else:
                print("AI could not make a move!")
                break

        if game.check_winner("○"):
            game.display_board()
            print("AI wins!")
            break
        elif game.check_winner("●"):
            game.display_board()
            print("You win!")
            break
        elif game.is_full(game.board):
            game.display_board()
            print("It's a draw!")
            break

        # Switch player
        current_player *= -1

In [12]:
if __name__ == "__main__":
    try:
        X, y = data_set_prep()

        model_choice = input("Choose model (logistic, forest, lsvc): ").strip().lower()

        if model_choice == "forest":
            model_type = RandomForestClassifier
            model_kwargs = {"n_estimators": 1000}
        elif model_choice == "lsvc":
            from sklearn.svm import LinearSVC
            model_type = LinearSVC
            model_kwargs = {}
        else:
            model_type = LogisticRegression
            model_kwargs = {"max_iter": 5000}

        scaler_types = [
            'standard',
            'minmax',
            'maxabs',
            'robust',
            'quantile',
            'quantile-normal',
            'power'
        ]

        for scaler_type in scaler_types:
            print(f"\n--- Training with scaler: {scaler_type} ---")
            model = train_model(X, y, model_type=model_type, scaler_type=scaler_type, **model_kwargs)

        print("\nAll models trained with different scalers.")
        # play_game(model)  # This will use the *last* trained model

    except Exception as e:
        print(f"An error occurred: {e}")

Dataset built:
Total training samples: 472899

--- Training with scaler: standard ---

Training LogisticRegression with standard scaler...
Cross-Validation (k=5) Accuracy: 55.54% ± 0.12%
Hold-out Test Accuracy: 55.47%
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00      5437
           1       0.50      0.98      0.66     15639
           2       0.44      0.36      0.40     14691
           3       0.44      0.17      0.24     12059
           4       0.48      0.23      0.31     12735
           5       0.50      0.75      0.60     16377
           6       0.88      0.83      0.85     17642

    accuracy                           0.55     94580
   macro avg       0.46      0.47      0.44     94580
weighted avg       0.52      0.55      0.51     94580


--- Training with scaler: minmax ---

Training LogisticRegression with minmax scaler...
Cross-Validation (k=5) Accuracy: 55.51% ± 0.11%
Hold-out Test Accuracy: 

In [13]:
if __name__ == "__main__":
    try:
        X, y = data_set_prep()

        model_choice = input("Choose model (logistic, forest, lsvc): ").strip().lower()

        if model_choice == "forest":
            model_type = RandomForestClassifier
            model_kwargs = {"n_estimators": 1000}
        elif model_choice == "lsvc":
            from sklearn.svm import LinearSVC
            model_type = LinearSVC
            model_kwargs = {}
        else:
            model_type = LogisticRegression
            model_kwargs = {"max_iter": 5000}

        scaler_types = [
            'standard',
            'minmax',
            'maxabs',
            'robust',
            'quantile',
            'quantile-normal',
            'power'
        ]

        for scaler_type in scaler_types:
            print(f"\n--- Training with scaler: {scaler_type} ---")
            model = train_model(X, y, model_type=model_type, scaler_type=scaler_type, **model_kwargs)

        print("\nAll models trained with different scalers.")
        # play_game(model)  # This will use the *last* trained model

    except Exception as e:
        print(f"An error occurred: {e}")

Dataset built:
Total training samples: 472899

--- Training with scaler: standard ---

Training RandomForestClassifier with standard scaler...
Cross-Validation (k=5) Accuracy: 59.27% ± 0.08%
Hold-out Test Accuracy: 59.47%
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00      5437
           1       0.50      0.98      0.66     15639
           2       0.46      0.48      0.47     14691
           3       0.47      0.30      0.37     12059
           4       0.59      0.23      0.33     12735
           5       0.64      0.73      0.68     16377
           6       0.90      0.87      0.88     17642

    accuracy                           0.59     94580
   macro avg       0.51      0.51      0.48     94580
weighted avg       0.57      0.59      0.56     94580


--- Training with scaler: minmax ---

Training RandomForestClassifier with minmax scaler...
Cross-Validation (k=5) Accuracy: 59.27% ± 0.07%
Hold-out Test Ac

In [14]:
if __name__ == "__main__":
    try:
        X, y = data_set_prep()

        model_choice = input("Choose model (logistic, forest, lsvc): ").strip().lower()

        if model_choice == "forest":
            model_type = RandomForestClassifier
            model_kwargs = {"n_estimators": 1000}
        elif model_choice == "lsvc":
            from sklearn.svm import LinearSVC
            model_type = LinearSVC
            model_kwargs = {}
        elif model_choice == "mlp":
            from sklearn.neural_network import MLPClassifier
            model_type = MLPClassifier
            model_kwargs = {}      
        else:
            model_type = LogisticRegression
            model_kwargs = {"max_iter": 5000}

        scaler_types = [
            'standard',
            'power'
        ]

        for scaler_type in scaler_types:
            print(f"\n--- Training with scaler: {scaler_type} ---")
            model = train_model(X, y, model_type=model_type, scaler_type=scaler_type, **model_kwargs)

        print("\nAll models trained with different scalers.")
        # play_game(model)  # This will use the *last* trained model

    except Exception as e:
        print(f"An error occurred: {e}")

Dataset built:
Total training samples: 472899

--- Training with scaler: standard ---

Training MLPClassifier with standard scaler...
Cross-Validation (k=5) Accuracy: 59.27% ± 0.07%
Hold-out Test Accuracy: 59.18%
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00      5437
           1       0.50      0.98      0.66     15639
           2       0.48      0.44      0.46     14691
           3       0.47      0.19      0.27     12059
           4       0.53      0.34      0.42     12735
           5       0.60      0.76      0.67     16377
           6       0.91      0.85      0.88     17642

    accuracy                           0.59     94580
   macro avg       0.50      0.51      0.48     94580
weighted avg       0.56      0.59      0.55     94580


--- Training with scaler: power ---

Training MLPClassifier with power scaler...
Cross-Validation (k=5) Accuracy: 59.17% ± 0.12%
Hold-out Test Accuracy: 59.35%
Class

In [15]:
if __name__ == "__main__":
    try:
        X, y = data_set_prep()

        model_choice = input("Choose model (logistic, forest, lsvc): ").strip().lower()

        if model_choice == "forest":
            model_type = RandomForestClassifier
            model_kwargs = {"n_estimators": 1000}
        elif model_choice == "lsvc":
            from sklearn.svm import LinearSVC
            model_type = LinearSVC
            model_kwargs = {}
        elif model_choice == "mlp":
            from sklearn.neural_network import MLPClassifier
            model_type = MLPClassifier
            model_kwargs = {}      
        else:
            model_type = LogisticRegression
            model_kwargs = {"max_iter": 5000}

        scaler_types = [
            'standard',
            'power'
        ]

        for scaler_type in scaler_types:
            print(f"\n--- Training with scaler: {scaler_type} ---")
            model = train_model(X, y, model_type=model_type, scaler_type=scaler_type, **model_kwargs)

        print("\nAll models trained with different scalers.")
        # play_game(model)  # This will use the *last* trained model

    except Exception as e:
        print(f"An error occurred: {e}")

Dataset built:
Total training samples: 472899

--- Training with scaler: standard ---

Training LinearSVC with standard scaler...
Cross-Validation (k=5) Accuracy: 50.11% ± 0.14%
Hold-out Test Accuracy: 50.13%
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00      5437
           1       0.50      0.98      0.66     15639
           2       0.43      0.36      0.40     14691
           3       0.34      0.07      0.12     12059
           4       0.11      0.01      0.02     12735
           5       0.40      0.71      0.51     16377
           6       0.77      0.80      0.78     17642

    accuracy                           0.50     94580
   macro avg       0.36      0.42      0.36     94580
weighted avg       0.42      0.50      0.42     94580


--- Training with scaler: power ---

Training LinearSVC with power scaler...
Cross-Validation (k=5) Accuracy: 50.23% ± 0.08%
Hold-out Test Accuracy: 50.30%
Classificatio