# <center>Approach 1: Flatten the Chess Board</center>

## Libraries

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

import pandas as pd
import numpy as np

## Starter

In [2]:
# Read the filtered puzzle table from disk and display one randomly chosen row.
filtered_csv_path = "../data/filtered/filtered_df.csv"
df = pd.read_csv(filtered_csv_path)
df.sample(n=1)

Unnamed: 0,index,PuzzleId,FEN,Moves,Rating,RatingDeviation,Popularity,NbPlays,Themes,GameUrl,OpeningTags
1989686,2316254,avnFk,6rk/p1B2p1p/5R2/1Q6/3pr3/6Pq/PP2PP1P/5RK1 b - ...,e4h4 f1a1 h3h2 g1f1 h2h1,1678,75,94,1273,long mate mateIn3 middlegame pin quietMove,https://lichess.org/7YWY9q2u#46,


## Feature Engineering

In [3]:
from src.lib.fen_parser import flatten_board, parse_fen_without_board
from src.lib.move_encoder import encode_moves_64_squares
from src.lib.feature_generator import calculate_move_length
from tqdm import tqdm

In [4]:
# Expand every FEN into 64 per-square columns (square_1 ... square_64) and
# append them to df.
# tqdm.pandas() registers `progress_apply` on pandas objects; calling it instead
# of plain `apply` is what actually makes the registration show a progress bar.
tqdm.pandas()
flattened = df['FEN'].progress_apply(flatten_board)
squares = [f"square_{i}" for i in range(1, 65)]
# Reuse df's index so the label-based column concat below lines up row-for-row
# even if df does not carry a default 0..n-1 index.
flattened_df = pd.DataFrame(flattened.tolist(), columns=squares, index=df.index)
df = pd.concat([df, flattened_df], axis=1)

In [5]:
# Derive the remaining (non-board) FEN features for every row and append them to df.
# The per-row results are collected first and turned into a frame with a single
# constructor call; `.apply(pd.Series)` builds one Series object per row, which is
# very slow on large frames.
# NOTE(review): assumes parse_fen_without_board returns a dict (or a fixed-length
# sequence) per FEN, as the original `.apply(pd.Series)` expansion implies - confirm.
fen_feature_rows = [parse_fen_without_board(fen) for fen in df['FEN']]
# Explicit index keeps the label-based concat aligned with df's rows.
combined_features_df = pd.DataFrame(fen_feature_rows, index=df.index)
df = pd.concat([df, combined_features_df], axis=1)


In [6]:
# NOTE(review): disabled experiment, kept for reference. If re-enabled, it would
# encode each row's Moves with encode_moves_64_squares (max_len=60) into 60 columns
# named move_1 ... move_60 and append them to df; those columns are not listed in
# the later drop list, so they would flow into the model as features.
# flattened_moves = df['Moves'].apply(
#     lambda moves: encode_moves_64_squares(moves, max_len=60, verbose=False).tolist()
# )
# encoded_moves = pd.DataFrame(flattened_moves.tolist(), columns=[f"move_{i+1}" for i in range(60)])
# df = pd.concat([df, encoded_moves], axis=1)

In [7]:
# Add a MoveLength column computed from each row's Moves value by calculate_move_length.
move_lengths = df['Moves'].apply(calculate_move_length)
df['MoveLength'] = move_lengths

In [8]:
# Identifier, raw-text and metadata columns that must not reach the model.
drop_cols = ["index", "PuzzleId", "FEN", "Moves", "RatingDeviation", "Popularity", "NbPlays", "Themes", "GameUrl", "OpeningTags"]
# The loaded CSV also contains an "Unnamed: 0" column (presumably a row index
# written out by an earlier to_csv - confirm). It is not in drop_cols, so without
# this step it would remain in df_final and be used as a feature.
leftover_index_cols = [col for col in df.columns if str(col).startswith("Unnamed:")]
df_final = df.drop(columns=drop_cols + leftover_index_cols)

## Modelling

In [9]:
from src.lib.models.lgbm_learner import LightGBMLearner
from src.lib.train_helper import stratified_split

In [11]:
# Partition the feature table into train / validation / test sets.
X_train, y_train, X_val, y_val, X_test, y_test = stratified_split(df_final)

# Tuned values; they match the "Best hyperparameters" printed by the
# optimize() run further down in this notebook.
tuned_parameters = dict(
    learning_rate=0.09998048099668717,
    num_leaves=148,
    bagging_fraction=0.8631413380094287,
    bagging_freq=6,
)

# Fixed LightGBM settings wrapped around the tuned values.
best_parameters = {
    'objective': 'regression',
    'metric': 'mse',
    'boosting_type': 'gbdt',
    **tuned_parameters,
    'feature_fraction': 1,
    'verbose': 1,
}

# Fit on train with validation data, score on the held-out test split, then persist.
learner_lgbm = LightGBMLearner(params=best_parameters)
learner_lgbm.train(X_train, y_train, X_val, y_val)
learner_lgbm.evaluate(X_test, y_test)
learner_lgbm.save_model("../results/models/lgbm/filtered_df_flattened_2")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.906498 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 972
[LightGBM] [Info] Number of data points in the train set: 2137801, number of used features: 73
[LightGBM] [Info] Start training from score 1514.152790
Validation MSE: 176159.9913


In [14]:
# Run the hyperparameter search on the train/validation split, with trials
# stored in a local SQLite database. The call is left as the last expression
# so its return value is displayed as the cell output.
optuna_storage_url = "sqlite:///lightgbm_optuna_flattened.db"
learner_lgbm.optimize(
    X_train,
    y_train,
    X_val,
    y_val,
    n_trials=100,
    db_path=optuna_storage_url,
)

[I 2024-12-30 14:05:17,512] Using an existing study with name 'lightgbm_optimization' instead of creating a new one.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.1),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.6, 1.0),
[I 2024-12-30 14:05:44,039] Trial 100 finished with value: 190779.64726951125 and parameters: {'learning_rate': 0.08592851153337022, 'num_leaves': 144, 'bagging_fraction': 0.8454424915578198, 'bagging_freq': 6}. Best is trial 87 with value: 189421.80545227422.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.1),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.6, 1.0),
[I 2024-12-30 14:06:07,695] Trial 101 finished with value: 189878.76396199572 and parameters: {'learning_rate': 0.09829368593376851, 'num_leaves': 134, 'bagging_fraction': 0.7403547703989634, 'bagging_freq': 5}. Best is trial 87 with value: 189421.80545227422.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.1),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.6, 1.0),
[I 2024-12-30 14:06:29,586] Trial 

Best hyperparameters: {'learning_rate': 0.09998048099668717, 'num_leaves': 148, 'bagging_fraction': 0.8631413380094287, 'bagging_freq': 6}
Best MSE: 188892.77966050216


{'learning_rate': 0.09998048099668717,
 'num_leaves': 148,
 'bagging_fraction': 0.8631413380094287,
 'bagging_freq': 6}