In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, name) for name in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.impute import KNNImputer
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsRegressor, LocalOutlierFactor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from catboost import CatBoostRegressor
import os
import pyarrow as pa
import pyarrow.parquet as pq
import polars as pl
# Fixed seed applied to NumPy's global random state so NumPy-based randomness is repeatable.
seed=42
np.random.seed(seed)
import kaggle_evaluation.mcts_inference_server

In [None]:
# Read the competition training CSV from the Kaggle input directory.
train_path = "/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv"
train_data = pd.read_csv(train_path)
# Bare last expression: the notebook renders the frame as this cell's output.
train_data

In [None]:
# Work on a copy so the feature engineering in later cells leaves train_data untouched.
train_v1=train_data.copy()

In [None]:
# Break each agent descriptor string into its five dash-separated components
# and store them in per-agent columns (agent1_format, agent1_strategies, ...).
for _agent in ('agent1', 'agent2'):
    _component_cols = [
        f'{_agent}_{_suffix}'
        for _suffix in ('format', 'strategies', 'EXP_const', 'PLAYOUT', 'SCORE_BOUNDS')
    ]
    train_v1[_component_cols] = train_v1[_agent].str.split('-', expand=True)

In [None]:
# The EXP_const components come out of the string split as text; cast them to float.
for _exp_col in ('agent1_EXP_const', 'agent2_EXP_const'):
    train_v1[_exp_col] = train_v1[_exp_col].astype(float)

In [None]:
# One LabelEncoder per categorical family. The agent1_* / agent2_* columns of a
# family share a single encoder so the same label gets the same integer code on
# both sides. These encoder objects are reused later by clean_data().
GameRulese_Encoder = LabelEncoder()
strategies_Encoder = LabelEncoder()
PLAYOUT_Encoder = LabelEncoder()
SCORE_Encoder = LabelEncoder()

train_v1['GameRulesetName'] = GameRulese_Encoder.fit_transform(train_v1['GameRulesetName'])

# Fit each shared encoder on the labels of BOTH agent columns before transforming.
# Fitting on agent1 only would make transform() raise for any label that occurs
# solely in the agent2 column. When agent2's labels are a subset of agent1's the
# resulting codes are identical to fitting on agent1 alone.
strategies_Encoder.fit(pd.concat([train_v1['agent1_strategies'], train_v1['agent2_strategies']], ignore_index=True))
train_v1['agent1_strategies'] = strategies_Encoder.transform(train_v1['agent1_strategies'])
train_v1['agent2_strategies'] = strategies_Encoder.transform(train_v1['agent2_strategies'])

PLAYOUT_Encoder.fit(pd.concat([train_v1['agent1_PLAYOUT'], train_v1['agent2_PLAYOUT']], ignore_index=True))
train_v1['agent1_PLAYOUT'] = PLAYOUT_Encoder.transform(train_v1['agent1_PLAYOUT'])
train_v1['agent2_PLAYOUT'] = PLAYOUT_Encoder.transform(train_v1['agent2_PLAYOUT'])

SCORE_Encoder.fit(pd.concat([train_v1['agent1_SCORE_BOUNDS'], train_v1['agent2_SCORE_BOUNDS']], ignore_index=True))
train_v1['agent1_SCORE_BOUNDS'] = SCORE_Encoder.transform(train_v1['agent1_SCORE_BOUNDS'])
train_v1['agent2_SCORE_BOUNDS'] = SCORE_Encoder.transform(train_v1['agent2_SCORE_BOUNDS'])


In [None]:
# Columns fed to the model. The order is kept exactly as-is because the same
# list selects the columns for both the training matrix and the inference frame.
feature = [
    'BoardSitesOccupiedChangeSign',
    'agent2_EXP_const',
    'GameRulesetName',
    'AdvantageP1',
    'PlayoutsPerSecond',
    'agent1_PLAYOUT',
    'agent1_strategies',
    'agent2_strategies',
    'DurationMoves',
    'DurationActions',
    'MovesPerSecond',
    'OutcomeUniformity',
    'PieceNumberMedian',
    'agent2_PLAYOUT',
    'DurationTurnsNotTimeouts',
    'GameTreeComplexity',
    'BranchingFactorMedian',
    'DrawFrequency',
    'agent1_EXP_const',
    'DurationTurnsStdDev',
]

In [None]:
# Training matrix: the selected feature columns. Target: the 'utility_agent1' column.
# Both are read as globals by predict() when it trains the model.
X=train_v1[feature]
y = train_v1['utility_agent1']

In [None]:
# Sanity check: print the summary (columns, dtypes, non-null counts) of the feature matrix.
X.info()

In [None]:
# Define model training function
def train_model(X_train, y_train, n_estimators=3000, learning_rate=0.05, max_depth=15, **xgb_params):
    """Fit an XGBRegressor on the given data and return the fitted model.

    Args:
        X_train: Feature matrix passed to ``XGBRegressor.fit``.
        y_train: Target values passed to ``XGBRegressor.fit``.
        n_estimators: Number of boosting rounds (default 3000).
        learning_rate: Boosting learning rate (default 0.05).
        max_depth: Maximum tree depth (default 15).
        **xgb_params: Any further keyword arguments forwarded unchanged to
            the ``XGBRegressor`` constructor (e.g. ``random_state``).

    Returns:
        The fitted ``XGBRegressor`` instance.

    Calling with only ``X_train`` and ``y_train`` uses the default
    hyper-parameter values listed above.
    """
    print("start model train")
    model = XGBRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        **xgb_params,
    )
    model.fit(X_train, y_train)
    print("end model train")
    return model


In [None]:
def _encode_with_fallback(values, encoder, unseen=-1):
    """Map labels to a fitted encoder's integer codes, using `unseen` for unknown labels."""
    # A fitted LabelEncoder's code for a label is that label's position in classes_,
    # so a single dict lookup replaces one encoder.transform() call per row.
    lookup = {label: code for code, label in enumerate(encoder.classes_)}
    return values.map(lookup).fillna(unseen).astype('int64')


def clean_data(test_df):
    """Turn a raw inference frame into the feature matrix expected by the model.

    Applies the same steps as the training-time preprocessing: splits the
    ``agent1`` / ``agent2`` descriptor strings on ``-`` into five component
    columns each, casts the ``EXP_const`` components to float, and encodes the
    categorical columns with the module-level encoders fitted on the training
    data. Labels not present in an encoder's ``classes_`` are encoded as ``-1``.

    Args:
        test_df: pandas DataFrame holding at least ``agent1``, ``agent2``,
            ``GameRulesetName`` and every non-derived column named in the
            module-level ``feature`` list. It is not modified.

    Returns:
        A new DataFrame containing exactly the columns in ``feature`` (in that
        order) with missing values replaced by 0.

    Reads the module-level names ``GameRulese_Encoder``, ``strategies_Encoder``,
    ``PLAYOUT_Encoder``, ``SCORE_Encoder`` and ``feature``.
    """
    print("start data preprocess")

    # Work on a copy so the caller's frame is left untouched.
    test_df = test_df.copy()

    for agent in ('agent1', 'agent2'):
        # Split the descriptor string into its five dash-separated components.
        component_cols = [
            f'{agent}_{suffix}'
            for suffix in ('format', 'strategies', 'EXP_const', 'PLAYOUT', 'SCORE_BOUNDS')
        ]
        test_df[component_cols] = test_df[agent].str.split('-', expand=True)

        # EXP_const comes out of the split as text; make it numeric.
        test_df[f'{agent}_EXP_const'] = test_df[f'{agent}_EXP_const'].astype(float)

        # Encode the categorical components; unseen labels become -1.
        test_df[f'{agent}_SCORE_BOUNDS'] = _encode_with_fallback(test_df[f'{agent}_SCORE_BOUNDS'], SCORE_Encoder)
        test_df[f'{agent}_strategies'] = _encode_with_fallback(test_df[f'{agent}_strategies'], strategies_Encoder)
        test_df[f'{agent}_PLAYOUT'] = _encode_with_fallback(test_df[f'{agent}_PLAYOUT'], PLAYOUT_Encoder)

    # GameRulesetName encoding with handling of unseen categories.
    test_df['GameRulesetName'] = _encode_with_fallback(test_df['GameRulesetName'], GameRulese_Encoder)

    # Only agent1_strategies is cast to float; kept so the returned dtypes stay
    # the same as before this function was rewritten.
    test_df['agent1_strategies'] = test_df['agent1_strategies'].astype(float)

    # Select the model's columns and replace missing values with 0.
    test = test_df[feature].fillna(0)

    print("end data process")
    return test


In [None]:
counter = 0   # Number of times predict() has trained the model; 0 means "not trained yet".
model = None  # Fitted regressor, created lazily by the first predict() call.

def predict(test: pl.DataFrame, sample_sub: pl.DataFrame):
    """Produce predictions for one batch handed over by the inference server.

    The batch is converted to pandas and run through ``clean_data``. On the
    first call the model is trained on the module-level ``X`` / ``y`` via
    ``train_model`` and cached in the global ``model``; later calls reuse it.

    Args:
        test: Batch of rows to predict, as a polars DataFrame.
        sample_sub: Submission template for the same batch, as a polars
            DataFrame; its ``utility_agent1`` column is replaced with the
            model's predictions.

    Returns:
        ``sample_sub`` with ``utility_agent1`` set to the predictions. If the
        model's ``predict`` call raises, the error is printed and ``sample_sub``
        is returned unchanged, so the caller still gets a frame with the
        template's schema instead of ``None``.
    """
    global counter, model  # Both are rebound here; X and y are only read.
    print(1)
    test_df = test.to_pandas()

    # Clean the data
    test_df = clean_data(test_df)
    print(2)

    # Train the model only once
    if counter == 0:
        model = train_model(X, y)
        counter += 1

    print("start predict")
    try:
        predictions = model.predict(test_df)
    except Exception as e:
        # Best-effort fallback: report the problem and hand back the template
        # as-is rather than None.
        print(f"Error during prediction: {e}")
        return sample_sub

    print(predictions)
    # Prepare the final submission format
    submission = sample_sub.with_columns(pl.Series("utility_agent1", predictions))
    print(submission)

    return submission

In [None]:
# Wrap predict() in the competition's inference server.
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)

# Outside a competition rerun, drive the server through a local gateway fed with
# the bundled test and sample-submission files; during a rerun, serve requests.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv',
        )
    )
else:
    inference_server.serve()