<p style="background-color: #1B1212; font-size: 300%; text-align: center; border-radius: 40px 40px; color: #C9A9A6; font-weight: bold; font-family: 'Cinzel', serif; text-transform: uppercase; border: 4px solid #C9A9A6;">imports</p>

In [None]:
# 필요한 라이브러리 및 설정
import os
import sys
from pathlib import Path
import polars as pl  # Pandas와 비슷하지만 대규모 데이터 처리에 더 나은 성능 제공
import pandas as pd  # CSV 파일을 로드하고 처리하는 라이브러리
import numpy as np  # 수학적 계산 및 행렬 연산 라이브러리

# KFold 관련 라이브러리
from sklearn.model_selection import StratifiedGroupKFold, GroupKFold  # Stratified 및 Group 기반의 K-Fold 교차 검증
from sklearn.metrics import mean_squared_error as mse  # RMSE 계산용 함수

# 모델 관련 라이브러리
import lightgbm as lgb  # LightGBM 라이브러리
from lightgbm import LGBMRegressor, log_evaluation, early_stopping
from catboost import CatBoostRegressor  # CatBoost 모델 라이브러리
from xgboost import XGBRegressor  # XGBoost 모델 라이브러리

import dill  # Python 객체 직렬화/역직렬화 (모델 저장 및 로드)
from sklearn.feature_extraction.text import TfidfVectorizer  # 텍스트 데이터를 TF-IDF 특성으로 변환
import re  # 정규 표현식 라이브러리
import gc  # 가비지 컬렉션 (메모리 정리)
import matplotlib.pyplot as plt  # 시각화 라이브러리
import plotly.graph_objects as go  # Plotly 기반의 고급 그래프 생성
import warnings  # 경고 메시지 무시 설정
from sklearn.preprocessing import StandardScaler  # 데이터 표준화

# PyTorch (딥러닝 프레임워크)
import torch
import torch.nn as nn  # Neural Network 모듈
import torch.optim as optim  # Optimizer 모듈
from torch.utils.data import Dataset, DataLoader  # Dataset 및 DataLoader 정의

# Silence warnings for the rest of the session
warnings.filterwarnings('ignore')  # 'ignore' suppresses every warning category

import random  # Python's built-in random number utilities
sys.path.append("/kaggle/input/um-game-playing-strength-of-mcts-variants")  # add the competition input directory to the import path (presumably where kaggle_evaluation lives)
import kaggle_evaluation.mcts_inference_server  # competition inference-server API

# Pandas display options: None removes the limit on printed rows and columns
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# 설정 클래스 정의
class APP:
    """Notebook-wide run configuration, resolved once when the class body runs.

    Attributes are plain class attributes; nothing is instantiated.

    * ``small_iterations`` / ``short_dataset`` / ``test_full_dataset`` are
      experiment switches. All three are forced to ``False`` when ``submit``
      is true.
    * ``local`` is true when the ``DOCKER_USING`` environment variable equals
      ``"LOCAL"``.
    * ``submit`` is true when ``KAGGLE_IS_COMPETITION_RERUN`` is set to a
      non-empty value.
    * ``path_root`` / ``input_path`` / ``train_file`` / ``test_file`` /
      ``sample_subm_file`` are ``pathlib.Path`` objects under ``/kaggle/input``.
    """

    # Experiment switches (defaults for interactive runs).
    small_iterations = True
    short_dataset = True
    test_full_dataset = False

    # Execution environment, read from environment variables.
    local = os.environ.get("DOCKER_USING", "") == "LOCAL"
    submit = os.environ.get('KAGGLE_IS_COMPETITION_RERUN', "") != ""

    # Apply the submission override BEFORE deriving any path: the file names
    # below depend on test_full_dataset, so resetting it afterwards would leave
    # test_file / sample_subm_file pointing at the "full" variants.
    if submit:
        small_iterations = False
        short_dataset = False
        test_full_dataset = False

    # Input locations.
    path_root = Path('/kaggle/input')
    input_path = path_root / 'um-game-playing-strength-of-mcts-variants'
    train_file = input_path / 'train.csv'
    test_file = input_path / ('test_full.csv' if test_full_dataset else 'test.csv')
    sample_subm_file = input_path / ('sample_subm_full.csv' if test_full_dataset else 'sample_submission.csv')

# 랜덤 시드 설정 함수 (재현 가능성 확보)
def seed_everything(seed):
    """Seed NumPy's global RNG and Python's ``random`` module with ``seed``.

    No other library (e.g. PyTorch) is seeded here.
    """
    for seeder in (np.random.seed, random.seed):
        seeder(seed)


# Fix the seed once for the whole notebook.
seed_everything(seed=2024)


---
---
# **》》》Model1**
---
---

In [None]:
class model_1:
    # Class-level data loading: these statements run once, when the class body
    # is executed, and leave `train` / `test` as class attributes.
    # Each CSV is read with polars and then converted to a pandas DataFrame.
    train=pl.read_csv("/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv")
    train=train.to_pandas()
    print(f"len(train):{len(train)}")
    test=pl.read_csv("/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv")
    test=test.to_pandas()
    print(f"len(test):{len(test)}")
    # NOTE(review): a bare expression inside a class body is not displayed, so
    # this preview produces no output here.
    test.head()

    class Preprocessor():
        def __init__(self,seed=2024,target='utility_agent1',train=None,num_folds=10,CV_LB_path="/kaggle/input/mcts-eda-about-cv-and-lb/1018CV_LB.csv"):
            self.seed=seed
            self.target=target
            self.train=train
            self.model_paths = []  # 학습 및 추론용 모델 경로
            self.tfidf_paths = []  # 문자열의 TF-IDF 모델 경로
            self.num_folds = num_folds
            # CV와 LB 상태 확인용 데이터 (현재 비활성화)
            # self.check = pd.read_csv(CV_LB_path)
            
            # 문자열 컬럼 전처리
        def clean(self, df, col):
            """Normalise a text column in place and return the frame.

            Missing values become the string "nan", text is lower-cased, and
            every ASCII punctuation character is replaced by a single space
            (so the text length is unchanged).

            Args:
                df (pd.DataFrame): Frame holding the column; modified in place.
                col (str): Name of the string column to clean.

            Returns:
                pd.DataFrame: The same ``df`` object.
            """
            ps = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
            # One translation table handles all punctuation in a single pass
            # per string, instead of one full column pass per character.
            to_space = str.maketrans(ps, ' ' * len(ps))
            df[col] = df[col].fillna("nan").apply(
                lambda text: text.lower().translate(to_space)
            )
            return df
        
            # ARI (자동 읽기 가능성 지수): 텍스트의 가독성을 평가하는 척도
        def ARI(self, txt):
            """Return an Automated Readability Index style score for ``txt``.

            Counts are taken as follows: characters = ``len(txt)``; words =
            number of pieces after splitting on space, newline, '.', '?', '!'
            or ','; sentences = number of pieces after splitting on '.', '?'
            or '!'. Empty pieces are counted, so both counts are at least 1.
            """
            word_pieces = re.split(r' |\n|\.|\?|\!|\,', txt)
            sentence_pieces = re.split(r'\.|\?|\!', txt)
            chars_per_word = len(txt) / len(word_pieces)
            words_per_sentence = len(word_pieces) / len(sentence_pieces)
            return 4.71 * chars_per_word + 0.5 * words_per_sentence - 21.43
    
        # McAlpine EFLAW 테스트: 가독성 점수를 계산
        def McAlpine_EFLAW(self, txt):
            """Return ``(words + sentences * words) / sentences`` for ``txt``.

            Words are the pieces obtained by splitting on space, newline, '.',
            '?', '!' or ','; sentences are the pieces obtained by splitting on
            '.', '?' or '!'. Empty pieces are counted.
            """
            n_words = len(re.split(r' |\n|\.|\?|\!|\,', txt))
            n_sentences = len(re.split(r'\.|\?|\!', txt))
            numerator = n_words + n_sentences * n_words
            return numerator / n_sentences
    
        # CLRI (콜맨-리아우 가독성 지수): 텍스트 가독성을 평가
        def CLRI(self, txt):
            """Return a Coleman-Liau style readability score for ``txt``.

            Computes ``0.0588 * L - 0.296 * S - 15.8`` where ``L`` is
            100 * characters / words and ``S`` is 100 * sentences / words.
            Characters = ``len(txt)``; words and sentences are counted by
            splitting on punctuation/whitespace (empty pieces included).
            """
            n_chars = len(txt)
            n_words = len(re.split(r' |\n|\.|\?|\!|\,', txt))
            n_sentences = len(re.split(r'\.|\?|\!', txt))
            chars_per_100_words = 100 * n_chars / n_words
            sentences_per_100_words = 100 * n_sentences / n_words
            return 0.0588 * chars_per_100_words - 0.296 * sentences_per_100_words - 15.8
    
        # 객체를 지정된 경로에 저장
        def pickle_dump(self, obj, path):
            """Serialise ``obj`` with dill (protocol 4) into the file at ``path``.

            The file is opened in binary write mode, so existing content is
            overwritten.
            """
            with open(path, "wb") as sink:
                dill.dump(obj, sink, protocol=4)
    
        # 지정된 경로에서 객체를 로드
        def pickle_load(self, path):
            """Read the file at ``path`` and return the object dill deserialises from it."""
            # NOTE(review): unpickling can execute arbitrary code embedded in
            # the file; only load artefacts from a trusted source.
            with open(path, "rb") as source:
                return dill.load(source)
        
        def reduce_mem_usage(self, df, float16_as32=True):
            """Downcast numeric columns to smaller dtypes to save memory.

            Only NumPy signed-integer and floating-point columns are converted.
            Every other dtype (object, category, bool, unsigned, datetime,
            pandas extension dtypes) is left untouched: comparing such columns
            against float limits either widens them or raises.

            Integer columns get the narrowest of int8/int16/int32/int64 whose
            limits strictly contain the column's min and max. Float columns get
            float16 (or float32 when ``float16_as32`` is true) if their range
            fits strictly inside float16, else float32 if it fits strictly
            inside float32, else float64.

            Args:
                df (pd.DataFrame): Frame to optimise; columns are reassigned
                    on this object.
                float16_as32 (bool): If True, use float32 where float16 would
                    otherwise be chosen.

            Returns:
                pd.DataFrame: The same ``df`` object, with downcast columns.
            """
            start_mem = df.memory_usage().sum() / 1024**2
            print(f'초기 메모리 사용량: {start_mem:.2f} MB')

            for col in df.columns:
                col_type = df[col].dtype

                # Skip anything that is not a plain NumPy signed int / float.
                if not isinstance(col_type, np.dtype) or col_type.kind not in ('i', 'f'):
                    continue

                c_min, c_max = df[col].min(), df[col].max()

                if col_type.kind == 'i':
                    # First (narrowest) integer type that strictly contains the
                    # observed range; if none does, the column is left as is.
                    for candidate in (np.int8, np.int16, np.int32, np.int64):
                        limits = np.iinfo(candidate)
                        if c_min > limits.min and c_max < limits.max:
                            df[col] = df[col].astype(candidate)
                            break
                else:
                    f16, f32 = np.finfo(np.float16), np.finfo(np.float32)
                    if c_min > f16.min and c_max < f16.max:
                        target = np.float32 if float16_as32 else np.float16
                    elif c_min > f32.min and c_max < f32.max:
                        target = np.float32
                    else:
                        # Also reached when min/max are NaN (comparisons False).
                        target = np.float64
                    df[col] = df[col].astype(target)

            end_mem = df.memory_usage().sum() / 1024**2
            print(f'최적화 후 메모리 사용량: {end_mem:.2f} MB')
            print(f'감소율: {100 * (start_mem - end_mem) / start_mem:.1f}%')

            return df
            
        def FE(self,df,mode='train'):
            print(f"FE:{mode}")

            print("agent position feature")
            # 모든 에이전트 이름 리스트
            total_agent=['MCTS-ProgressiveHistory-0.1-MAST-false', 'MCTS-ProgressiveHistory-0.1-MAST-true', 'MCTS-ProgressiveHistory-0.1-NST-false', 'MCTS-ProgressiveHistory-0.1-NST-true', 'MCTS-ProgressiveHistory-0.1-Random200-false', 'MCTS-ProgressiveHistory-0.1-Random200-true', 'MCTS-ProgressiveHistory-0.6-MAST-false', 'MCTS-ProgressiveHistory-0.6-MAST-true', 'MCTS-ProgressiveHistory-0.6-NST-false', 'MCTS-ProgressiveHistory-0.6-NST-true', 'MCTS-ProgressiveHistory-0.6-Random200-false', 'MCTS-ProgressiveHistory-0.6-Random200-true', 'MCTS-ProgressiveHistory-1.41421356237-MAST-false', 'MCTS-ProgressiveHistory-1.41421356237-MAST-true', 'MCTS-ProgressiveHistory-1.41421356237-NST-false', 'MCTS-ProgressiveHistory-1.41421356237-NST-true', 'MCTS-ProgressiveHistory-1.41421356237-Random200-false', 'MCTS-ProgressiveHistory-1.41421356237-Random200-true', 'MCTS-UCB1-0.1-MAST-false', 'MCTS-UCB1-0.1-MAST-true', 'MCTS-UCB1-0.1-NST-false', 'MCTS-UCB1-0.1-NST-true', 'MCTS-UCB1-0.1-Random200-false', 'MCTS-UCB1-0.1-Random200-true', 'MCTS-UCB1-0.6-MAST-false', 'MCTS-UCB1-0.6-MAST-true', 'MCTS-UCB1-0.6-NST-false', 'MCTS-UCB1-0.6-NST-true', 'MCTS-UCB1-0.6-Random200-false', 'MCTS-UCB1-0.6-Random200-true', 'MCTS-UCB1-1.41421356237-MAST-false', 'MCTS-UCB1-1.41421356237-MAST-true', 'MCTS-UCB1-1.41421356237-NST-false', 'MCTS-UCB1-1.41421356237-NST-true', 'MCTS-UCB1-1.41421356237-Random200-false', 'MCTS-UCB1-1.41421356237-Random200-true', 'MCTS-UCB1GRAVE-0.1-MAST-false', 'MCTS-UCB1GRAVE-0.1-MAST-true', 'MCTS-UCB1GRAVE-0.1-NST-false', 'MCTS-UCB1GRAVE-0.1-NST-true', 'MCTS-UCB1GRAVE-0.1-Random200-false', 'MCTS-UCB1GRAVE-0.1-Random200-true', 'MCTS-UCB1GRAVE-0.6-MAST-false', 'MCTS-UCB1GRAVE-0.6-MAST-true', 'MCTS-UCB1GRAVE-0.6-NST-false', 'MCTS-UCB1GRAVE-0.6-NST-true', 'MCTS-UCB1GRAVE-0.6-Random200-false', 'MCTS-UCB1GRAVE-0.6-Random200-true', 'MCTS-UCB1GRAVE-1.41421356237-MAST-false', 'MCTS-UCB1GRAVE-1.41421356237-MAST-true', 'MCTS-UCB1GRAVE-1.41421356237-NST-false', 
'MCTS-UCB1GRAVE-1.41421356237-NST-true', 'MCTS-UCB1GRAVE-1.41421356237-Random200-false', 'MCTS-UCB1GRAVE-1.41421356237-Random200-true', 'MCTS-UCB1Tuned-0.1-MAST-false', 'MCTS-UCB1Tuned-0.1-MAST-true', 'MCTS-UCB1Tuned-0.1-NST-false', 'MCTS-UCB1Tuned-0.1-NST-true', 'MCTS-UCB1Tuned-0.1-Random200-false', 'MCTS-UCB1Tuned-0.1-Random200-true', 'MCTS-UCB1Tuned-0.6-MAST-false', 'MCTS-UCB1Tuned-0.6-MAST-true', 'MCTS-UCB1Tuned-0.6-NST-false', 'MCTS-UCB1Tuned-0.6-NST-true', 'MCTS-UCB1Tuned-0.6-Random200-false', 'MCTS-UCB1Tuned-0.6-Random200-true', 'MCTS-UCB1Tuned-1.41421356237-MAST-false', 'MCTS-UCB1Tuned-1.41421356237-MAST-true', 'MCTS-UCB1Tuned-1.41421356237-NST-false', 'MCTS-UCB1Tuned-1.41421356237-NST-true', 'MCTS-UCB1Tuned-1.41421356237-Random200-false', 'MCTS-UCB1Tuned-1.41421356237-Random200-true']
            # 'agent1'과 'agent2' 열의 값을 배열로 가져오기
            agent1,agent2=df['agent1'].values,df['agent2'].values
            for i in range(len(total_agent)): # 각 에이전트에 대해 반복
                value=np.zeros(len(df))
                for j in range(len(df)):# agent1이 현재 에이전트인 경우 값을 +1 증가
                    if agent1[j]==total_agent[i]:# agent2가 현재 에이전트인 경우 값을 -1 감소
                        value[j]+=1
                    elif agent2[j]==total_agent[i]:
                        value[j]-=1
                df[f'agent_{total_agent[i]}']=value # 새로운 피처를 데이터프레임에 추가

            
            df['area']=df['NumRows']*df['NumColumns']
            df['row_equal_col']=(df['NumColumns']==df['NumRows']).astype(np.int8)
            df['Playouts/Moves'] = df['PlayoutsPerSecond'] / (df['MovesPerSecond'] + 1e-15)
            df['EfficiencyPerPlayout'] = df['MovesPerSecond'] / (df['PlayoutsPerSecond'] + 1e-15)
            df['TurnsDurationEfficiency'] = df['DurationActions'] / (df['DurationTurnsStdDev'] + 1e-15)
            df['AdvantageBalanceRatio'] = df['AdvantageP1'] / (df['Balance'] + 1e-15)
            df['ActionTimeEfficiency'] = df['DurationActions'] / (df['MovesPerSecond'] + 1e-15)
            df['StandardizedTurnsEfficiency'] = df['DurationTurnsStdDev'] / (df['DurationActions'] + 1e-15)
            df['AdvantageTimeImpact'] = df['AdvantageP1'] / (df['DurationActions'] + 1e-15)
            df['DurationToComplexityRatio'] = df['DurationActions'] / (df['StateTreeComplexity'] + 1e-15)
            df['NormalizedGameTreeComplexity'] =  df['GameTreeComplexity'] /  (df['StateTreeComplexity'] + 1e-15)
            df['ComplexityBalanceInteraction'] =  df['Balance'] *  df['GameTreeComplexity']
            df['OverallComplexity'] =  df['StateTreeComplexity'] +  df['GameTreeComplexity']
            df['ComplexityPerPlayout'] =  df['GameTreeComplexity'] /  (df['PlayoutsPerSecond'] + 1e-15)
            df['TurnsNotTimeouts/Moves'] = df['DurationTurnsNotTimeouts'] / (df['MovesPerSecond'] + 1e-15)
            df['Timeouts/DurationActions'] = df['Timeouts'] / (df['DurationActions'] + 1e-15)
            df['OutcomeUniformity/AdvantageP1'] = df['OutcomeUniformity'] / (df['AdvantageP1'] + 1e-15)
            df['ComplexDecisionRatio'] = df['StepDecisionToEnemy'] + df['SlideDecisionToEnemy'] + df['HopDecisionMoreThanOne']
            df['AggressiveActionsRatio'] = df['StepDecisionToEnemy'] + df['HopDecisionEnemyToEnemy'] + df['HopDecisionFriendToEnemy'] + df['SlideDecisionToEnemy']
            
            print("deal with outliers")
            df['PlayoutsPerSecond']=df['PlayoutsPerSecond'].clip(0,25000)
            df['MovesPerSecond']=df['MovesPerSecond'].clip(0,1000000)
            
            print("agent1 agent2 feature")
            cols=['selection','exploration_const','playout','score_bounds']
            for i in range(len(cols)):
                for j in range(2):
                    df[f'{cols[i]}{j+1}']=df[f'agent{j+1}'].apply(lambda x:x.split('-')[i+1])
            

            print(f"one_hot_encoder")
            # One-hot encoding을 적용
            # One-hot encoding을 적용할 열과 해당 열의 고유값 리스트
            onehot_cols=[['NumOffDiagonalDirections', [0.0, 4.82, 2.0, 5.18, 3.08, 0.06]], ['NumLayers', [1, 0, 4, 5]], ['NumPhasesBoard', [3, 2, 1, 5, 4]], ['NumContainers', [1, 4, 3, 2]], ['NumDice', [0, 2, 1, 4, 6, 3, 5, 7]], ['ProposeDecisionFrequency', [0.0, 0.05, 0.01]], ['PromotionDecisionFrequency', [0.0, 0.01, 0.03, 0.02, 0.11, 0.05, 0.04]], ['SlideDecisionToFriendFrequency', [0.0, 0.19, 0.06]], ['LeapDecisionToEnemyFrequency', [0.0, 0.04, 0.01, 0.02, 0.07, 0.03, 0.14, 0.08]], ['HopDecisionFriendToFriendFrequency', [0.0, 0.13, 0.09]], ['HopDecisionEnemyToEnemyFrequency', [0.0, 0.01, 0.2, 0.03]], ['HopDecisionFriendToEnemyFrequency', [0.0, 0.01, 0.09, 0.25, 0.02]], ['FromToDecisionFrequency', [0.0, 0.38, 1.0, 0.31, 0.94, 0.67]], ['ProposeEffectFrequency', [0.0, 0.01, 0.03]], ['PushEffectFrequency', [0.0, 0.5, 0.96, 0.25]], ['FlipFrequency', [0.0, 0.87, 1.0, 0.96]], ['SetCountFrequency', [0.0, 0.62, 0.54, 0.02]], ['DirectionCaptureFrequency', [0.0, 0.55, 0.54]], ['EncloseCaptureFrequency', [0.0, 0.08, 0.1, 0.07, 0.12, 0.02, 0.09]], ['InterveneCaptureFrequency', [0.0, 0.01, 0.14, 0.04]], ['SurroundCaptureFrequency', [0.0, 0.01, 0.03, 0.02]], ['NumPlayPhase', [1, 2, 3, 4, 5, 6, 7, 8]], ['LineLossFrequency', [0.0, 0.96, 0.87, 0.46, 0.26, 0.88, 0.94]], ['ConnectionEndFrequency', [0.0, 0.19, 1.0, 0.23, 0.94, 0.35, 0.97]], ['ConnectionLossFrequency', [0.0, 0.54, 0.78]], ['GroupEndFrequency', [0.0, 1.0, 0.11, 0.79]], ['GroupWinFrequency', [0.0, 0.11, 1.0]], ['LoopEndFrequency', [0.0, 0.14, 0.66]], ['LoopWinFrequency', [0.0, 0.14, 0.66]], ['PatternEndFrequency', [0.0, 0.63, 0.35]], ['PatternWinFrequency', [0.0, 0.63, 0.35]], ['NoTargetPieceWinFrequency', [0.0, 0.72, 0.77, 0.95, 0.32, 1.0]], ['EliminatePiecesLossFrequency', [0.0, 0.85, 0.96, 0.68]], ['EliminatePiecesDrawFrequency', [0.0, 0.03, 0.91, 1.0, 0.36, 0.86]], ['NoOwnPiecesLossFrequency', [0.0, 1.0, 0.68]], ['FillEndFrequency', [0.0, 1.0, 0.04, 0.01, 0.99, 0.72]], ['FillWinFrequency', [0.0, 1.0, 0.04, 0.01, 
0.99]], ['ReachDrawFrequency', [0.0, 0.9, 0.98]], ['ScoringLossFrequency', [0.0, 0.6, 0.62]], ['NoMovesLossFrequency', [0.0, 1.0, 0.13, 0.06]], ['NoMovesDrawFrequency', [0.0, 0.01, 0.04, 0.03, 0.22]], ['BoardSitesOccupiedChangeNumTimes', [0.0, 0.06, 0.42, 0.12, 0.14, 0.94]], ['BranchingFactorChangeNumTimesn', [0.0, 0.3, 0.02, 0.07, 0.04, 0.13, 0.01, 0.21, 0.03]], ['PieceNumberChangeNumTimes', [0.0, 0.06, 0.42, 0.12, 0.14, 1.0]], ['selection1', ['ProgressiveHistory', 'UCB1', 'UCB1GRAVE', 'UCB1Tuned']], ['selection2', ['ProgressiveHistory', 'UCB1GRAVE', 'UCB1', 'UCB1Tuned']], ['exploration_const1', ['0.1', '0.6', '1.41421356237']], ['exploration_const2', ['0.6', '0.1', '1.41421356237']], ['playout1', ['MAST', 'NST', 'Random200']], ['playout2', ['Random200', 'NST', 'MAST']]]
            for col,unique in onehot_cols: # 각 열과 고유값에 대해 One-hot encoding
                for u in unique:
                    df[f'{col}_{u}']=(df[col]==u).astype(np.int8)
                    
                    
            print("deal with LudRules") 
            print("1:drop game")
           # LudRules 열에서 게임 이름을 제거하는 함수 정의
            def drop_gamename(rule):
                rule=rule[len('(game "'):]
                for i in range(len(rule)):
                    if rule[i]=='"':
                        return rule[i+1:]
                        # LudRules 열에서 게임 이름 제거
            df['LudRules']=df['LudRules'].apply(lambda x:drop_gamename(x))

            print("2:player")
            # LudRules에서 플레이어 정보를 추출하는 함수 정의
            def get_player(rule):
                player=''
                stack=[]# 괄호와 중괄호의 짝을 맞추기 위한 스택
                for i in range(len(rule)):
                    player+=rule[i]
                    if rule[i] in ['(','{']:
                        stack.append(rule[i])  # 여는 괄호는 스택에 추가
                    elif rule[i] in [')','}']:
                        stack=stack[:-1] # 닫는 괄호는 스택에서 제거
                        if len(stack)==0:# 스택이 비면 플레이어 정보 반환
                            return player
                            # LudRules에서 플레이어 정보 추출 및 데이터프레임에 추가
            df['player']=df['LudRules'].apply(lambda rule:get_player(rule))
            df=self.clean(df,'player')
            # player 열 정리
            df['player_len']=df['player'].apply(len) # 결측값 처리 및 소문자로 변환
            df['LudRules']=[rule[len(player):] for player,rule in zip(df['player'],df['LudRules'])] # 플레이어 문자열 길이 추가
            df.drop(['player'],axis=1,inplace=True) # player 정보를 제외한 나머지 LudRules 값
             
            print("Rules readable") # player 열 삭제
            for rule in ['EnglishRules', 'LudRules']: # LudRules 및 EnglishRules 열에 대해 읽기 점수 계산
                df[rule+"_ARI"]=df[rule].apply(lambda x:self.ARI(x)) # ARI 점수 계산
                df[rule+"CLRI"]=df[rule].apply(lambda x:self.CLRI(x)) # CLRI 점수 계산
                df[rule+"McAlpine_EFLAW"]=df[rule].apply(lambda x:self.McAlpine_EFLAW(x)) # McAlpine EFLAW 점수 계산
                    
            df['PlayoutsPerSecond/MovesPerSecond']=df['PlayoutsPerSecond']/df['MovesPerSecond']
            
            # 출현 빈도가 1% 미만인 열 제거
            drop_cols=['Cooperation', 'Team', 'TriangleShape', 'DiamondShape', 'SpiralShape', 'StarShape', 'SquarePyramidalShape', 'SemiRegularTiling', 'CircleTiling', 'SpiralTiling', 'MancalaThreeRows', 'MancalaSixRows', 'MancalaCircular', 'AlquerqueBoardWithOneTriangle', 'AlquerqueBoardWithTwoTriangles', 'AlquerqueBoardWithFourTriangles', 'AlquerqueBoardWithEightTriangles', 'ThreeMensMorrisBoard', 'ThreeMensMorrisBoardWithTwoTriangles', 'NineMensMorrisBoard', 'StarBoard', 'PachisiBoard', 'Boardless', 'NumColumns', 'NumCorners', 'NumOffDiagonalDirections', 'NumLayers', 'NumCentreSites', 'NumConvexCorners', 'NumPhasesBoard', 'NumContainers', 'Piece', 'PieceValue', 'PieceRotation', 'PieceDirection', 'LargePiece', 'Tile', 'NumComponentsType', 'NumDice', 'OpeningContract', 'SwapOption', 'Repetition', 'TurnKo', 'PositionalSuperko', 'AutoMove', 'InitialRandomPlacement', 'InitialScore', 'InitialCost', 'Moves', 'VoteDecision', 'SwapPlayersDecision', 'SwapPlayersDecisionFrequency', 'ProposeDecision', 'ProposeDecisionFrequency', 'PromotionDecisionFrequency', 'RotationDecision', 'RotationDecisionFrequency', 'StepDecisionToFriend', 'StepDecisionToFriendFrequency', 'StepDecisionToEnemy', 'SlideDecisionToEnemy', 'SlideDecisionToEnemyFrequency', 'SlideDecisionToFriend', 'SlideDecisionToFriendFrequency', 'LeapDecision', 'LeapDecisionFrequency', 'LeapDecisionToEmpty', 'LeapDecisionToEmptyFrequency', 'LeapDecisionToEnemy', 'LeapDecisionToEnemyFrequency', 'HopDecisionFriendToEmpty', 'HopDecisionFriendToEmptyFrequency', 'HopDecisionFriendToFriendFrequency', 'HopDecisionEnemyToEnemy', 'HopDecisionEnemyToEnemyFrequency', 'HopDecisionFriendToEnemy', 'HopDecisionFriendToEnemyFrequency', 'FromToDecisionFrequency', 'FromToDecisionEnemy', 'FromToDecisionEnemyFrequency', 'FromToDecisionFriend', 'SwapPiecesDecision', 'SwapPiecesDecisionFrequency', 'ShootDecision', 'ShootDecisionFrequency', 'VoteEffect', 'SwapPlayersEffect', 'PassEffect', 'ProposeEffect', 'ProposeEffectFrequency', 
'AddEffectFrequency', 'SowFrequency', 'SowCapture', 'SowCaptureFrequency', 'SowRemove', 'SowBacktracking', 'SowBacktrackingFrequency', 'SowProperties', 'SowOriginFirst', 'SowCCW', 'PromotionEffectFrequency', 'PushEffect', 'PushEffectFrequency', 'Flip', 'FlipFrequency', 'SetNextPlayer', 'SetValue', 'SetValueFrequency', 'SetCount', 'SetCountFrequency', 'SetRotation', 'SetRotationFrequency', 'StepEffect', 'SlideEffect', 'LeapEffect', 'ByDieMove', 'MaxDistance', 'ReplacementCaptureFrequency', 'HopCaptureMoreThanOne', 'DirectionCapture', 'DirectionCaptureFrequency', 'EncloseCaptureFrequency', 'CustodialCapture', 'CustodialCaptureFrequency', 'InterveneCapture', 'InterveneCaptureFrequency', 'SurroundCapture', 'SurroundCaptureFrequency', 'CaptureSequence', 'CaptureSequenceFrequency', 'Group', 'Loop', 'Pattern', 'PathExtent', 'Territory', 'Fill', 'CanNotMove', 'Threat', 'CountPiecesMoverComparison', 'ProgressCheck', 'RotationalDirection', 'SameLayerDirection', 'ForwardDirection', 'BackwardDirection', 'BackwardsDirection', 'LeftwardDirection', 'RightwardsDirection', 'LeftwardsDirection', 'ForwardLeftDirection', 'ForwardRightDirection', 'BackwardLeftDirection', 'BackwardRightDirection', 'SameDirection', 'OppositeDirection', 'NumPlayPhase', 'LineLoss', 'LineLossFrequency', 'LineDraw', 'ConnectionEnd', 'ConnectionEndFrequency', 'ConnectionWinFrequency', 'ConnectionLoss', 'ConnectionLossFrequency', 'GroupEnd', 'GroupEndFrequency', 'GroupWin', 'GroupWinFrequency', 'GroupLoss', 'GroupDraw', 'LoopEnd', 'LoopEndFrequency', 'LoopWin', 'LoopWinFrequency', 'LoopLoss', 'PatternEnd', 'PatternEndFrequency', 'PatternWin', 'PatternWinFrequency', 'PathExtentEnd', 'PathExtentWin', 'PathExtentLoss', 'TerritoryEnd', 'TerritoryWin', 'TerritoryWinFrequency', 'Checkmate', 'CheckmateWin', 'NoTargetPieceEndFrequency', 'NoTargetPieceWin', 'NoTargetPieceWinFrequency', 'EliminatePiecesLoss', 'EliminatePiecesLossFrequency', 'EliminatePiecesDraw', 'EliminatePiecesDrawFrequency', 'NoOwnPiecesEnd', 
'NoOwnPiecesWin', 'NoOwnPiecesLoss', 'NoOwnPiecesLossFrequency', 'FillEnd', 'FillEndFrequency', 'FillWin', 'FillWinFrequency', 'ReachWin', 'ReachLoss', 'ReachLossFrequency', 'ReachDraw', 'ReachDrawFrequency', 'ScoringLoss', 'ScoringLossFrequency', 'ScoringDraw', 'NoMovesLoss', 'NoMovesDrawFrequency', 'NoProgressEnd', 'NoProgressEndFrequency', 'NoProgressDraw', 'NoProgressDrawFrequency', 'BoardCoverageFull', 'BoardSitesOccupiedChangeNumTimes', 'BranchingFactorChangeLineBestFit', 'BranchingFactorChangeNumTimesn', 'DecisionFactorChangeNumTimes', 'MoveDistanceChangeSign', 'MoveDistanceChangeLineBestFit', 'PieceNumberChangeNumTimes', 'PieceNumberMaxIncrease', 'ScoreDifferenceMedian', 'ScoreDifferenceVariance', 'ScoreDifferenceChangeAverage', 'ScoreDifferenceChangeSign', 'ScoreDifferenceChangeLineBestFit', 'Math', 'Division', 'Modulo', 'Absolute', 'Exponentiation', 'Minimum', 'Maximum', 'Even', 'Odd', 'Visual', 'GraphStyle', 'MancalaStyle', 'PenAndPaperStyle', 'ShibumiStyle', 'BackgammonStyle', 'JanggiStyle', 'XiangqiStyle', 'ShogiStyle', 'TableStyle', 'SurakartaStyle', 'NoBoard', 'ChessComponent', 'KingComponent', 'QueenComponent', 'KnightComponent', 'RookComponent', 'BishopComponent', 'PawnComponent', 'FairyChessComponent', 'PloyComponent', 'ShogiComponent', 'XiangqiComponent', 'StrategoComponent', 'JanggiComponent', 'TaflComponent', 'StackType', 'Stack', 'ShowPieceValue', 'ShowPieceState', 'Implementation', 'StateType', 'StackState', 'VisitedSites', 'InternalCounter', 'SetInternalCounter', 'Efficiency', 'NumOffDiagonalDirections_0.0', 'NumOffDiagonalDirections_4.82', 'NumOffDiagonalDirections_2.0', 'NumOffDiagonalDirections_5.18', 'NumOffDiagonalDirections_3.08', 'NumOffDiagonalDirections_0.06', 'NumLayers_1', 'NumLayers_0', 'NumLayers_4', 'NumLayers_5', 'NumPhasesBoard_1', 'NumPhasesBoard_5', 'NumDice_0', 'NumDice_2', 'NumDice_6', 'NumDice_3', 'NumDice_5', 'NumDice_7', 'ProposeDecisionFrequency_0.0', 'ProposeDecisionFrequency_0.05', 'ProposeDecisionFrequency_0.01', 
'PromotionDecisionFrequency_0.0', 'PromotionDecisionFrequency_0.01', 'PromotionDecisionFrequency_0.03', 'PromotionDecisionFrequency_0.02', 'PromotionDecisionFrequency_0.11', 'PromotionDecisionFrequency_0.05', 'PromotionDecisionFrequency_0.04', 'SlideDecisionToFriendFrequency_0.0', 'SlideDecisionToFriendFrequency_0.19', 'SlideDecisionToFriendFrequency_0.06', 'LeapDecisionToEnemyFrequency_0.0', 'LeapDecisionToEnemyFrequency_0.04', 'LeapDecisionToEnemyFrequency_0.01', 'LeapDecisionToEnemyFrequency_0.02', 'LeapDecisionToEnemyFrequency_0.07', 'LeapDecisionToEnemyFrequency_0.03', 'LeapDecisionToEnemyFrequency_0.14', 'LeapDecisionToEnemyFrequency_0.08', 'HopDecisionFriendToFriendFrequency_0.0', 'HopDecisionFriendToFriendFrequency_0.13', 'HopDecisionFriendToFriendFrequency_0.09', 'HopDecisionEnemyToEnemyFrequency_0.0', 'HopDecisionEnemyToEnemyFrequency_0.01', 'HopDecisionEnemyToEnemyFrequency_0.2', 'HopDecisionEnemyToEnemyFrequency_0.03', 'HopDecisionFriendToEnemyFrequency_0.0', 'HopDecisionFriendToEnemyFrequency_0.01', 'HopDecisionFriendToEnemyFrequency_0.09', 'HopDecisionFriendToEnemyFrequency_0.25', 'HopDecisionFriendToEnemyFrequency_0.02', 'FromToDecisionFrequency_0.0', 'FromToDecisionFrequency_0.38', 'FromToDecisionFrequency_1.0', 'FromToDecisionFrequency_0.31', 'FromToDecisionFrequency_0.94', 'FromToDecisionFrequency_0.67', 'ProposeEffectFrequency_0.0', 'ProposeEffectFrequency_0.01', 'ProposeEffectFrequency_0.03', 'PushEffectFrequency_0.0', 'PushEffectFrequency_0.5', 'PushEffectFrequency_0.96', 'PushEffectFrequency_0.25', 'FlipFrequency_0.0', 'FlipFrequency_0.87', 'FlipFrequency_1.0', 'FlipFrequency_0.96', 'SetCountFrequency_0.0', 'SetCountFrequency_0.62', 'SetCountFrequency_0.54', 'SetCountFrequency_0.02', 'DirectionCaptureFrequency_0.0', 'DirectionCaptureFrequency_0.55', 'DirectionCaptureFrequency_0.54', 'EncloseCaptureFrequency_0.0', 'EncloseCaptureFrequency_0.08', 'EncloseCaptureFrequency_0.1', 'EncloseCaptureFrequency_0.07', 'EncloseCaptureFrequency_0.12', 
'EncloseCaptureFrequency_0.02', 'EncloseCaptureFrequency_0.09', 'InterveneCaptureFrequency_0.0', 'InterveneCaptureFrequency_0.01', 'InterveneCaptureFrequency_0.14', 'InterveneCaptureFrequency_0.04', 'SurroundCaptureFrequency_0.0', 'SurroundCaptureFrequency_0.01', 'SurroundCaptureFrequency_0.03', 'SurroundCaptureFrequency_0.02', 'NumPlayPhase_3', 'NumPlayPhase_4', 'NumPlayPhase_5', 'NumPlayPhase_6', 'NumPlayPhase_7', 'NumPlayPhase_8', 'LineLossFrequency_0.0', 'LineLossFrequency_0.96', 'LineLossFrequency_0.87', 'LineLossFrequency_0.46', 'LineLossFrequency_0.26', 'LineLossFrequency_0.88', 'LineLossFrequency_0.94', 'ConnectionEndFrequency_0.0', 'ConnectionEndFrequency_0.19', 'ConnectionEndFrequency_1.0', 'ConnectionEndFrequency_0.23', 'ConnectionEndFrequency_0.94', 'ConnectionEndFrequency_0.35', 'ConnectionEndFrequency_0.97', 'ConnectionLossFrequency_0.0', 'ConnectionLossFrequency_0.54', 'ConnectionLossFrequency_0.78', 'GroupEndFrequency_0.0', 'GroupEndFrequency_1.0', 'GroupEndFrequency_0.11', 'GroupEndFrequency_0.79', 'GroupWinFrequency_0.0', 'GroupWinFrequency_0.11', 'GroupWinFrequency_1.0', 'LoopEndFrequency_0.0', 'LoopEndFrequency_0.14', 'LoopEndFrequency_0.66', 'LoopWinFrequency_0.0', 'LoopWinFrequency_0.14', 'LoopWinFrequency_0.66', 'PatternEndFrequency_0.0', 'PatternEndFrequency_0.63', 'PatternEndFrequency_0.35', 'PatternWinFrequency_0.0', 'PatternWinFrequency_0.63', 'PatternWinFrequency_0.35', 'NoTargetPieceWinFrequency_0.0', 'NoTargetPieceWinFrequency_0.72', 'NoTargetPieceWinFrequency_0.77', 'NoTargetPieceWinFrequency_0.95', 'NoTargetPieceWinFrequency_0.32', 'NoTargetPieceWinFrequency_1.0', 'EliminatePiecesLossFrequency_0.0', 'EliminatePiecesLossFrequency_0.85', 'EliminatePiecesLossFrequency_0.96', 'EliminatePiecesLossFrequency_0.68', 'EliminatePiecesDrawFrequency_0.0', 'EliminatePiecesDrawFrequency_0.03', 'EliminatePiecesDrawFrequency_0.91', 'EliminatePiecesDrawFrequency_1.0', 'EliminatePiecesDrawFrequency_0.36', 'EliminatePiecesDrawFrequency_0.86', 
'NoOwnPiecesLossFrequency_0.0', 'NoOwnPiecesLossFrequency_1.0', 'NoOwnPiecesLossFrequency_0.68', 'FillEndFrequency_0.0', 'FillEndFrequency_1.0', 'FillEndFrequency_0.04', 'FillEndFrequency_0.01', 'FillEndFrequency_0.99', 'FillEndFrequency_0.72', 'FillWinFrequency_0.0', 'FillWinFrequency_1.0', 'FillWinFrequency_0.04', 'FillWinFrequency_0.01', 'FillWinFrequency_0.99', 'ReachDrawFrequency_0.0', 'ReachDrawFrequency_0.9', 'ReachDrawFrequency_0.98', 'ScoringLossFrequency_0.0', 'ScoringLossFrequency_0.6', 'ScoringLossFrequency_0.62', 'NoMovesLossFrequency_0.0', 'NoMovesLossFrequency_1.0', 'NoMovesLossFrequency_0.13', 'NoMovesLossFrequency_0.06', 'NoMovesDrawFrequency_0.0', 'NoMovesDrawFrequency_0.01', 'NoMovesDrawFrequency_0.04', 'NoMovesDrawFrequency_0.03', 'NoMovesDrawFrequency_0.22', 'BoardSitesOccupiedChangeNumTimes_0.0', 'BoardSitesOccupiedChangeNumTimes_0.06', 'BoardSitesOccupiedChangeNumTimes_0.42', 'BoardSitesOccupiedChangeNumTimes_0.12', 'BoardSitesOccupiedChangeNumTimes_0.14', 'BoardSitesOccupiedChangeNumTimes_0.94', 'BranchingFactorChangeNumTimesn_0.0', 'BranchingFactorChangeNumTimesn_0.3', 'BranchingFactorChangeNumTimesn_0.02', 'BranchingFactorChangeNumTimesn_0.07', 'BranchingFactorChangeNumTimesn_0.04', 'BranchingFactorChangeNumTimesn_0.13', 'BranchingFactorChangeNumTimesn_0.01', 'BranchingFactorChangeNumTimesn_0.21', 'BranchingFactorChangeNumTimesn_0.03', 'PieceNumberChangeNumTimes_0.0', 'PieceNumberChangeNumTimes_0.06', 'PieceNumberChangeNumTimes_0.42', 'PieceNumberChangeNumTimes_0.12', 'PieceNumberChangeNumTimes_0.14', 'PieceNumberChangeNumTimes_1.0', 'KintsBoard', 'FortyStonesWithFourGapsBoard', 'Roll', 'SumDice', 'CheckmateFrequency', 'NumDice_4']
            
            df.drop(['Id',
            # 모든 값이 동일한 열 제거
            'Properties', 'Format', 'Time', 'Discrete', 'Realtime', 'Turns', 'Alternating', 'Simultaneous', 'HiddenInformation', 'Match', 'AsymmetricRules', 'AsymmetricPlayRules', 'AsymmetricEndRules', 'AsymmetricSetup', 'Players', 'NumPlayers', 'Simulation', 'Solitaire', 'TwoPlayer', 'Multiplayer', 'Coalition', 'Puzzle', 'DeductionPuzzle', 'PlanningPuzzle', 'Equipment', 'Container', 'Board', 'PrismShape', 'ParallelogramShape', 'RectanglePyramidalShape', 'TargetShape', 'BrickTiling', 'CelticTiling', 'QuadHexTiling', 'Hints', 'PlayableSites', 'Component', 'DiceD3', 'BiasedDice', 'Card', 'Domino', 'Rules', 'SituationalTurnKo', 'SituationalSuperko', 'InitialAmount', 'InitialPot', 'Play', 'BetDecision', 'BetDecisionFrequency', 'VoteDecisionFrequency', 'ChooseTrumpSuitDecision', 'ChooseTrumpSuitDecisionFrequency', 'LeapDecisionToFriend', 'LeapDecisionToFriendFrequency', 'HopDecisionEnemyToFriend', 'HopDecisionEnemyToFriendFrequency', 'HopDecisionFriendToFriend', 'FromToDecisionWithinBoard', 'FromToDecisionBetweenContainers', 'BetEffect', 'BetEffectFrequency', 'VoteEffectFrequency', 'SwapPlayersEffectFrequency', 'TakeControl', 'TakeControlFrequency', 'PassEffectFrequency', 'SetCost', 'SetCostFrequency', 'SetPhase', 'SetPhaseFrequency', 'SetTrumpSuit', 'SetTrumpSuitFrequency', 'StepEffectFrequency', 'SlideEffectFrequency', 'LeapEffectFrequency', 'HopEffectFrequency', 'FromToEffectFrequency', 'SwapPiecesEffect', 'SwapPiecesEffectFrequency', 'ShootEffect', 'ShootEffectFrequency', 'MaxCapture', 'OffDiagonalDirection', 'Information', 'HidePieceType', 'HidePieceOwner', 'HidePieceCount', 'HidePieceRotation', 'HidePieceValue', 'HidePieceState', 'InvisiblePiece', 'End', 'LineDrawFrequency', 'ConnectionDraw', 'ConnectionDrawFrequency', 'GroupLossFrequency', 'GroupDrawFrequency', 'LoopLossFrequency', 'LoopDraw', 'LoopDrawFrequency', 'PatternLoss', 'PatternLossFrequency', 'PatternDraw', 'PatternDrawFrequency', 'PathExtentEndFrequency', 'PathExtentWinFrequency', 
'PathExtentLossFrequency', 'PathExtentDraw', 'PathExtentDrawFrequency', 'TerritoryLoss', 'TerritoryLossFrequency', 'TerritoryDraw', 'TerritoryDrawFrequency', 'CheckmateLoss', 'CheckmateLossFrequency', 'CheckmateDraw', 'CheckmateDrawFrequency', 'NoTargetPieceLoss', 'NoTargetPieceLossFrequency', 'NoTargetPieceDraw', 'NoTargetPieceDrawFrequency', 'NoOwnPiecesDraw', 'NoOwnPiecesDrawFrequency', 'FillLoss', 'FillLossFrequency', 'FillDraw', 'FillDrawFrequency', 'ScoringDrawFrequency', 'NoProgressWin', 'NoProgressWinFrequency', 'NoProgressLoss', 'NoProgressLossFrequency', 'SolvedEnd', 'Behaviour', 'StateRepetition', 'PositionalRepetition', 'SituationalRepetition', 'Duration', 'Complexity', 'BoardCoverage', 'GameOutcome', 'StateEvaluation', 'Clarity', 'Narrowness', 'Variance', 'Decisiveness', 'DecisivenessMoves', 'DecisivenessThreshold', 'LeadChange', 'Stability', 'Drama', 'DramaAverage', 'DramaMedian', 'DramaMaximum', 'DramaMinimum', 'DramaVariance', 'DramaChangeAverage', 'DramaChangeSign', 'DramaChangeLineBestFit', 'DramaChangeNumTimes', 'DramaMaxIncrease', 'DramaMaxDecrease', 'MoveEvaluation', 'MoveEvaluationAverage', 'MoveEvaluationMedian', 'MoveEvaluationMaximum', 'MoveEvaluationMinimum', 'MoveEvaluationVariance', 'MoveEvaluationChangeAverage', 'MoveEvaluationChangeSign', 'MoveEvaluationChangeLineBestFit', 'MoveEvaluationChangeNumTimes', 'MoveEvaluationMaxIncrease', 'MoveEvaluationMaxDecrease', 'StateEvaluationDifference', 'StateEvaluationDifferenceAverage', 'StateEvaluationDifferenceMedian', 'StateEvaluationDifferenceMaximum', 'StateEvaluationDifferenceMinimum', 'StateEvaluationDifferenceVariance', 'StateEvaluationDifferenceChangeAverage', 'StateEvaluationDifferenceChangeSign', 'StateEvaluationDifferenceChangeLineBestFit', 'StateEvaluationDifferenceChangeNumTimes', 'StateEvaluationDifferenceMaxIncrease', 'StateEvaluationDifferenceMaxDecrease', 'BoardSitesOccupied', 'BoardSitesOccupiedMinimum', 'BranchingFactor', 'BranchingFactorMinimum', 'DecisionFactor', 
'DecisionFactorMinimum', 'MoveDistance', 'MoveDistanceMinimum', 'PieceNumber', 'PieceNumberMinimum', 'ScoreDifference', 'ScoreDifferenceMinimum', 'ScoreDifferenceChangeNumTimes', 'Roots', 'Cosine', 'Sine', 'Tangent', 'Exponential', 'Logarithm', 'ExclusiveDisjunction', 'Float', 'HandComponent', 'SetHidden', 'SetInvisible', 'SetHiddenCount', 'SetHiddenRotation', 'SetHiddenState', 'SetHiddenValue', 'SetHiddenWhat', 'SetHiddenWho',
            # 훈련 데이터에만 존재하는 열 제거
            'num_wins_agent1', 'num_draws_agent1', 'num_losses_agent1',
            #object
            'Behaviour', 'StateRepetition', 'Duration', 'Complexity', 'BoardCoverage', 'GameOutcome', 'StateEvaluation', 'Clarity', 'Decisiveness', 'Drama', 'MoveEvaluation', 'StateEvaluationDifference', 'BoardSitesOccupied', 'BranchingFactor', 'DecisionFactor', 'MoveDistance', 'PieceNumber', 'ScoreDifference','selection1', 'selection2', 'exploration_const1', 'exploration_const2', 'playout1', 'playout2', 'score_bounds1', 'score_bounds2',
            ]+drop_cols,axis=1,inplace=True,errors='ignore')#对于测试集中没有的列可以直接忽略 
            
            df=self.reduce_mem_usage(df)
            print(f"feature_count:{len(df.columns)}")
            print("-"*30)
            return df

        def CV_feats(self,df,mode='',model_name='',fold=0):
            """Add per-fold text features for the rule-description columns.

            For each of ``EnglishRules`` and ``LudRules`` the column is passed
            through ``self.clean``, a ``<col>_len`` length feature is added and
            TF-IDF features named ``<col>_tfidf_<i>`` are appended.

            Args:
                df: pandas DataFrame holding the two text columns plus
                    ``agent1`` and ``agent2``.
                mode: ``'train'`` fits a new vectorizer, persists it through
                    ``self.pickle_dump`` and registers it in
                    ``self.tfidf_paths``. Any other value reuses the vectorizer
                    registered for ``(model_name, fold, col)``; when none is
                    registered no TF-IDF columns are added for that column.
                model_name: model identifier used in the vectorizer file name.
                fold: fold index used in the vectorizer file name.

            Returns:
                A DataFrame with the new features and without the raw text and
                agent columns. Callers must use the returned frame.
            """
            str_cols=['EnglishRules', 'LudRules']#'agent1','agent2',
            for col in str_cols:
                df=self.clean(df,col)
                df[f'{col}_len']=df[col].apply(len)
                tfidf_file=f'{model_name}_{fold}_{col}tfidf.model'
                if mode=='train':
                    tfidf=TfidfVectorizer(max_features=500,ngram_range=(2,3))
                    tfidf_feats=tfidf.fit_transform(df[col]).toarray()
                    self.pickle_dump(tfidf,tfidf_file)
                    self.tfidf_paths.append((model_name,fold,col))
                elif (model_name,fold,col) in self.tfidf_paths:
                    tfidf=self.pickle_load(tfidf_file)
                    tfidf_feats=tfidf.transform(df[col]).toarray()
                else:
                    # No vectorizer was fitted for this model/fold/column.
                    continue
                # Attach all TF-IDF columns in one concat: inserting hundreds of
                # columns one at a time fragments the frame and is much slower.
                tfidf_df=pd.DataFrame(
                    tfidf_feats,
                    index=df.index,
                    columns=[f"{col}_tfidf_{i}" for i in range(tfidf_feats.shape[1])],
                )
                df=pd.concat([df,tfidf_df],axis=1)
            df.drop(str_cols+['agent1','agent2'],axis=1,inplace=True)
            return df
        
        def RMSE(self,y_true,y_pred):
            """Return the root-mean-squared error between targets and predictions."""
            residual=y_true-y_pred
            mean_sq=np.mean(residual**2)
            return np.sqrt(mean_sq)
        
        def train_model(self,):
            """Fit the two CatBoost configurations with grouped, stratified CV.

            ``self.train`` is replaced by its feature-engineered version
            (``self.FE``) and, when ``APP.short_dataset`` is set, truncated to
            its first 1000 rows. For every model and fold the fitted model is
            persisted with ``self.pickle_dump`` and registered in
            ``self.model_paths``. Per model, the out-of-fold predictions are
            scaled by 1.1, clipped to [-0.985, 0.985], saved to
            ``<model_name>_oof.npy`` and their RMSE is printed.
            """
            self.train=self.FE(self.train,mode='train')
            #https://www.kaggle.com/code/ravi20076/mcts2024-mlmodels-v1/notebook
            # Settings shared by both CatBoost variants.
            shared_params={
                'task_type'           : "GPU",
                'eval_metric'         : "RMSE",
                'iterations'          : 100 if APP.small_iterations else 3096,
                'learning_rate'       : 0.08,
                'max_depth'           : 12,
                'l2_leaf_reg'         : 1.25,
                'min_data_in_leaf'    : 24,
                'verbose'             : 0,
                }
            cat_params1=dict(shared_params,
                             bagging_temperature=0.50,
                             random_strength=0.25)
            cat_params2=dict(shared_params,
                             bagging_temperature=0.60,
                             random_strength=0.20,
                             max_bin=2048)
            models=[
                    (CatBoostRegressor(**cat_params1),'cat1'),
                    (CatBoostRegressor(**cat_params2),'cat2'),
                ]
            if APP.short_dataset:
                self.train = self.train[:1000]

            # Features, target and CV groups are the same for every model, so
            # they are built once instead of once per model.
            X=self.train.drop([self.target,'GameRulesetName'],axis=1)
            GameRulesetName=self.train['GameRulesetName']
            y=self.train[self.target]
            # Discretised target, used only as the stratification label.
            y_int=round(y*15)

            for (model,model_name) in models:
                print("start training")
                oof_preds=np.zeros(len(X))

                sgkf = StratifiedGroupKFold(n_splits=self.num_folds,random_state=2024,shuffle=True)

                for fold, (train_index, valid_index) in enumerate(sgkf.split(X,y_int,GameRulesetName)):
                    print(f"fold:{fold}")

                    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
                    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

                    # The vectorizer is fitted on the training part only and
                    # then reused for the validation part of the same fold.
                    X_train=self.CV_feats(X_train,mode='train',model_name=model_name,fold=fold)
                    X_valid=self.CV_feats(X_valid,mode='test',model_name=model_name,fold=fold)

                    model.fit(X_train, y_train,
                        eval_set=(X_valid, y_valid),
                        early_stopping_rounds=100, verbose=100)

                    oof_preds[valid_index]=model.predict(X_valid)

                    self.pickle_dump(model,f'{model_name}_{fold}.model')
                    self.model_paths.append((model_name,fold))

                    del X_train,X_valid,y_train,y_valid
                    gc.collect()

                # Same post-processing as infer_model; computed once and reused
                # for both the saved array and the reported score.
                clipped_oof=np.clip(oof_preds*1.1,-0.985,0.985)
                np.save(f"{model_name}_oof.npy",clipped_oof)

                print(f"RMSE:{self.RMSE(y.values,clipped_oof)}")
                
        def infer_model(self,test):
            """Predict with every saved fold model and average the results.

            Args:
                test: pandas DataFrame of raw test rows.

            Returns:
                The element-wise mean over all registered models of their
                predictions scaled by 1.1 and clipped to [-0.985, 0.985].
            """
            features=self.FE(test,mode='test')
            features.drop(['GameRulesetName'],axis=1,inplace=True)
            per_model=[]
            for model_name,fold in self.model_paths:
                # Each fold has its own TF-IDF vectorizer, so the text features
                # are rebuilt from a fresh copy for every model.
                fold_input=self.CV_feats(features.copy(),mode='test',model_name=model_name,fold=fold)
                model=self.pickle_load(f'{model_name}_{fold}.model')
                raw=model.predict(fold_input)
                per_model.append(np.clip(raw*1.1,-0.985,0.985))
            return np.mean(per_model,axis=0)
        
    # Pipeline instance shared by every predict() call.
    preprocessor=Preprocessor(num_folds=5,train=train)
    # Number of predict() calls served so far; training runs while this is 0.
    counter = 0
    def predict(test, submission):
        """Return model_1 predictions for one test batch.

        The models are trained on the first call only. ``test`` must offer
        ``to_pandas()``; ``submission`` is accepted for interface
        compatibility and is not used.
        """
        needs_training = model_1.counter == 0
        if needs_training:
            model_1.preprocessor.train_model()
        model_1.counter += 1
        pandas_test = test.to_pandas()
        return model_1.preprocessor.infer_model(pandas_test)

---
---
# **》》》Model2**
---
---

In [None]:
class model_2:
    class CFG:
        """Static configuration for model_2: input files, CV settings and booster parameters."""

        # --- input files -----------------------------------------------------
        train_path = Path('/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv')
        importances_path = Path('/kaggle/input/mcts-gbdt-select-200-features/importances.csv')    
        batch_size = 65536  # forwarded to pl.read_csv

        # --- cross-validation / plotting --------------------------------------
        n_splits = 5
        early_stop = 500
        color = '#C9A9A6'

        # Boosting rounds for both libraries: a short run when
        # APP.small_iterations is set, the full budget otherwise.
        _n_rounds = 200 if APP.small_iterations else 20000

        # --- LightGBM -----------------------------------------------------------
        # NOTE(review): lgb_w + ctb_w is 1.10, so the blend is not a convex
        # combination - confirm this is intentional.
        lgb_w = 0.85
        lgb_p = dict(
            objective='regression',
            min_child_samples=24,
            num_iterations=_n_rounds,
            learning_rate=0.07,
            extra_trees=True,
            reg_lambda=0.8,
            reg_alpha=0.1,
            num_leaves=64,
            metric='rmse',
            device='CPU',
            max_depth=24,
            max_bin=128,
            verbose=-1,
            seed=42,
        )

        # --- CatBoost -----------------------------------------------------------
        ctb_w = 0.25
        ctb_p = dict(
            loss_function='RMSE',
            learning_rate=0.03,
            num_trees=_n_rounds,
            random_state=42,
            task_type='CPU',
            reg_lambda=0.8,
            depth=8,
        )

    class FE:
        """Polars-based loading and cleaning of the competition CSV."""

        def __init__(self, batch_size):
            # Forwarded to pl.read_csv in apply_fe.
            self.batch_size = batch_size
            
        def drop_cols(self, df, bad_cols=None): # bad_cols must be provided when processing the test data
            """Drop columns that carry no usable signal.

            Args:
                df: polars DataFrame.
                bad_cols: columns to drop in addition to the fixed redundant
                    ones. ``None`` (training) derives the list from ``df``:
                    every column that is entirely null or has a single unique
                    value. For test data pass the list returned for the
                    training data.

            Returns:
                ``(df, bad_cols)``: the reduced frame and the list that was
                applied.
            """
            # Define redundant columns for model development
            cols = ['Id', 
                    'LudRules', 
                    'EnglishRules',
                    'num_wins_agent1',
                    'num_draws_agent1',
                    'num_losses_agent1']
            
            df = df.drop([col for col in cols if col in df.columns])

            if bad_cols is None:
                # Derived from the training data only. Re-deriving "all null"
                # from a test batch could drop a column the models were
                # trained on whenever that batch happens to contain no values
                # for it, so test frames rely on the training list alone.
                bad_cols = [
                    col for col in df.columns
                    if df[col].null_count() == df.height or df[col].n_unique() == 1
                ]

            # Tolerate columns that are already absent from this frame.
            df = df.drop([col for col in bad_cols if col in df.columns])
            return df, bad_cols
        
        def cast_datatypes(self, df):
            """Cast the categorical columns to strings and all others to Int16 / Float32."""
            # Set datatype for categorical columns
            cat_cols = ['GameRulesetName', 'agent1', 'agent2']
            casts = [pl.col(col).cast(pl.String) for col in cat_cols]
            
            # Find numeric columns
            for col in df.columns:
                if col in cat_cols:
                    continue
                # Set datatype for a numeric column as per the datatype of the first non-null item
                val = df.select(pl.col(col).drop_nulls().first()).item()
                casts.append(pl.col(col).cast(pl.Int16 if isinstance(val, int) else pl.Float32))

            # Each cast only touches its own column, so they can all be
            # applied in a single pass instead of one with_columns per column.
            return df.with_columns(casts)
        
        def info(self, df):
            """Print the shape and estimated memory footprint of ``df``."""
            print(f'Shape: {df.shape}')   
            mem = df.estimated_size() / 1024**2
            print('Memory usage: {:.2f} MB\n'.format(mem))
            
        def apply_fe(self, path):            
            """Read the CSV at ``path`` and clean it.

            Returns:
                ``(df, bad_cols, cat_cols)``: the cleaned polars frame, the
                dropped-column list to reuse on test data, and the names of
                the string-typed columns.
            """
            df = pl.read_csv(path, batch_size=self.batch_size)
            df, bad_cols = self.drop_cols(df)
            df = self.cast_datatypes(df)
            self.info(df)
            cat_cols = [col for col in df.columns if df[col].dtype == pl.String]
            return df, bad_cols, cat_cols

    # Shared feature-engineering helper used by train_model() and predict().
    fe = FE(CFG.batch_size)

    class MD:
        """Cross-validated LightGBM / CatBoost training and stacked inference."""

        def __init__(self, 
                    importances_path, 
                    early_stop, 
                    n_splits, 
                    lgb_w, 
                    lgb_p, 
                    ctb_w, 
                    ctb_p, 
                    color):
            """Store the configuration.

            Args:
                importances_path: CSV with a ``drop_features`` column naming
                    the features to exclude.
                early_stop: early-stopping patience passed to both libraries.
                n_splits: number of CV folds.
                lgb_w, ctb_w: blend weights of the LightGBM / CatBoost outputs.
                lgb_p, ctb_p: constructor parameters of the two regressors.
                color: marker colour of the CV plot.
            """
            self.importances_path = importances_path
            self.early_stop = early_stop
            self.n_splits = n_splits
            self.lgb_w = lgb_w
            self.lgb_p = lgb_p
            self.ctb_w = ctb_w
            self.ctb_p = ctb_p
            self.color = color
            
        def plot_cv(self, fold_scores, title):
            """Show a plotly chart of the per-fold RMSE scores and their mean."""
            fold_scores = [round(score, 3) for score in fold_scores]
            mean_score = round(np.mean(fold_scores), 3)
            std_score = round(np.std(fold_scores), 3)

            fig = go.Figure()

            # One marker per fold.
            fig.add_trace(go.Scatter(
                x = list(range(1, len(fold_scores) + 1)),
                y = fold_scores,
                mode = 'markers', 
                name = 'Fold Scores',
                marker = dict(size = 24, color=self.color, symbol='diamond'),
                text = [f'{score:.3f}' for score in fold_scores],
                hovertemplate = 'Fold %{x}: %{text}<extra></extra>',
                hoverlabel=dict(font=dict(size=16))  
            ))

            # Dashed horizontal line at the mean score.
            fig.add_trace(go.Scatter(
                x = [1, len(fold_scores)],
                y = [mean_score, mean_score],
                mode = 'lines',
                name = f'Mean: {mean_score:.3f}',
                line = dict(dash = 'dash', color = '#FFBF00'),
                hoverinfo = 'none'
            ))

            fig.update_layout(
                title = f'{title} | Cross-Validation RMSE Scores | Variation of CV scores: {mean_score} ± {std_score}',
                xaxis_title = 'Fold',
                yaxis_title = 'RMSE Score',
                plot_bgcolor = 'rgba(0,0,0,0)',
                paper_bgcolor = 'rgba(0,0,0,0)',
                xaxis = dict(
                    gridcolor = 'lightgray',
                    tickmode = 'linear',
                    tick0 = 1,
                    dtick = 1,
                    range = [0.5, len(fold_scores) + 0.5]
                ),
                yaxis = dict(gridcolor = 'lightgray')
            )
            fig.show() 
            
        def train_model(self, data, cat_cols, title):
            """Train one model per CV fold and collect out-of-fold predictions.

            Args:
                data: pandas DataFrame with the features, ``utility_agent1``
                    and ``GameRulesetName``. Its ``cat_cols`` are converted to
                    the ``category`` dtype in place.
                cat_cols: names of the categorical columns.
                title: selects the library by prefix (``'LightGBM'`` or
                    ``'CatBoost'``) and is used as the plot title.

            Returns:
                ``(models, oof_preds)``: the fitted fold models and the
                out-of-fold prediction for every row of ``data``.

            Raises:
                ValueError: if ``title`` starts with neither prefix.
            """
            if not title.startswith(('LightGBM', 'CatBoost')):
                raise ValueError(f"title must start with 'LightGBM' or 'CatBoost', got {title!r}")

            importances = pd.read_csv(self.importances_path)
            drop_features = importances['drop_features'].tolist()
            
            for col in cat_cols:
                data[col] = data[col].astype('category')
            
            # Define features (X), label (y) and grouping column (group) for CV.
            # The excluded features are removed once here rather than per fold.
            X = data.drop(['utility_agent1'], axis=1).drop(drop_features, axis=1)
            y = data['utility_agent1']
            group = data['GameRulesetName']
            # Discretised target, used only as the stratification label.
            y_int=round(y*15)
            cv = StratifiedGroupKFold(n_splits=self.n_splits)
            models, scores = [], []
            
            # Initialize out-of-fold predictions array
            oof_preds = np.zeros(len(X))
            
            for fold, (train_index, valid_index) in enumerate(cv.split(X, y_int, group)):
                X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
                y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
                print(f'Fold {fold+1} | {X_train.shape[0]:,} train rows | {X_valid.shape[0]:,} valid rows | {X_train.shape[1]} features')
                    
                if title.startswith('LightGBM'):
                    model = lgb.LGBMRegressor(**self.lgb_p)

                    model.fit(X_train, y_train,
                            eval_set=[(X_valid, y_valid)],
                            eval_metric='rmse',
                            callbacks=[lgb.early_stopping(self.early_stop, verbose=0), lgb.log_evaluation(0)])
                
                else:  # 'CatBoost', guaranteed by the check at the top
                    model = CatBoostRegressor(**self.ctb_p, verbose=0, cat_features=cat_cols)

                    model.fit(X_train, y_train,
                            eval_set=(X_valid, y_valid),
                            early_stopping_rounds=self.early_stop, verbose=0)

                models.append(model)

                # Store out-of-fold predictions for this fold
                oof_preds[valid_index] = model.predict(X_valid)
                # RMSE as the square root of the MSE: the ``squared=False``
                # shortcut is not available in recent scikit-learn releases.
                score = np.sqrt(mse(y_valid, oof_preds[valid_index]))
                scores.append(score)
            
            self.plot_cv(scores, title)
            return models, oof_preds
        
        def inference(self, data, cat_cols, lgb_models, ctb_models, lgb_models_oof, ctb_models_oof):
            """Run the two-stage ensemble on ``data`` and return the blended prediction.

            Args:
                data: pandas DataFrame of test features; not modified.
                cat_cols: names of the categorical columns.
                lgb_models, ctb_models: first-stage fold models.
                lgb_models_oof, ctb_models_oof: second-stage fold models that
                    additionally consume the ``lgb_oof_preds`` and
                    ``ctb_oof_preds`` columns.

            Returns:
                ``lgb_preds * lgb_w + ctb_preds * ctb_w`` of the second stage.
            """
            importances = pd.read_csv(self.importances_path)
                
            drop_features = importances['drop_features'].tolist()
            data = data.drop(drop_features, axis=1)

            for col in cat_cols:
                data[col] = data[col].astype('category')

            # Both first-stage ensembles must see the plain feature frame, so
            # their predictions are computed before either stacking column is
            # attached.
            lgb_base = np.mean([model.predict(data) for model in lgb_models], axis=0)
            ctb_base = np.mean([model.predict(data) for model in ctb_models], axis=0)
            data['lgb_oof_preds'] = lgb_base
            data['ctb_oof_preds'] = ctb_base
            
            lgb_preds = np.mean([model.predict(data) for model in lgb_models_oof], axis=0)  
            ctb_preds = np.mean([model.predict(data) for model in ctb_models_oof], axis=0)    
            
            return lgb_preds * self.lgb_w + ctb_preds * self.ctb_w
        
    # Shared modelling helper, configured entirely from CFG.
    md = MD(importances_path=CFG.importances_path,
            early_stop=CFG.early_stop,
            n_splits=CFG.n_splits,
            lgb_w=CFG.lgb_w,
            lgb_p=CFG.lgb_p,
            ctb_w=CFG.ctb_w,
            ctb_p=CFG.ctb_p,
            color=CFG.color)

    # Training artefacts, filled in by train_model() and read by predict().
    bad_cols = cat_cols = None               # column lists from fe.apply_fe
    lgb_models = ctb_models = None           # first-stage fold models
    lgb_models_oof = ctb_models_oof = None   # second-stage fold models

    def train_model():            
        """Train both stages of model_2 and store the results on the class.

        Stage one fits LightGBM and CatBoost fold models on the cleaned
        training data. Their out-of-fold predictions are then added as the
        ``lgb_oof_preds`` / ``ctb_oof_preds`` columns and stage two fits both
        libraries again on the extended frame.
        """
        train, model_2.bad_cols, model_2.cat_cols = model_2.fe.apply_fe(model_2.CFG.train_path)
        train = train.to_pandas()
        if APP.short_dataset:
            # Copy the subset: columns are assigned to this frame below, and
            # writing into a row slice of the full frame is a
            # chained-assignment hazard.
            train = train.iloc[:1000].copy()
        model_2.lgb_models, lgb_oof_preds = model_2.md.train_model(train, model_2.cat_cols, title='LightGBM')
        model_2.ctb_models, ctb_oof_preds = model_2.md.train_model(train, model_2.cat_cols, title='CatBoost')
        train['lgb_oof_preds'] = lgb_oof_preds
        train['ctb_oof_preds'] = ctb_oof_preds
        model_2.lgb_models_oof, _ = model_2.md.train_model(train, model_2.cat_cols, title='LightGBM w/ OOF')
        model_2.ctb_models_oof, _ = model_2.md.train_model(train, model_2.cat_cols, title='CatBoost w/ OOF')

    # Number of predict() calls served so far; training runs while this is 0.
    counter = 0
    def predict(test, submission):
        """Return model_2 predictions for one polars test batch.

        The models are trained on the first call only. ``submission`` is
        accepted for interface compatibility and is not used.
        """
        needs_training = model_2.counter == 0
        if needs_training:
            model_2.train_model() 
        model_2.counter += 1
        # Same cleaning as for training, reusing the training-time column list.
        cleaned, _ = model_2.fe.drop_cols(test, model_2.bad_cols)
        frame = model_2.fe.cast_datatypes(cleaned).to_pandas()
        return model_2.md.inference(
            frame,
            model_2.cat_cols,
            model_2.lgb_models,
            model_2.ctb_models,
            model_2.lgb_models_oof,
            model_2.ctb_models_oof,
        )

# model 3

In [None]:
class model_3:
    """MLP regressor for ``utility_agent1`` trained with group-wise cross-validation.

    One network and one ``StandardScaler`` are kept per fold; inference
    averages the predictions of all fold networks.
    """

    class Config:
        train_path = '/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv'
        early_stop = 55  # epochs without validation improvement before a fold stops
        n_splits = 5
        seed = 1212
        split_agent_features = True  # split agent strings on '-' into separate columns
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        mlp_params = {
            'input_dim': None,  # filled in by train_model from the training frame
            'hidden_dims': [
                512, 512, 512, 512,
                256, 256, 256, 256,
                128, 128, 128, 128,
                64, 64, 64, 64,
                32, 32, 32, 32
            ],
            'dropout_rate': 0.35,
            'learning_rate': 0.0008,
            'batch_size': 256,
            'epochs': 200,
            'use_residual': True,
        }

    class DataProcessor:
        """Converts a raw polars frame into a numeric pandas frame for the MLP."""

        def __init__(self, dropped_cols, agent_cols):
            self.dropped_cols = dropped_cols
            self.agent_cols = agent_cols
            self.scaler = StandardScaler()
            self.label_encoders = {}  # column name -> fitted LabelEncoder

        def process_data(self, df):
            """Drop unused columns, split and encode the agent columns, cast the rest to float32.

            Args:
                df: polars DataFrame.

            Returns:
                A pandas DataFrame. Encoders are fitted the first time a
                column is seen and reused on later calls.
            """
            # Imported locally: the file-level imports do not bring this name in.
            from sklearn.preprocessing import LabelEncoder

            df = df.drop([col for col in self.dropped_cols if col in df.columns])

            # Config is an attribute of the outer class; a bare ``Config`` is
            # not resolvable from inside a method body.
            if model_3.Config.split_agent_features:
                for col in self.agent_cols:
                    df = df.with_columns(
                        pl.col(col).str.split(by="-").list.to_struct(fields=lambda idx: f"{col}_{idx}")
                    ).unnest(col).drop(f"{col}_0")

            df = df.to_pandas()

            categorical_cols = [col for col in df.columns if col[:6] in self.agent_cols]
            numerical_cols = [col for col in df.columns if col[:6] not in self.agent_cols]

            for col in categorical_cols:
                encoder = self.label_encoders.get(col)
                if encoder is None:
                    encoder = self.label_encoders[col] = LabelEncoder()
                    df[col] = encoder.fit_transform(df[col])
                    continue
                # Known categories keep their fitted code; categories unseen
                # at fit time get fresh codes after the known range. The
                # extra codes are not stored on the encoder.
                codes = {cat: idx for idx, cat in enumerate(encoder.classes_)}
                unseen = sorted(set(df[col]) - codes.keys(), key=str)
                n_known = len(codes)
                codes.update({cat: n_known + i for i, cat in enumerate(unseen)})
                df[col] = df[col].map(codes)

            df[numerical_cols] = df[numerical_cols].astype(np.float32)

            print(f'Data shape after processing agents: {df.shape}')
            return df

        def feature_engineering(self, df):
            """Append ratio / interaction features derived from the numeric columns.

            Returns:
                ``(df, new_features)``: the frame with the added columns and
                the list of their names.
            """
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            num = df[numeric_cols].copy()

            def ratio(numerator, denominator):
                # The small constant avoids division by exactly zero.
                return num[numerator] / (num[denominator] + 1e-15)

            derived = {
                'Playouts/Moves': ratio('PlayoutsPerSecond', 'MovesPerSecond'),
                'EfficiencyPerPlayout': ratio('MovesPerSecond', 'PlayoutsPerSecond'),
                'TurnsDurationEfficiency': ratio('DurationActions', 'DurationTurnsStdDev'),
                'AdvantageBalanceRatio': ratio('AdvantageP1', 'Balance'),
                'ActionTimeEfficiency': ratio('DurationActions', 'MovesPerSecond'),
                'StandardizedTurnsEfficiency': ratio('DurationTurnsStdDev', 'DurationActions'),
                'AdvantageTimeImpact': ratio('AdvantageP1', 'DurationActions'),
                'DurationToComplexityRatio': ratio('DurationActions', 'StateTreeComplexity'),
                'NormalizedGameTreeComplexity': ratio('GameTreeComplexity', 'StateTreeComplexity'),
                'ComplexityBalanceInteraction': num['Balance'] * num['GameTreeComplexity'],
                'OverallComplexity': num['StateTreeComplexity'] + num['GameTreeComplexity'],
                'ComplexityPerPlayout': ratio('GameTreeComplexity', 'PlayoutsPerSecond'),
                'TurnsNotTimeouts/Moves': ratio('DurationTurnsNotTimeouts', 'MovesPerSecond'),
                'Timeouts/DurationActions': ratio('Timeouts', 'DurationActions'),
                'OutcomeUniformity/AdvantageP1': ratio('OutcomeUniformity', 'AdvantageP1'),
                'ComplexDecisionRatio': (num['StepDecisionToEnemy'] +
                                         num['SlideDecisionToEnemy'] +
                                         num['HopDecisionMoreThanOne']),
                'AggressiveActionsRatio': (num['StepDecisionToEnemy'] +
                                           num['HopDecisionEnemyToEnemy'] +
                                           num['HopDecisionFriendToEnemy'] +
                                           num['SlideDecisionToEnemy']),
            }

            new_features = list(derived)
            for col in new_features:
                df[col] = derived[col]

            print(f'Data shape after new_features: {df.shape}')
            return df, new_features

        def process_and_engineer(self, df):
            """Run ``process_data`` then ``feature_engineering``; returns ``(df, new_features)``."""
            df = self.process_data(df)
            df, new_features = self.feature_engineering(df)
            return df, new_features

    class MCTSDataset(Dataset):
        """Tensor dataset yielding ``(x, y)`` pairs, or ``x`` alone when no targets are given."""

        def __init__(self, X, y=None):
            self.X = torch.FloatTensor(X)
            self.y = torch.FloatTensor(y) if y is not None else None

        def __len__(self):
            return len(self.X)

        def __getitem__(self, idx):
            if self.y is not None:
                return self.X[idx], self.y[idx]
            return self.X[idx]

    class MLPModel(nn.Module):
        """Stack of Linear -> BatchNorm -> ReLU -> Dropout blocks with a scalar output head.

        ``use_residual`` is accepted for configuration compatibility but is
        currently ignored: no residual connections are built.
        """

        def __init__(self, input_dim, hidden_dims, dropout_rate=0.3, use_residual=True):
            super().__init__()
            layers = []
            prev_dim = input_dim

            for hidden_dim in hidden_dims:
                layers.append(nn.Sequential(
                    nn.Linear(prev_dim, hidden_dim),
                    nn.BatchNorm1d(hidden_dim),
                    nn.ReLU(),
                    nn.Dropout(dropout_rate)
                ))
                prev_dim = hidden_dim

            layers.append(nn.Linear(prev_dim, 1))
            self.model = nn.Sequential(*layers)

        def forward(self, x):
            # Remove only the trailing size-1 output dimension: a bare
            # squeeze() would also collapse a batch of one into a 0-d tensor.
            return self.model(x).squeeze(-1)

    def __init__(self, dropped_cols, agent_cols):
        self.processor = self.DataProcessor(dropped_cols, agent_cols)
        self.models = []   # one trained network per fold
        self.scalers = []  # the scaler fitted on the matching fold's training part

    def train_model(self, train_df, group_col):
        """Train one MLP per ``GroupKFold`` split.

        Each fold stops after ``Config.early_stop`` epochs without an
        improvement of the validation RMSE and keeps the weights of its best
        epoch. The network and its scaler are appended to ``self.models`` /
        ``self.scalers``.

        Args:
            train_df: pandas DataFrame of numeric features plus ``utility_agent1``.
            group_col: group label per row, used to keep groups within one fold.
        """
        import copy

        X = train_df.drop(['utility_agent1'], axis=1)
        y = train_df['utility_agent1']

        params = self.Config.mlp_params
        params['input_dim'] = X.shape[1]
        batch_size = params['batch_size']
        group_kfold = GroupKFold(n_splits=self.Config.n_splits)

        for fi, (train_idx, valid_idx) in enumerate(group_kfold.split(X, y, groups=group_col)):
            print(f'Fold {fi + 1}/{self.Config.n_splits} ...')

            X_train, X_valid = X.iloc[train_idx].values, X.iloc[valid_idx].values
            y_train, y_valid = y.iloc[train_idx].values, y.iloc[valid_idx].values

            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_valid = scaler.transform(X_valid)

            train_dataset = self.MCTSDataset(X_train, y_train)
            valid_dataset = self.MCTSDataset(X_valid, y_valid)
            # BatchNorm1d cannot run in training mode on a single-sample
            # batch, so a trailing batch of exactly one row is dropped.
            drop_last = len(train_dataset) > batch_size and len(train_dataset) % batch_size == 1
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last)
            valid_loader = DataLoader(valid_dataset, batch_size=batch_size)

            model = self.MLPModel(
                params['input_dim'],
                params['hidden_dims'],
                params['dropout_rate']
            ).to(self.Config.device)

            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

            best_valid_rmse = float('inf')
            best_state = None
            stale_epochs = 0
            for epoch in range(params['epochs']):
                model.train()
                for batch_X, batch_y in train_loader:
                    batch_X, batch_y = batch_X.to(self.Config.device), batch_y.to(self.Config.device)
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()

                model.eval()
                valid_preds = []
                valid_targets = []
                with torch.no_grad():
                    for batch_X, batch_y in valid_loader:
                        batch_X = batch_X.to(self.Config.device)
                        outputs = model(batch_X)
                        valid_preds.extend(outputs.cpu().numpy())
                        valid_targets.extend(batch_y.numpy())

                errors = np.asarray(valid_targets) - np.asarray(valid_preds)
                valid_rmse = float(np.sqrt(np.mean(errors ** 2)))
                if valid_rmse < best_valid_rmse:
                    best_valid_rmse = valid_rmse
                    best_state = copy.deepcopy(model.state_dict())
                    stale_epochs = 0
                else:
                    stale_epochs += 1
                    if stale_epochs >= self.Config.early_stop:
                        break

            # Keep the best epoch's weights rather than the last epoch's.
            if best_state is not None:
                model.load_state_dict(best_state)

            self.models.append(model)
            self.scalers.append(scaler)

    def infer_model(self, test_df):
        """Return the mean prediction of all fold networks for ``test_df``.

        Args:
            test_df: pandas DataFrame with the same feature columns used for training.

        Raises:
            RuntimeError: if no model has been trained yet.
        """
        if not self.models:
            raise RuntimeError('model_3 has no trained models; call train_model() first')

        predictions = []
        for model, scaler in zip(self.models, self.scalers):
            X = scaler.transform(test_df.values)
            X_tensor = torch.FloatTensor(X).to(self.Config.device)

            model.eval()
            with torch.no_grad():
                predictions.append(model(X_tensor).cpu().numpy())

        return np.mean(predictions, axis=0)

    def predict(self, test, submission):
        """Process one raw test batch and return the raw prediction array.

        ``submission`` is accepted for interface compatibility and is not used.
        """
        # process_and_engineer returns (frame, new_feature_names); only the
        # frame is fed to the networks.
        processed_test_df, _ = self.processor.process_and_engineer(test)
        return self.infer_model(processed_test_df)
       

---
---
# **》》》Finally: Blend & Call Inference**
---
---

In [None]:
# def predict(test, submission):
#      result_1 = model_1.predict(test, submission)
#      result_2 = model_2.predict(test, submission)
#      return submission.with_columns(pl.Series('utility_agent1', result_1*0.50 + result_2*0.50))

# if APP.local and not APP.submit:
#      test = pl.read_csv(APP.test_file)
#      submission = pl.read_csv(APP.sample_subm_file)
#      result = predict(test, submission)
# else:
#      # Call the gateway server
#      inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)
#      if os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
#          inference_server.serve()
#      else:
#          inference_server.run_local_gateway((APP.test_file, APP.sample_subm_file))

In [None]:
def predict(test, submission):
    """Blend the model_1 and model_2 predictions with equal weights.

    Returns ``submission`` with its ``utility_agent1`` column set to the
    blended prediction. model_3 is not part of the blend.
    """
    preds_model_1 = model_1.predict(test, submission)
    preds_model_2 = model_2.predict(test, submission)
    blended = preds_model_1 * 0.5 + preds_model_2 * 0.5
    blended_column = pl.Series('utility_agent1', blended)
    return submission.with_columns(blended_column)

# Offline run: score the local test file directly, without the gateway.
run_offline = APP.local and not APP.submit
if run_offline:
    test = pl.read_csv(APP.test_file)
    submission = pl.read_csv(APP.sample_subm_file)
    result = predict(test, submission)
else:
    # Hand predict() to the competition gateway server.
    inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)
    is_competition_rerun = os.getenv("KAGGLE_IS_COMPETITION_RERUN")
    if is_competition_rerun:
        inference_server.serve()
    else:
        gateway_inputs = (APP.test_file, APP.sample_subm_file)
        inference_server.run_local_gateway(gateway_inputs)