In [1]:
import os
import re

import pandas as pd
import polars as pl
import tensorflow as tf
import numpy as np

import keras
from keras import layers, losses, metrics

# from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [2]:
import kaggle_evaluation.mcts_inference_server

In [3]:
model = keras.saving.load_model('/kaggle/input/lstm_for_eng_and_lud_v1/keras/default/1/lstm_both_epoch01_val_0.1953.keras')

In [4]:
class VectorNormalizer(keras.Layer):
    def __init__(self, dict_size: int) -> None:
        super().__init__()
        self.dict_size = dict_size

    def call(self, inputs):
        return inputs / self.dict_size

In [5]:
pl_df = pl.read_csv('/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv')

agents_list = pl_df.select('agent1').unique().rows()
agents_list = [agent[0] for agent in agents_list]
agents_list.sort()

agent_code = {}

for ind, agent in enumerate(agents_list):
    agent_code[agent] = ind

In [6]:
agent_code

{'MCTS-ProgressiveHistory-0.1-MAST-false': 0,
 'MCTS-ProgressiveHistory-0.1-MAST-true': 1,
 'MCTS-ProgressiveHistory-0.1-NST-false': 2,
 'MCTS-ProgressiveHistory-0.1-NST-true': 3,
 'MCTS-ProgressiveHistory-0.1-Random200-false': 4,
 'MCTS-ProgressiveHistory-0.1-Random200-true': 5,
 'MCTS-ProgressiveHistory-0.6-MAST-false': 6,
 'MCTS-ProgressiveHistory-0.6-MAST-true': 7,
 'MCTS-ProgressiveHistory-0.6-NST-false': 8,
 'MCTS-ProgressiveHistory-0.6-NST-true': 9,
 'MCTS-ProgressiveHistory-0.6-Random200-false': 10,
 'MCTS-ProgressiveHistory-0.6-Random200-true': 11,
 'MCTS-ProgressiveHistory-1.41421356237-MAST-false': 12,
 'MCTS-ProgressiveHistory-1.41421356237-MAST-true': 13,
 'MCTS-ProgressiveHistory-1.41421356237-NST-false': 14,
 'MCTS-ProgressiveHistory-1.41421356237-NST-true': 15,
 'MCTS-ProgressiveHistory-1.41421356237-Random200-false': 16,
 'MCTS-ProgressiveHistory-1.41421356237-Random200-true': 17,
 'MCTS-UCB1-0.1-MAST-false': 18,
 'MCTS-UCB1-0.1-MAST-true': 19,
 'MCTS-UCB1-0.1-NST-fals

In [7]:
class PredictDataGenerator(keras.utils.Sequence):
    eqpmnt_re = r'\(equipment'
    rulestrip_re = r'[^a-zA-Z\(\)\{\}]'
    engvector_len = 900
    ludvector_len = 23700
    
    def __init__(self, list_IDs, dataframe, agent_code, batch_size=20, shuffle=True):
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        # self.labels_dict = labels_dict
        self.df = dataframe[['agent1', 'agent2', 'EnglishRules', 'LudRules']]
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.list_IDs))
        self._agent_code = agent_code

    def __len__(self):
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, batch_num):
          # Generate indexes of the batch
        indexes = self.indexes[batch_num*self.batch_size:(batch_num+1)*self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        X = self.__data_generation(list_IDs_temp)

        return X

    def on_epoch_ends(self):
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def _lud_prepare(self, rule) -> str:
        start = re.search(self.eqpmnt_re, rule).span()[0]
        pure_rule = rule[start:]
        pure_rule = re.sub(self.rulestrip_re, ' ', pure_rule)
        return pure_rule

    def __data_generation(self, list_IDs_temp):
        X_agents = []
        X_engvectors = []
        X_ludvectors = []
        # y = []

        agents_num = len(self.agent_code)
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            row = self.df.loc[ID]
            agent1, agent2, engrul, ludrul = row

            # y.append(self.labels_dict[ID])

            agent1_encoded = keras.utils.to_categorical(self.agent_code[agent1], agents_num)
            agent2_encoded = keras.utils.to_categorical(self.agent_code[agent2], agents_num)
            agents_encoded = np.hstack((agent1_encoded, agent2_encoded))
            X_agents.append(np.array(agents_encoded))

            engrul_vector = eng_preprocessor(engrul)
            engrul_vector = keras.utils.pad_sequences((engrul_vector,), maxlen=self.engvector_len)
            X_engvectors.append(engrul_vector)

            ludrul_vector = lud_preprocessor(self._lud_prepare(ludrul))
            ludrul_vector = keras.utils.pad_sequences((ludrul_vector,), maxlen=self.ludvector_len)
            X_ludvectors.append(ludrul_vector)

        return (np.array(X_agents), np.array(X_engvectors), np.array(X_ludvectors))

In [8]:
def predict(test: pl.DataFrame, sample_sub: pl.DataFrame):
    df = test.to_pandas()
    test_df = df[['agent1', 'agent2', 'EnglishRules', 'LudRules']]
    test_generator = PredictDataGenerator(test_df.index, agent_code=agent_code, dataframe=test_df)

    y = model.predict(test_generator)
    print(y)
    
    return sample_sub.with_columns(pl.col('utility_agent1') + 0.123)

In [9]:
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv'
        )
    )

GatewayRuntimeError: (<GatewayRuntimeErrorType.SERVER_RAISED_EXCEPTION: 3>, '<_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.UNKNOWN\n\tdetails = "Exception calling application: Must provide at least one structure"\n\tdebug_error_string = "UNKNOWN:Error received from peer  {created_time:"2024-10-24T07:23:21.851079626+00:00", grpc_status:2, grpc_message:"Exception calling application: Must provide at least one structure"}"\n>')