In [1]:
import os
import platform
import random
import json
import torch
import mlx_lm
import math
import pandas as pd
from abc import ABC, abstractmethod
# Platform flags used to pick the inference backend.
# macOS is detected through the "Darwin" system name reported by `platform`.
IS_MAC = platform.system() == "Darwin"
# CUDA is never probed on macOS; elsewhere we ask torch whether a device is usable.
HAS_CUDA = (not IS_MAC) and torch.cuda.is_available()

class ModelLoader(ABC):
    """Abstract interface shared by the backend-specific model loaders.

    Concrete subclasses must implement both :meth:`load` and
    :meth:`generate`; the class itself cannot be instantiated.
    """

    @abstractmethod
    def load(self, model_name: str, adapter_path: str = None):
        """Load the model called ``model_name``, optionally with an adapter.

        Args:
            model_name: Identifier of the model to load.
            adapter_path: Optional location of adapter weights; ``None``
                means no adapter.

        The base implementation does nothing and returns ``None``.
        """

    @abstractmethod
    def generate(self, model, tokenizer, prompt: str, max_tokens: int = 500):
        """Produce a completion for ``prompt`` with ``model`` and ``tokenizer``.

        Args:
            model: Model object to generate with.
            tokenizer: Tokenizer that goes with ``model``.
            prompt: Prompt to complete.
            max_tokens: Upper bound on the number of generated tokens.

        The base implementation does nothing and returns ``None``.
        """

class MLXLoader(ModelLoader):
    """Loader that delegates to the ``mlx_lm`` package."""

    def load(self, model_name, adapter_path=None):
        """Return the result of ``mlx_lm.load`` for the model and optional adapter."""
        # Imported lazily so the package is only touched when this backend is used.
        import mlx_lm as _mlx_lm

        return _mlx_lm.load(model_name, adapter_path=adapter_path)

    def generate(self, model, tokenizer, prompt, max_tokens=500, verbose=False):
        """Return the result of ``mlx_lm.generate`` for ``prompt``.

        Args:
            model: Model object returned by :meth:`load`.
            tokenizer: Tokenizer returned by :meth:`load`.
            prompt: Prompt forwarded unchanged to ``mlx_lm.generate``.
            max_tokens: Forwarded as ``max_tokens``.
            verbose: Forwarded as ``verbose``.
        """
        import mlx_lm as _mlx_lm

        generation_options = {
            "prompt": prompt,
            "max_tokens": max_tokens,
            "verbose": verbose,
        }
        return _mlx_lm.generate(model, tokenizer, **generation_options)

class CUDALoader(ModelLoader):
    """Loader built on Hugging Face ``transformers`` (plus ``peft`` for adapters)."""

    def load(self, model_name, adapter_path=None):
        """Load a causal LM and its tokenizer, optionally wrapping a PEFT adapter.

        Args:
            model_name: Name or path passed to ``from_pretrained`` for both the
                model and the tokenizer.
            adapter_path: Optional adapter location; when truthy the model is
                wrapped with ``PeftModel.from_pretrained``.

        Returns:
            A ``(model, tokenizer)`` tuple.
        """
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype=torch.float16
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        if adapter_path:
            from peft import PeftModel
            model = PeftModel.from_pretrained(model, adapter_path)

        return model, tokenizer

    def generate(self, model, tokenizer, prompt, max_tokens=500):
        """Sample a completion and return only the newly generated text.

        Args:
            model: Model returned by :meth:`load`.
            tokenizer: Tokenizer returned by :meth:`load`.
            prompt: Either the prompt text, or an already tokenized prompt given
                as a sequence of token ids (the form the notebook passes when it
                calls ``apply_chat_template`` without ``tokenize=False``).
            max_tokens: Maximum number of new tokens to sample.

        Returns:
            The decoded continuation, without the prompt and without special
            tokens.
        """
        import torch

        if isinstance(prompt, str):
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        else:
            # Pre-tokenized prompt: build a single-row batch by hand, since the
            # tokenizer call above only accepts text.
            input_ids = torch.tensor(
                [list(prompt)], dtype=torch.long, device=model.device
            )
            inputs = {
                "input_ids": input_ids,
                "attention_mask": torch.ones_like(input_ids),
            }

        prompt_length = inputs["input_ids"].shape[1]
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )
        # The generated sequence starts with the prompt tokens; drop them so the
        # caller receives just the model's answer, not the echoed prompt.
        completion_ids = outputs[0][prompt_length:]
        return tokenizer.decode(completion_ids, skip_special_tokens=True)

def get_model_loader():
    """Return a new loader instance: ``MLXLoader`` when ``IS_MAC`` is true, else ``CUDALoader``."""
    loader_cls = MLXLoader if IS_MAC else CUDALoader
    return loader_cls()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def is_number(s):
    """Report whether ``s`` can be converted with ``float()``.

    Args:
        s: Value to test, typically a string token.

    Returns:
        ``True`` if ``float(s)`` succeeds, ``False`` otherwise. Values that
        ``float`` rejects by type (for example ``None`` or a list) also yield
        ``False`` instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: not a string/number at all.
        return False
    return True
    
def read_first_message(path='../data/poker-preflop/test.jsonl'):
    """Return the ``messages`` entry of the first JSON line in a JSONL file.

    Args:
        path: File to read. Defaults to the poker pre-flop test set that was
            previously hard-coded, so existing zero-argument calls behave the same.

    Returns:
        The value stored under the ``'messages'`` key of the first record.

    Raises:
        json.JSONDecodeError: If the first line is not valid JSON (this
            includes an empty file).
        KeyError: If the first record has no ``'messages'`` key.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        first_line = f.readline()
    return json.loads(first_line)['messages']


def read_line_with_index(line_index, path='../data/poker-preflop/test.jsonl'):
    """Return the ``messages`` entry of the JSON line at a zero-based index.

    Args:
        line_index: Zero-based position of the line to read.
        path: File to read. Defaults to the poker pre-flop test set that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        The value stored under ``'messages'`` for that line, or ``None`` when
        the file has no line at ``line_index``.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        for idx, line in enumerate(f):
            if idx == line_index:
                return json.loads(line)['messages']
    return None


def read_random_message(path='../data/poker-preflop/test.jsonl'):
    """Return the ``messages`` entry of a randomly chosen JSON line.

    Args:
        path: File to read. Defaults to the poker pre-flop test set that was
            previously hard-coded, so existing zero-argument calls behave the same.

    Returns:
        The value stored under ``'messages'`` for the chosen record.

    Raises:
        IndexError: From ``random.choice`` when the file holds no non-blank line.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        # Ignore blank lines (e.g. a trailing newline at the end of the file):
        # picking one would make json.loads fail.
        candidate_lines = [line for line in f if line.strip()]
    random_line = random.choice(candidate_lines)
    return json.loads(random_line)['messages']


def calculate_aa_em(predictions, ground_truths, alpha=5):
    """Score predictions against references on action and bet-size agreement.

    For each pair, the first whitespace-separated word (lower-cased) is the
    action and the last word is the candidate amount.

    * ``aa`` counts pairs whose actions match.
    * ``em`` adds 1 for a matching action other than ``bet``/``raise``. For a
      matching ``bet``/``raise`` with two numeric amounts it adds
      ``exp(-alpha * rel_err ** 2)``, where ``rel_err`` is the absolute
      difference divided by the reference amount. A matching ``bet``/``raise``
      without two numeric amounts adds nothing.

    Args:
        predictions: Sequence of predicted strings.
        ground_truths: Sequence of reference strings, indexed in parallel with
            ``predictions``.
        alpha: Sharpness of the amount penalty.

    Returns:
        ``(aa / n, em / n)`` with ``n = len(predictions)``; ``(0.0, 0.0)`` when
        ``predictions`` is empty.
    """
    total = len(predictions)
    if total == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0, 0.0

    aa = 0
    em = 0
    for i, prediction in enumerate(predictions):
        predicted_tokens = prediction.strip().split()
        truth_tokens = ground_truths[i].strip().split()
        if not predicted_tokens or not truth_tokens:
            # An empty / whitespace-only answer has no action: count as a miss
            # instead of failing on the [0] lookup.
            continue

        action_predicted = predicted_tokens[0].lower()
        action_ground_truth = truth_tokens[0].lower()
        if action_predicted != action_ground_truth:
            continue

        aa += 1
        if action_ground_truth in ('bet', 'raise'):
            amount_predicted = predicted_tokens[-1]
            amount_ground_truth = truth_tokens[-1]
            # Only sized actions with two parsable amounts earn partial credit.
            if is_number(amount_predicted) and is_number(amount_ground_truth):
                predicted_value = float(amount_predicted)
                truth_value = float(amount_ground_truth)
                if truth_value == 0:
                    # Relative error is undefined for a zero reference; give
                    # full credit only for an exact zero prediction.
                    if predicted_value == 0:
                        em += 1
                else:
                    real_error = abs(predicted_value - truth_value) / truth_value
                    em += math.exp(-alpha * (real_error ** 2))
        else:
            em += 1
    return aa / total, em / total

In [3]:
# Model under evaluation. `adapter_name` is the short name used for the
# adapter directory and for naming result files.
# Alternative checkpoint:
# model_name = "Qwen/Qwen2.5-7B-Instruct-1M"
# adapter_name = "Qwen2.5-7B-Instruct-1M"
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
adapter_name = "Meta-Llama-3-8B-Instruct"

# Pick the platform-appropriate backend, then load the base model.
# To evaluate the fine-tuned adapter instead, pass
# adapter_path=f"../adapters/{adapter_name}" to load().
loader = get_model_loader()
model, tokenizer = loader.load(model_name)

Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 107797.53it/s]


In [4]:
# Smoke test on one random example: show the reference answer, then the model's.
message = read_random_message()
# The final chat turn is the reference answer; remove it so the model only
# sees the preceding context, and print its text.
print(message.pop()['content'])
prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True)
response = loader.generate(model, tokenizer, prompt)
print(response)

fold
Call


In [5]:
# ground_truths = []
# predictions = []
# message2 = read_random_message()
# print(message2[-1]['content'])
# ground_truths.append(message2[-1]['content'])
# message2.pop()
# prompt2 = tokenizer.apply_chat_template(message2, add_generation_prompt=True)

# response = loader.generate(model, tokenizer, prompt2)
# print(response)
# predictions.append(response)
# aa, em = calculate_aa_em(predictions, ground_truths)
# print(f"AA: {aa}, EM: {em}")

In [6]:
# Accumulators for the evaluation run: reference answers and model outputs,
# kept as two separate lists that are filled in parallel.
ground_truths, predictions = [], []

# ground_truths.append("raise 22.0")
# predictions.append("Raise 20.0")

# aa, em = calculate_aa_em(predictions, ground_truths)
# test = "raise 22.0"

# number =test.strip().split()[-1].lower()
# print(is_number(number))

# print(f"AA: {aa}, EM: {em}")

In [7]:
# Maximum number of test examples to score.
N_SAMPLES = 1000

# Start from empty lists so that re-running this cell does not append to (and
# double-count) results left over from an earlier run.
ground_truths = []
predictions = []

for i in range(N_SAMPLES):
    message = read_line_with_index(i)
    if message is None:
        # read_line_with_index returns None past the end of the file:
        # stop and score what has been collected.
        break
    # The final chat turn is the reference answer; remove it so the model only
    # sees the preceding context.
    ground_truths.append(message.pop()['content'])
    prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True)
    response = loader.generate(model, tokenizer, prompt)
    predictions.append(response)

aa, em = calculate_aa_em(predictions, ground_truths)
print(f"AA: {aa}, EM: {em}")

AA: 0.281, EM: 0.26733209873388614


In [8]:
# Create a DataFrame for predictions and ground truths
results_df = pd.DataFrame({
    "Prediction": predictions,
    "Ground Truth": ground_truths
})

# # Create a DataFrame for metrics: AA and EM
# metrics_df = pd.DataFrame({
#     "Metric": ["AA", "EM"],
#     "Value": [aa, em]
# })

output_path = "../testing-results"
# to_csv does not create missing directories, so make sure the target exists
# before writing (no-op when it is already there).
os.makedirs(output_path, exist_ok=True)
# Save the DataFrames to CSV files
results_df.to_csv(f"{output_path}/{adapter_name}_predictions.csv", index=False)
# metrics_df.to_csv(f"{output_path}/{adapter_name}_metrics.csv", index=False)