In [56]:
from typing import Dict, Any, List
import json
import requests

def load_json(path) -> Dict[str, Any]:
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with open(path, mode='r', encoding="utf-8") as handle:
        return json.load(handle)


In [57]:
# Load the report definitions that the cells below index by function route.
# NOTE(review): absolute, machine-specific Windows path - this cell cannot run
# elsewhere as written; consider a configurable data directory.
data = load_json(r"D:\Desktop\arb-refactor\data\reports\alpha_report.json")

In [58]:
def flatten_list_2d(list_2d):
    """
    Collapse one level of nesting.

    Args:
        list_2d: An iterable whose elements are themselves iterable

    Returns:
        list: Every inner element, in encounter order, as one flat list
    """
    flattened = []
    for sublist in list_2d:
        flattened.extend(sublist)
    return flattened


# ***Rule base***


In [59]:
import re
from typing import List

def filter_words(query: str, words: List[str]) -> str:
    """
    Return the first entry of ``words`` that appears in ``query`` as a whole term.

    Matching is case-insensitive. A candidate only matches when it is not
    glued to other word characters, so "ST" is not found inside "latest".
    When several candidates could match at the same position the longest one
    wins, so "SBB" is not reported as "SB".

    Args:
        query (str): The input query string to search
        words (List[str]): Candidate terms to look for

    Returns:
        str: The matching entry in its original casing (the first spelling in
        ``words`` when several differ only by case), or an empty string if
        nothing matches or ``words`` is empty
    """
    # Lowercase form -> first original spelling; empty candidates are ignored
    # because an empty alternative would match at every position.
    originals = {}
    for word in words:
        if word:
            originals.setdefault(word.lower(), word)
    if not originals:
        return ""

    # Regex alternation takes the first alternative that fits, so longer
    # candidates must come first.
    alternatives = sorted(originals, key=len, reverse=True)

    # Lookarounds instead of \b so candidates containing punctuation
    # (e.g. "w/l", "IBC-LC") are still anchored correctly.
    pattern = r"(?<!\w)(?:" + "|".join(map(re.escape, alternatives)) + r")(?!\w)"

    match = re.search(pattern, query.lower())
    return originals[match.group(0)] if match else ""

In [60]:
# Abbreviation lists declared for the `product` parameter of /winlost_detail.
# NOTE(review): the variable is named `list_product_detail` but the key read
# here is 'product' - confirm which parameter was intended.
list_product_detail = data['/winlost_detail']['function']['parameters']['properties']['product']['abbreviation'].values()
# One flat list of candidate terms, used as the word list for filter_words.
d = flatten_list_2d(list_product_detail)

In [68]:
# Run the rule-based matcher on a sample query against the flattened list `d`.
filter_words("I want to get wl report for  only", d)

# Add existence checker

''

In [21]:
import requests
import json
from typing import List, Any
# Abbreviation Solving Direction: 

def flatten_list_2d(list_2d: List[List[Any]]) -> List[Any]:
    """
    Merge the inner lists of a 2D list into a single list.

    Args:
        list_2d (List[List[Any]]): Outer sequence whose elements are sequences

    Returns:
        List[Any]: All inner elements in their original order
    """
    flat: List[Any] = []
    for row in list_2d:
        for element in row:
            flat.append(element)
    return flat

# 1. Initialize a validation agent
def get_abbreviations_prompt(function_called: str, data: Dict[str, Any]) -> str:
    """
    Build the prompt fragment listing every abbreviation of a function's parameters.

    Args:
        function_called (str): Key of the function inside ``data``
        data (Dict[str, Any]): Either the full mapping of function specs keyed
            by ``function_called``, or the spec of a single function (a dict
            carrying the 'function' -> 'parameters' -> 'properties' path)

    Returns:
        str: One block per parameter that declares an 'abbreviation' entry,
        joined with newlines; an empty string when no parameter declares one

    Raises:
        KeyError: If the 'function' / 'parameters' / 'properties' path is missing
    """
    # Use function_called to select the spec when the full mapping is passed;
    # otherwise treat `data` as the spec itself.
    function_spec = data[function_called] if function_called in data else data
    parameter_properties = function_spec['function']['parameters']['properties']
    
    list_abb = []
    for param, param_info in parameter_properties.items():
        # Not every parameter declares abbreviations; skip those that do not
        if "abbreviation" not in param_info:
            continue
        abb = flatten_list_2d(param_info['abbreviation'].values())
        prompt = f"""
        {param}:\n
        {abb}
        """
        list_abb.append(prompt)
    return "\n".join(list_abb)

def abbreviation_calling_agent(data: Dict[str, Any], message: str) -> Dict[str, str]:
    """
    Ask the chat model to resolve the abbreviation used in ``message``.

    The prompt lists the abbreviations of every function spec in ``data`` and
    is sent, together with a JSON schema, to the chat endpoint.

    Args:
        data (Dict[str, Any]): Function specs keyed by function name
            (assumes every top-level entry is a function spec - TODO confirm)
        message (str): The user's query

    Returns:
        Dict[str, str]: The model's reply parsed from JSON

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
    """
    # API endpoint
    url = "https://ollama.selab.edu.vn/api/chat"

    # get_abbreviations_prompt needs a function key and its spec, so build the
    # listing one function at a time.
    abbreviated_parameters = "\n".join(
        get_abbreviations_prompt(function_name, function_spec)
        for function_name, function_spec in data.items()
    )
    instructions = f"""
        # General conversation guidelines:
        - Please help me identify the parameter from user's query
        - Return your answer in JSON format with a single key "parameter_called"
        - The available parameters are:
        {abbreviated_parameters}
    """
    few_shot = f"""
    # ***Example Scenarios:***
    
    - ***User***: "I want to get wl report for day 10"
    - ***Assistant***: {{"function_called": "/winlost_detail"}}
    
    - ***User***: "I want to get w/l report for day 10" 
    - ***Assistant***: {{"function_called": "/winlost_detail"}}
    
    - ***User***: "Show me the WL detail report"
    - ***Assistant***: {{"function_called": "/winlost_detail"}}
    
    - ***User***: "I want to get to report"
    - ***Assistant***: {{"function_called": "/turnover_detail"}}
    
    - ***User***: "I want to get revenue report"
    - ***Assistant***: {{"function_called": "/turnover_detail"}}
    
    """
    user_prompt = f"""
    User's message: {message}
    
    {instructions}
    
    {few_shot}
    """
    
    system_prompt = "You are a helpful assistant that identifies which function is abbreviated by user's query"
    
    # NOTE(review): the instructions ask for a "parameter_called" key, the
    # examples answer with "function_called", and this schema requires seven
    # parameter fields - the three disagree and should be reconciled.
    format_schema =  {
        "type": "object",
        "properties": {
            "date_range": {"type": "string"},
            "from_date": {"type": "string"},
            "to_date": {"type": "string"},
            "product": {"type": "string"},
            "product_detail": {"type": "string"},
            "level": {"type": "string"},
            "user": {"type": "string"}
        },
        "required": ["date_range", "from_date", "to_date", "product", "product_detail", "level", "user"]
    }
    model = 'qwen2.5:14b'
    
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    headers = {"Content-Type": "application/json"}

    # Named `payload` so the `data` argument is not rebound
    payload = {
        "model": model,
        "messages": messages,
        "format": format_schema,
        "stream": False
    }

    response = requests.post(url, headers=headers, data=json.dumps(payload))
    # Surface HTTP failures directly instead of a KeyError on 'message'
    response.raise_for_status()
    return json.loads(response.json()['message']['content'])

In [22]:
# Try the agent on a sample query (sends an HTTP request to the chat endpoint).
abbreviation_calling_agent(data, "turnover report please")

{'function_called': '/turnover'}

# ***1. check -> abbreviation | origin -> output***

In [None]:
# 2. Create an agent to check whether the query contains an abbreviation
def abbreviation_recognizer_agent(data: Dict[str, Any], message: str) -> Dict[str, Any]:
    """
    Ask the chat model whether ``message`` uses an abbreviation.

    Args:
        data (Dict[str, Any]): Function specs keyed by function name
            (assumes every top-level entry is a function spec - TODO confirm)
        message (str): The user's query

    Returns:
        Dict[str, Any]: The model's reply parsed from JSON; the schema
        requests a single integer field "is_abbreviated"

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
    """
    # API endpoint
    url = "https://ollama.selab.edu.vn/api/chat"

    # get_abbreviations_prompt needs a function key and its spec, so build the
    # listing one function at a time.
    abbreviated_functions = "\n".join(
        get_abbreviations_prompt(function_name, function_spec)
        for function_name, function_spec in data.items()
    )

    # Construct the prompt for function determination with examples
    system_prompt = "You are a helpful assistant that identifies whether the p in user's query is abbreviated or not."
    
    instructions = f"""
    - Return your answer in JSON format with a single key "is_abbreviated"
    - The available abbreviations are:
        {abbreviated_functions}
    """
    
    few_shot = f"""
    # ***Example Scenarios:***
    
    - ***User***: "Winlost report please"
    - ***Assistant***: {{"is_abbreviated": 0}}
    
    - ***User***: "Currently, I would like to get the outstanding report"
    - ***Assistant***: {{"is_abbreviated": 0}}
    
    - ***User***: "I want to get wl report for day 10"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "I want to get w/l report for day 10" 
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "Show me the WL detail report"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "TO report please"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "I want to get TO report"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "I want to get TO report for day 10"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "I want to get TO report for day 10"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "I want to get TO report for day 10"
    - ***Assistant***: {{"is_abbreviated": 1}}
    
    - ***User***: "I want to get TO report for day 10"
    - ***Assistant***: {{"is_abbreviated": 1}}
    """
    
    user_prompt = f"""
    User's message: {message}
    
    {instructions}
    
    {few_shot}
    """
    
    # JSON Schema names the whole-number type "integer"; "int" is not a valid type
    format_schema =  {
        "type": "object",
        "properties": {
            "is_abbreviated": {
                "type": "integer"
            }
        },
        "required": ["is_abbreviated"]
    }

    model = 'qwen2.5:14b'
    
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    headers = {"Content-Type": "application/json"}

    # Named `payload` so the `data` argument is not rebound
    payload = {
        "model": model,
        "messages": messages,
        "format": format_schema,
        "stream": False
    }

    response = requests.post(url, headers=headers, data=json.dumps(payload))
    # Surface HTTP failures directly instead of a KeyError on 'message'
    response.raise_for_status()
    return json.loads(response.json()['message']['content'])


# ***2. abb & origin -> validator -> output***

In [34]:
def get_abbreviated_parameters(function_called: str, data: Dict[str, Any]) -> str:
    """
    Build the prompt fragment listing the abbreviations of one function's parameters.

    Args:
        function_called (str): Key of the function inside ``data``
        data (Dict[str, Any]): Function specs keyed by function name

    Returns:
        str: One "### <param>: [<abbreviations>]" block per parameter that has
        an "abbreviation" entry, joined with newlines
    """
    parameter_properties = data[function_called]['function']['parameters']['properties']
    
    list_abbreviation_prompt = []
    for param, param_info in parameter_properties.items():
        
        # Parameters without an "abbreviation" entry are left out of the prompt
        if "abbreviation" in param_info:
            # The mapping's keys are dropped; only the value lists are merged
            flattened_abbreviation = flatten_list_2d(param_info['abbreviation'].values())
            abbreviation_prompt = f"""
            ### {param}: {flattened_abbreviation} 
            """
            list_abbreviation_prompt.append(abbreviation_prompt)
            
    return "\n".join(list_abbreviation_prompt)

In [35]:
def get_enum4abbreviation(function_called: str, data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Collect the 'enum' entry of every parameter that declares abbreviations.

    Args:
        function_called (str): Key of the function inside ``data``
        data (Dict[str, Any]): Function specs keyed by function name

    Returns:
        Dict[str, Any]: Parameter name -> that parameter's 'enum' value, for
        each parameter whose spec contains an "abbreviation" entry

    Raises:
        KeyError: If such a parameter has no 'enum' entry, or the spec path
            is missing
    """
    parameter_properties = data[function_called]['function']['parameters']['properties']
    return {
        param: param_info['enum']
        for param, param_info in parameter_properties.items()
        if "abbreviation" in param_info
    }


In [36]:
# 'enum' entries of the /winlost_detail parameters that declare abbreviations.
enum = get_enum4abbreviation("/winlost_detail", data)

In [39]:
# 2. Create an agent that extracts the abbreviated parameters from the query
def abbreviation_recognizer_agent(function_called: str, data: Dict[str, Any], message: str) -> Dict[str, Any]:
    """
    Ask the chat model to extract `product` and `product_detail` from ``message``.

    The prompt lists the abbreviations of ``function_called``'s parameters,
    and the reply is constrained by a JSON schema whose allowed values come
    from those parameters' 'enum' entries. The full user prompt is printed
    before the request is sent.

    Args:
        function_called (str): Key of the function inside ``data``
        data (Dict[str, Any]): Function specs keyed by function name
        message (str): The user's query

    Returns:
        Dict[str, Any]: The model's reply parsed from JSON, with the keys
        "product" and "product_detail" requested by the schema

    Raises:
        KeyError: If the function has no abbreviated 'product' or
            'product_detail' parameter
        requests.HTTPError: If the endpoint answers with an error status
    """
    # API endpoint
    url = "https://ollama.selab.edu.vn/api/chat"

    abbreviated_parameters = get_abbreviated_parameters(function_called, data)
    enum = get_enum4abbreviation(function_called, data)
    # Construct the prompt for function determination with examples
    system_prompt = """
        You are an AI assistant majoring for Named Entity Recognition trained to extract entity and categorize queries for Winlost Report Detail
    """
    
    instructions: str= """
    # Define your task:
    - Extract the most relevant keywords from the following sentence: '{query}'. 
    - You must detect all the keywords based on the abbreviation below:
        {abbreviated_parameters}
    - Return the following format output:
        {{
            "product": "<product_name>",
            "product_detail": "<product_detail_name>",
        }}     
    - If the product is not specified, please return 'All' for product.
    - If the product_detail is not specified, please return 'All' for product_detail.
    """.format(query=message, abbreviated_parameters=abbreviated_parameters)
    
    few_shot = f"""
    # ***Example Scenarios:***
    
    - ***User***: "SBB please"
    - ***Assistant***: {{"product": "All", "product_detail": "SBB"}}
    
    - ***User***: "I want to get wl report for SBEPG and SB"
    - ***Assistant***: {{"product": "SB", "product_detail": "SBEPG"}}
    
    - ***User***: "SB please for wl report"
    - ***Assistant***: {{"product": "SB", "product_detail": "All"}}
    
    - ***User***: "Give me wl report for Num GAME" 
    - ***Assistant***: {{"product": "All", "product_detail": "Num Game"}}
    
    - ***User***: "Show me the WL detail report for sag"
    - ***Assistant***: {{"product": "SAG", "product_detail": "All"}}
    
     - ***User***: "I want to know wl report for sb basket pin and fun game please"
    - ***Assistant***: {{"product": "fun game", "product_detail": "sb basket pin"}}
    """
    
    user_prompt = f"""
    User's message: {message}
    
    {instructions}
    
    {few_shot}
    """
    # Echo the final prompt so it can be inspected in the cell output
    print(user_prompt)
    # Restrict both answers to the values listed in the parameters' 'enum' entries
    format_schema =  {
        "type": "object",
        "properties": {
            "product": {
                "type": "string",
                "enum": enum['product']
            },
            "product_detail": {
                "type": "string",
                "enum": enum['product_detail']
            }
        },
        "required": ["product", "product_detail"]
    }

    model = 'qwen2.5:14b'
    
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    headers = {"Content-Type": "application/json"}

    # Named `payload` so the `data` argument is not rebound
    payload = {
        "model": model,
        "messages": messages,
        "format": format_schema,
        "stream": False
    }

    response = requests.post(url, headers=headers, data=json.dumps(payload))
    # Surface HTTP failures directly instead of a KeyError on 'message'
    response.raise_for_status()
    return json.loads(response.json()['message']['content'])


In [40]:
# Try the extraction agent on a query containing the abbreviation "NG"
# (sends an HTTP request to the chat endpoint and prints the prompt).
abbreviation_recognizer_agent("/winlost_detail", data, "give me wl report for NG")


    User's message: give me wl report for NG
    
    
    # Define your task:
    - Extract the most relevant keywords from the following sentence: 'give me wl report for NG'. 
    - You must detect all the keywords based on the abbreviation below:
        
            ### product: ['SB', 'SprtBooks', 'NG', 'Num Game', 'VS', 'SC', 'RNGK', 'AGC', 'SB2', 'RNGS', 'CRK', 'AB', 'MG', 'CO', 'LTY', 'VJP', 'EX', 'PP', 'AG', 'SG', 'SPr', 'VG', 'SAG', 'TG', 'LC', 'SGM', 'T4D', 'RNGG', 'AES', 'IBC-LC', 'BBIN', 'GPI', 'WM', 'ION', 'RNGC', 'SVS', 'PGS', 'JKR', 'BG', 'MXG', 'HB', 'CG', 'SL', 'PP-LC', 'VGAM', 'AP', 'APM', 'BTC', 'FGG', 'SEAL', 'MGR', 'PT', 'JILI', 'YB', 'UUS', 'L22', 'WE-LC', 'SBC', 'YP', 'NSP', 'SXD', 'FSP', 'FC', 'GPI-LC', 'PS', 'HDG', 'ONC', 'SS', 'PTC', 'SBK', 'AMS', 'FG', 'fun game'] 
            

            ### product_detail: ['SBB', 'SABA B', 'SB Basketball', 'SBBPG', 'sb basket pin', 'SBEPG', 'SAOS', 'SS', 'SSPG', 'ST', 'SB', 'ABP', 'PPJC', 'PPJP', 'SGJC', 'SGJP', 'AESLD

{'product': 'Allbet', 'product_detail': 'Allbet Promotion'}