In [1]:
# Use a pipeline as a high-level helper
from transformers import pipeline
ner_pipe = pipeline("token-classification", model="NguyenMinh03082004/velectra_fine_tune_for_laptop_ner")

In [2]:
id_to_label = {
    "LABEL_0": 'B-ai_task',
    "LABEL_1": 'B-basic_task',
    "LABEL_2": 'B-battery',
    "LABEL_3": 'B-battery_capacity',
    "LABEL_4": 'B-brand',
    "LABEL_5": 'B-cheap_option',
    "LABEL_6": 'B-cpu',
    "LABEL_7": 'B-designer',
    "LABEL_8": 'B-developer',
    "LABEL_9": 'B-game_task',
    "LABEL_10": 'B-gpu',
    "LABEL_11": 'B-high_fresh_rate',
    "LABEL_12": 'B-high_performance',
    "LABEL_13": 'B-light_weight',
    "LABEL_14": 'B-long_battery',
    "LABEL_15": 'B-os',
    "LABEL_16": 'B-price_range',
    "LABEL_17": 'B-prod_name',
    "LABEL_18": 'B-quick_charge',
    "LABEL_19": 'B-ram',
    "LABEL_20": 'B-screen_fresh_rate',
    "LABEL_21": 'B-screen_quality',
    "LABEL_22": 'B-screen_resolution',
    "LABEL_23": 'B-screen_size',
    "LABEL_24": 'B-weight',
    "LABEL_25": 'I-ai_task',
    "LABEL_26": 'I-basic_task',
    "LABEL_27": 'I-cheap_option',
    "LABEL_28": 'I-cpu',
    "LABEL_29": 'I-designer',
    "LABEL_30": 'I-developer',
    "LABEL_31": 'I-game_task',
    "LABEL_32": 'I-gpu',
    "LABEL_33": 'I-high_fresh_rate',
    "LABEL_34": 'I-high_performance',
    "LABEL_35": 'I-light_weight',
    "LABEL_36": 'I-long_battery',
    "LABEL_37": 'I-os',
    "LABEL_38": 'I-price_range',
    "LABEL_39": 'I-prod_name',
    "LABEL_40": 'I-quick_charge',
    "LABEL_41": 'I-screen_fresh_rate',
    "LABEL_42": 'I-screen_quality',
    "LABEL_43": 'I-screen_resolution',
    "LABEL_44": 'O'
}


In [3]:
def combine_tokens_and_labels(tokens, predicted_labels):
    combined_tokens = []
    combined_labels = []
    current_token = ""
    current_label = ""

    for token, label in zip(tokens, predicted_labels):
        if token.startswith("##"):
            current_token += token[2:]  # Append subword (remove '##')
        elif token in ["[CLS]", "[SEP]", "[PAD]"]:  # ignored token.
            continue
        elif current_token and (token.startswith(".") or (current_token.endswith(".") and token.isdigit())): 
            current_token += token
        else:
            if current_token:  # Add the last token and label
                combined_tokens.append(current_token)
                combined_labels.append(current_label)
            current_token = token
            current_label = label

    # Finalize the last token
    if current_token:
        combined_tokens.append(current_token)
        combined_labels.append(current_label)

    return combined_tokens, combined_labels

In [4]:
def bio_to_entities(combined_tokens, combined_labels):
    entities = []
    current_entity = ""
    current_label = ""

    for token, label in zip(combined_tokens, combined_labels):
        if label.startswith('B-'):  # beginning of new entity
            if current_entity:  # if current entity != null => end old entity
                entities.append((current_entity.strip(), current_label))
            current_entity = token  # start new entity
            current_label = label[2:]  # remove the B-, keep only entity
        elif label.startswith('I-') and current_entity:  # Continuation of the current entity
            current_entity += " " + token  # Append token to the current entity
        else:  # End of the current entity
            if current_entity:  # Save the entity if it exists
                entities.append((current_entity.strip(), current_label))
                current_entity = ""  # Reset for next entity
                current_label = ""

    # Finalize any remaining entity
    if current_entity:
        entities.append((current_entity.strip(), current_label))

    # Merging logic for combined product names
    merged_entities = []
    i = 0
    while i < len(entities):
        entity, label = entities[i]

        # case for updating product name = brand + product name
        if label == 'brand':
            if (i + 1 < len(entities) and entities[i + 1][1] == 'prod_name'):
                entity += " " + entities[i + 1][0]  # Merge the names
                label = 'prod_name'  # change label to prod_name
                i += 1  # skip the next entity 

        merged_entities.append((entity.strip(), label))
        i += 1

    return merged_entities

In [5]:
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModel, MT5Model

In [6]:
import torch
from transformers import MT5Model, AutoConfig

class MT5EncoderForIntentRecognition(torch.nn.Module):
    def __init__(self, encoder, num_labels, tfidf_dim):
        super(MT5EncoderForIntentRecognition, self).__init__()
        self.encoder = encoder
        self.dropout = torch.nn.Dropout(0.1)
        self.tfidf_dense = torch.nn.Linear(tfidf_dim, encoder.config.d_model)
        self.classifier = torch.nn.Linear(encoder.config.d_model * 2, num_labels)  # Combine mT5 + TF-IDF

    @classmethod
    def from_pretrained(cls, model_path, num_labels, tfidf_dim):
        config = AutoConfig.from_pretrained(model_path)
        encoder = MT5Model.from_pretrained(model_path).encoder
        model = cls(encoder, num_labels, tfidf_dim)
        state_dict = torch.load(f"{model_path}/pytorch_model.bin", map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

        # Load the encoder state_dict
        model.encoder.load_state_dict(state_dict['encoder_state_dict'], strict=False)
        model.tfidf_dense.load_state_dict(state_dict['tfidf_dense_state_dict'], strict=False)
        model.classifier.load_state_dict(state_dict['classifier_state_dict'], strict=False)

        return model

    def forward(self, input_ids, attention_mask=None, tfidf=None, labels=None):
        # mT5 encoding
        encoder_outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = encoder_outputs.last_hidden_state
        pooled_output = sequence_output[:, 0, :]  # CLS token embedding

        # TF-IDF encoding
        tfidf_output = self.tfidf_dense(tfidf)
        tfidf_output = self.dropout(tfidf_output)

        # Combine mT5 and TF-IDF features
        combined_output = torch.cat([pooled_output, tfidf_output], dim=1)
        logits = self.classifier(combined_output)

        loss = None
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.classifier.out_features), labels.view(-1))

        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}


In [7]:
model_path = './mt5_encoder_intent_for_laptop_conversation_ver4'

In [8]:
tokenizer_intent = AutoTokenizer.from_pretrained(model_path)
model_intent = MT5EncoderForIntentRecognition.from_pretrained(model_path, num_labels=14, tfidf_dim=922)
device = torch.device('cpu')
model_intent.to(device)

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Some weights of MT5Model were not initialized from the model checkpoint at ./mt5_encoder_intent_for_laptop_conversation_ver4 and are newly initialized: ['decoder.block.0.layer.0.SelfAttention.k.weight', 'decoder.block.0.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.0.layer.0.SelfAttention.v.weight', 'decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.k.weight', 'decoder.block.0.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.1.EncDecAttention.v.weight', 'decoder.block.0.layer.1.layer_norm.weight', 'decoder.block.0.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.0.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.0.layer.2.lay

MT5EncoderForIntentRecognition(
  (encoder): MT5Stack(
    (embed_tokens): Embedding(250112, 512)
    (block): ModuleList(
      (0): MT5Block(
        (layer): ModuleList(
          (0): MT5LayerSelfAttention(
            (SelfAttention): MT5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): MT5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): MT5LayerFF(
            (DenseReluDense): MT5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): Linear(in_features=102

In [9]:
num_to_label = {0: 'Change Decision', 
                1: 'Comparison Inquiry', 
                2: 'Delivery Options', 
                3: 'Feature Confirmation', 
                4: 'Find Similar', 
                5: 'Interest Confirmation', 
                6: 'Payment Options', 
                7: 'Price Inquiry', 
                8: 'Product Availability', 
                9: 'Purchase Decision', 
                10: 'Return Policy Inquiry', 
                11: 'Specific Need', 
                12: 'Thank You/Closing', 
                13: 'Warranty Inquiry'}





In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

def predict_intent(model, tokenizer, sentence, vectorizer):
    #tokenize input
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=70)

    # calculate tf-idf
    tfidf_vector = vectorizer.transform([sentence]).toarray()
    tfidf_tensor = torch.tensor(tfidf_vector, dtype=torch.float32)

    device = next(model.parameters()).device
    inputs = {key: value.to(device) for key, value in inputs.items()}
    tfidf_tensor = tfidf_tensor.to(device)

    inputs["tfidf"] = tfidf_tensor

    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs["logits"]
    predicted_label = logits.argmax(-1).item()
    return num_to_label[predicted_label]

In [11]:
def predict_ner(text, ner_pipe, id_to_label):
    ner_results = ner_pipe(text)
    tokens = [result['word'] for result in ner_results]
    labels = [id_to_label[result['entity']] for result in ner_results]
    combined_tokens, combined_labels = combine_tokens_and_labels(tokens, labels)
    entities = bio_to_entities(combined_tokens, combined_labels)
    return entities


In [12]:
product = {
    "prod_name": None,
    "brand": None, 
    "cpu": None, 
    "gpu": None,
    "ram": None, #
    "battery": None, #
    "price_range": None,
    "screen_size": None,
    "screen_resolution": None,
    "screen_fresh_rate": None, #
    "os": None, 
    "battery_capacity": None,
    "weight": None, #
    "cpu_score": None, # it will not be printed, used for evaluating
    "gpu_score": None, # used for evaluating
    "screen_resolution_score": None, # used to evaluate
    "evaluate_score": None
}

In [13]:
dialogue_state = {
    "intent": None,
    "entities_in_conversation": {
        "prod_name": {"values": [], "selected": None},
        "brand": {"values": [], "selected": None},
        "cpu": {"values": [], "selected": None},
        "gpu": {"values": [], "selected": None},
        "ram": {"values": [], "selected": None},
        "battery": {"values": [], "selected": None},
        "price_range": {"values": [], "selected": None},
        "screen_size": {"values": [], "selected": None},    
        "screen_resolution": {"values": [], "selected": None},
        "screen_fresh_rate": {"values": [], "selected": None},
        "os": {"values": [], "selected": None},
        "battery_capacity": {"values": [], "selected": None},
        "weight": {"values": [], "selected": None}
    },
    "entities_for_specific_need": {
        "high_performance": False,
        "light_weight": False,
        "long_battery": False,
        "quick_charge": False,
        "high_fresh_rate": False,
        "screen_quality": False,
        "cheap_option": False,
        "designer": False,
        "developer": False,
        "game_task": False,
        "ai_task": False,
        "basic_task": False
    },
    "product_in_current_message": [], # it will use to store a temporary product (as a response of the current question of customer) 
    "product_in_conversation": [], # store fully information of products that existed conversation. To easily serach the product that user chosse to buy at the end of conversation
    "positional_product": -10
}



In [14]:
import re

def convert_trieu_to_number(text):
    # find all numbers followed by "trieu" (case insensitive) in the text
    matches = re.findall(r'(\d+)\s*trieu', text, re.IGNORECASE)
    
    # replace with the numeric equivalent
    for match in matches:
        number_in_million = int(match) * 1000000
        text = re.sub(rf'{match}\s*trieu', str(number_in_million), text, flags=re.IGNORECASE)
    
    return text

In [15]:
import math
def normalize(product):

    # Normalizing individual attributes with checks for None values
    normalize_ram_point = (
        10 / (1 + math.exp(-0.07 * (product['ram'] - 8)))
        if product.get('ram') is not None else 0
    )

    normalize_screen_fresh_rate = (
        10 / (1 + math.exp(-0.04 * (product.get('screen_fresh_rate', 60) - 96.22)))
        if product.get('screen_fresh_rate') is not None else 8  # Default to 60 Hz
    )

    normalize_battery = (
        10 / (1 + math.exp(-1.38 * (product.get('battery', 3) - 3)))
        if product.get('battery') is not None else 0
    )

    # Weight normalization with ranges, handling None by defaulting to the lowest score (6)
    if product.get('weight') is not None:
        if product['weight'] <= 1.4:
            normalize_weight = 10
        elif product['weight'] <= 2:
            normalize_weight = 8
        else:
            normalize_weight = 6
    else:
        normalize_weight = 6  # Default for None case

    normalize_battery_capacity = (
        10 / (1 + math.exp(-0.02 * (product.get('battery_capacity', 90) - 90)))
        if product.get('battery_capacity') is not None else 0
    )

    normalize_screen_size = (
        10 / (1 + math.exp(-0.98 * (product.get('screen_size', 13) - 13)))
        if product.get('screen_size') is not None else 5
    )
    return {
        "cpu": float(product['cpu_score']),
        "gpu": float(product['gpu_score']),
        "screen_resolution": float(product['screen_resolution_score']),
        "screen_fresh_rate": normalize_screen_fresh_rate,
        "battery": normalize_battery,
        "weight": normalize_weight,
        "battery_capacity": normalize_battery_capacity,
        "screen_size": normalize_screen_size,
        "ram": normalize_ram_point
    }

In [16]:
weights = { # evaluate the importance based on the specific need of user
    "cpu": 0.0,
    "gpu": 0.0,
    "screen_resolution": 0.0,
    "screen_fresh_rate": 0.0, 
    "battery": 0.0,
    "weight": 0.0,
    "battery_capacity": 0.0,
    "screen_size": 0.0,
    "ram": 0.0
}
def updateWeight(dialogue_state):
    user_needs = dialogue_state['entities_for_specific_need']
    number_of_intent = sum(user_needs.values()) # 
    for label, value in user_needs.items():
        if value:
            if label == 'high_performance':
                weights['cpu'] += 0.35
                weights['gpu'] += 0.35
                weights['ram'] += 0.3
            if label == 'light_weight':
                weights['weight'] += 1.0
            if label == 'long_battery':
                weights['long_battery'] += 1.0
            if label == 'quick_charge':
                weights['battery_capacity'] += 1.0
            if label == 'high_fresh_rate':
                weights['screen_fresh_rate'] += 1.0
            if label == 'screen_quality':
                weights['screen_resolution'] += 0.5
                weights['screen_size'] += 0.3
                weights['screen_fresh_rate'] += 0.2
            if label == 'designer':
                weights['screen_resolution'] += 0.7
                weights['screen_size'] += 0.2
                weights['screen_fresh_rate'] += 0.1
            if label == 'developer':
                weights['cpu'] += 0.6
                weights['ram'] += 0.25
                weights['gpu'] += 0.15
            if label == 'game_task':
                weights['cpu'] += 0.25
                weights['gpu'] += 0.45
                weights['screen_fresh_rate']+= 0.15
                weights['ram']+= 0.15
            if label == 'ai_task':
                weights['cpu'] += 0.15
                weights['gpu'] += 0.6
                weights['ram'] += 0.25
            if label == 'basic_task': # focus on weight, battery, screen_resolution
                weights['cpu'] += 0.1
                weights['gpu'] += 0.1
                weights['ram'] += 0.2
                weights['battery'] += 0.2
                weights['weight'] += 0.2
                weights['screen_resolution']+= 0.2
    #print(weights)
    #print(number_of_intent)
    if number_of_intent > 1:
        for label in weights:
            weights[label] = weights[label] / number_of_intent
    #print(weights)
def evaluate(dialogue_state, product):
    cur_point = 0.0
    # formula = sum(weights['type'] * product['type'])
    user_needs = dialogue_state['entities_for_specific_need']
    normalize_product = normalize(product)
    for label in normalize_product:
        cur_point += weights[label] * normalize_product[label]             
    return cur_point

In [17]:
prods_entities = dialogue_state['entities_in_conversation']
for label, details in prods_entities.items():
    print(label)
print(len(prods_entities))

prod_name
brand
cpu
gpu
ram
battery
price_range
screen_size
screen_resolution
screen_fresh_rate
os
battery_capacity
weight
13


In [18]:
import re

def extract_positional_phrases_vietnamese(text):
    positional_keywords = {
        "đầu tiên": 0,
        "thứ nhất": 0,
        "đầu": 0,
        "thứ hai": 1,
        "thứ ba": 2,
        "cuối": -1,
        "cuối cùng": -1,
        "sau cùng": -1,
        "vừa được": -1,
        "mới được": -1,
        "vừa rồi": -1
    }

    regex = r'\b(đầu tiên|thứ nhất|đầu|thứ hai|thứ ba|cuối cùng|sau cùng|cuối|vừa được|mới được|vừa rồi)\b'
    match = re.search(regex, text.lower(), re.UNICODE)

    if match:
        matched_word = match.group(0)
        position = positional_keywords[matched_word]
        #print(matched_word)
        return position
    else:
        return -10


In [19]:
def execute_purchase(dialogue_state):
    # get label and specific value to search product in conversation that suit with user's requirement
    chosen_map = {}
    prods_entities = dialogue_state['entities_in_conversation']
    for label, values in prods_entities.items():
        if values['selected']: # if exists values['selected']
            chosen_map[label] = values['selected'] # assign value
    return chosen_map


In [20]:
def getProduct(product_chosen, dialogue_state):
    product = []
    list_product = dialogue_state['product_in_conversation']
    #print(list_product)
    for laptop in list_product:
        is_match = True
        for label, value in product_chosen.items():
            if label not in laptop:
                is_match = False
                break
            if label == "prod_name":
                if str(value).lower() not in str(laptop[label]).lower():
                    is_match = False
                    break
            elif label == "price_range":
                if isinstance(value, (list, tuple)) and len(value) == 2:
                    if not (value[0] <= laptop[label] <= value[1]):
                        is_match = False
                        break
                elif laptop[label] > value:
                    is_match = False
                    break
            else:
                if str(laptop[label]).lower() != str(value).lower():
                    is_match = False
                    break
        if is_match:
            #print(laptop)
            product.append(laptop)
    if not product:  # If no matches, return all products
        for prod in list_product:
            product.append(prod)
    return product


In [21]:
def getProductFromFilter(products, positional):
    if positional == -10: # no information about position
        return products[0]
    return products[positional]

In [22]:
def generating_sql(dialogue_state):
    base_sql_query = """
    SELECT laptop.id, brand, prod_name, ram, cpu, gpu, screen_resolution, 
           cpu_info.score AS cpu_score, gpu_info.score AS gpu_score, 
           screen_resolution_info.score AS screen_score, battery, price_range, 
           screen_size, screen_fresh_rate, os, battery_capacity, weight 
    FROM laptop 
    JOIN cpu_info USING (cpu_id) 
    JOIN gpu_info USING (gpu_id) 
    JOIN screen_resolution_info USING (screen_id)
    """
    
    needs = dialogue_state['intent']
    filter_conditions = []
    filter_sql = ""
    
    # Only apply specific needs if the intent matches "Specific Need"
    if needs in ['Specific Need', 'Comparison Inquiry']:
        prods_entities = dialogue_state['entities_in_conversation']
        for label, details in prods_entities.items():
            sub_conditions = []
            if details['values']:
                if label in ['prod_name', 'brand', 'screen_resolution', 'os', 'cpu', 'gpu']:
                    for value in details['values']:
                        if label == 'screen_resolution':
                            sub_conditions.append(f"LOWER({label}) = '{value}'")
                        else: 
                            sub_conditions.append(f"LOWER({label}) LIKE '%{value}%'")
                    if sub_conditions:
                        filter_conditions.append(f"({' OR '.join(sub_conditions)})")

                elif label in ['ram', 'screen_size', 'battery_capacity', 'weight', 'screen_fresh_rate', 'price_range']:
                    if len(details['values']) >= 2:
                        min_val, max_val = min(details['values']), max(details['values'])
                        #print(min_val, max_val)
                        sub_conditions.append(f"{label} BETWEEN {min_val} AND {max_val}")
                    else:
                        if label in ['weight', 'price_range']:
                            sub_conditions.append(f"{label} <= {details['values'][0]}")
                        else:
                            sub_conditions.append(f"{label} >= {details['values'][0]}")
                    if sub_conditions:
                        filter_conditions.append(f"({' OR '.join(sub_conditions)})")
    
        where_clause = " AND ".join(filter_conditions)
        if where_clause:
            filter_sql = base_sql_query +  " WHERE " + where_clause
        else: filter_sql = base_sql_query
    elif needs in ['Change Decision', 'Product Availability', 'Price Inquiry', 'Interest Confirmation']: # it can be existed selected
        prods_entities = dialogue_state['entities_in_conversation']
        for label, details in prods_entities.items():
            if details['selected']: # exist selected => don't care about the previous entities in array
                if label in ['prod_name', 'brand', 'cpu', 'gpu', 'screen_resolution', 'os']:
                    if label == 'screen_resolution':
                        filter_conditions.append(f"lower({label}) = '{details['selected']}'")
                    else:    
                        filter_conditions.append(f"lower({label}) LIKE '%{details['selected']}%'")
                else:
                    if label not in ['price_range', 'weight']:
                        filter_conditions.append(f"{label} = {details['selected']}")
                    else:
                        filter_conditions.append(f"{label}  <= {details['selected']}")
            else:
                if details['values']:
                    sub_conditions = []
                    if label in ['prod_name', 'brand', 'screen_resolution', 'os', 'cpu', 'gpu']:
                        for entity in details['values']:
                            if label in ['screen_resolution', 'brand']:
                                sub_conditions.append(f"lower({label}) = '{entity}'")
                            else:
                                sub_conditions.append(f"lower({label}) LIKE '%{entity}%'")
                    else:
                        if len(details['values']) >= 2: # normalize that we will search in range between minimum and maximum
                            min_val, max_val = min(details['values']), max(details['values'])
                            sub_conditions.append(f"{label} BETWEEN {min_val} AND {max_val}")
                        else:
                            if label in ['ram', 'screen_resolution', 'screen_size']:
                                filter_conditions.append(f"{label} = {details['values'][0]}")
                            elif label in ['price_range', 'weight']:
                                filter_conditions.append(f"{label}  <= {details['values'][0]}")
                            else:
                                filter_conditions.append(f"{label} >= {details['values'][0]}")
                    if sub_conditions:
                            filter_conditions.append(f"({' OR '.join(sub_conditions)})")
        where_clause = " AND ".join(filter_conditions)
        if where_clause:
            filter_sql = base_sql_query + " WHERE " + where_clause
    elif needs == 'Delivery Options':
        pass 
    elif needs == 'Find Similar':
        product_requirement = execute_purchase(dialogue_state)
        product_chosen = getProduct(product_requirement, dialogue_state)
        # temporarily, it will search product as a statistic of products be chosen 
        # get cpu, gpu, screen_resolution, ram

        cpu_require = product_chosen[0]['cpu']
        gpu_require = product_chosen[0]['gpu']
        screen_need = product_chosen[0]['screen_resolution']
        ram = product_chosen[0]['ram']
        filter_sql = f"SELECT laptop.id, brand, prod_name, ram, cpu, gpu, screen_resolution, cpu_info.score AS cpu_score, gpu_info.score AS gpu_score, screen_resolution_info.score AS screen_score, battery, price_range, screen_size, screen_fresh_rate, os, battery_capacity, weight from laptop join cpu_info using (cpu_id) join gpu_info using (gpu_id) join screen_resolution_info USING (screen_id) WHERE ram = {ram} AND cpu = '{cpu_require}' AND gpu = '{gpu_require}' AND screen_resolution = '{screen_need}'"

    else:
        product_requirement = execute_purchase(dialogue_state)
        print(product_requirement)
        product_chosen = getProduct(product_requirement, dialogue_state)
        cur_position = dialogue_state['positional_product']
        if cur_position != -10:
            product = getProductFromFilter(product_chosen, cur_position)
        else:
            product = product_chosen[0] # only has one product suit with user requirement
        if needs == 'Purchase Decision':
            filter_sql = f"INSERT INTO orders(id) VALUES({product['id']})"
    return filter_sql


In [23]:
import psycopg2
def execute_sql_query(query, fetch_results = False):
    conn_params = {
        "host": "localhost",
        "dbname": 'Graduation_Research_1',
        "user": 'postgres',
        "password": '03082004'
    }
    products = []
    try:
        connection = psycopg2.connect(**conn_params)
        cursor = connection.cursor()
        cursor.execute(query)
        #print(fetch_results)
        if fetch_results:
            res = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]
            for row in res:
                product = dict(zip(columns, row))
                products.append(product)
        else:
            connection.commit()
            #print("Here")
    except Exception as e:
        print(f"Error executing SQL query: {e}")
    finally:
        if cursor:
            cursor.close()
        if connection:
            connection.close()
    return products if fetch_results else None


In [24]:
def reset_weight(weights):
    for key in weights:
        weights[key] = 0.0

In [25]:
def check_for_performance(score):
    if score >= 7.5:
        print("Sản phẩm này có thể đáp ứng rất tốt cho nhu cầu của bạn")
    elif score >= 6.5:
        print("Sản phẩm này có thể đáp ứng ổn cho nhu cầu của bạn")
    elif score >= 5.5:
        print("Sản phẩm này có thể đáp ứng nhu cầu của bạn một cách trung bình")
    else:
        print("Rất tiếc, thông số của sản phẩm này không đủ để đáp ứng nhu cầu của bạn, bạn nên tìm hiểu những mấu khác")

In [26]:
import joblib

tfidf_vectorizer = joblib.load('tfidf_vectorizer_5.pkl')

In [27]:
# Print the current state of dialogue_state
def print_dialogue_state(dialogue_state):
    if dialogue_state['intent'] is not None:
        print(f"Intent: {dialogue_state['intent']}")
    
    print("\nProduct Specifications:")
    for key, value in dialogue_state['entities_in_conversation'].items():
        if key not in dialogue_state['entities_for_specific_need']:  
            if value["values"]:  
                value_str = ', '.join(map(str, value['values']))
                print(f"{key.capitalize().replace('_', ' ')}: {value_str}")
            if value["selected"]:  
                print(f"Selected {key.capitalize().replace('_', ' ')}: {value['selected']}")
    
    print("\nSpecific Needs:")
    for key, value in dialogue_state['entities_for_specific_need'].items():
        if value:  # Check if the specific need is True
            print(f"  {key.capitalize().replace('_', ' ')}: {value}")

def printProduct(dialogue_state):
    print("\nList of Products: ")
    i = 1
    for product in dialogue_state['product_in_current_message']:
        print(f"Product {i}:")
        for key, value in product.items():
            if dialogue_state['intent'] == 'Price Inquiry':
                if key in ['prod_name', 'price_range']:
                    print(f"{key}: {value}")
            else:
                if key not in ['cpu_score', 'gpu_score', 'screen_resolution_score', 'evaluate_score', 'id']: # this stat do not need to show
                    print(f"{key}: {value}")
        print("__________________________________________________________")
        i+= 1

def update_dialogue_state(user_input, dialogue_state, id_to_label, num_to_label):
    tmp_product = [] # used to store temporaly the products before evaluated
    # Predict intent and update dialogue state
    predicted_intent = predict_intent(model_intent, tokenizer_intent, user_input, tfidf_vectorizer)
    dialogue_state['intent'] = predicted_intent
    
    # Predict entities and update dialogue state
    entities = predict_ner(user_input, ner_pipe, id_to_label)
    
    for entity, label in entities:
        if label == "price_range":
            entity = convert_trieu_to_number(entity)
            entity = float(entity)        
        if label in dialogue_state['entities_in_conversation']:
            if label in ['ram', 'screen_size', 'battery_capacity', 'weight', 'screen_fresh_rate']:
                entity = float(entity)
            if not any(entity == existing for existing in dialogue_state['entities_in_conversation'][label]["values"]):
                dialogue_state['entities_in_conversation'][label]["values"].append(entity)
            if predicted_intent in ["Change Decision", "Interest Confirmation", "Product Availability", "Purchase Decision", "Feature Confirmation", "Price Inquiry", "Find Similar"]:
                dialogue_state['entities_in_conversation'][label]["selected"] = entity
    
        elif label in dialogue_state['entities_for_specific_need']:
            dialogue_state['entities_for_specific_need'][label] = True

    print("Updated Dialogue State:")
    print_dialogue_state(dialogue_state)
    if predicted_intent not in ['Feature Confirmation', 'Thank You/Closing', 'Payment Option', 'Return Policy Inquiry']:
        if predicted_intent != 'Interest Confirmation':
            filter_sql = generating_sql(dialogue_state)
            if filter_sql:
                print("Generated SQL Query:")
                print(filter_sql)
                if predicted_intent != 'Purchase Decision':
                    product_retrieve = execute_sql_query(filter_sql, fetch_results=True)
                    for product_data in product_retrieve:
                        product = {
                            "id": product_data.get("id"),
                            "prod_name": product_data.get("prod_name"),
                            "brand": product_data.get("brand"), 
                            "cpu": product_data.get("cpu"), 
                            "gpu": product_data.get("gpu"),
                            "ram": product_data.get("ram"), #
                            "battery": product_data.get("battery"), #
                            "price_range": product_data.get("price_range"),
                            "screen_size": product_data.get("screen_size"),
                            "screen_resolution": product_data.get("screen_resolution"),
                            "screen_fresh_rate": product_data.get("screen_fresh_rate"), #
                            "os": product_data.get("os"), 
                            "battery_capacity": product_data.get("battery_capacity"),
                            "weight": product_data.get("weight"), #
                            "cpu_score": product_data.get("cpu_score"), # it will not be printed, used for evaluating
                            "gpu_score": product_data.get("gpu_score"), # used for evaluating
                            "screen_resolution_score": product_data.get("screen_score"), # used to evaluate
                            "evaluate_score": None
                        }    
                        #print(product)
                        tmp_product.append(product)
                    if predicted_intent == 'Price Inquiry':
                        dialogue_state['product_in_current_message'] = tmp_product
                        for product in tmp_product:
                            if product['id'] not in dialogue_state['product_in_conversation']:
                                dialogue_state['product_in_conversation'].append(product)
                        printProduct(dialogue_state)
                    else:
                        updateWeight(dialogue_state) # update only one time before doing anything
                        for product in tmp_product:
                            product['evaluate_score'] = evaluate(dialogue_state,product)
                        top_products = sorted(tmp_product, key=lambda x: x['evaluate_score'], reverse=True)[:5]
                        for product in top_products:
                            dialogue_state['product_in_current_message'].append(product)
                            if product['id'] not in dialogue_state['product_in_conversation']: # to avoid the same product be assigned to my list
                                dialogue_state['product_in_conversation'].append(product)
                        reset_weight(weights)
                        printProduct(dialogue_state)
                    dialogue_state['product_in_current_message'] = []
                else: # Purchase Confirmation
                    # check for user input, just only 1 position
                    dialogue_state['positional_product'] = extract_positional_phrases_vietnamese(user_input) 
                    execute_sql_query(filter_sql, fetch_results=False)
                    print("Complete Updated Orders")   
        else: # = interest confirmation
            feature = execute_purchase(dialogue_state)
            #print(f"Here is feature: {feature}")
            product_chosen = getProduct(feature, dialogue_state) # array
            #print(product_chosen)
            #print(f"Product chosen: {product_chosen}")
            if len(product_chosen) != 0: # existed product, do not search more
                # check for position
                dialogue_state['positional_product'] = extract_positional_phrases_vietnamese(user_input)
                cur_position = dialogue_state['positional_product']
                if cur_position != -10: # existed position
                    product_need = getProductFromFilter(product_chosen, cur_position)
                    dialogue_state['product_in_current_message'].append(product_need)
                else: 
                    for prod in product_chosen:
                        dialogue_state['product_in_current_message'].append(prod)
                printProduct(dialogue_state)
                dialogue_state['product_in_current_message'] = [] # reset
            else: 
                filter_sql = generating_sql(dialogue_state)
                if filter_sql:
                    tmp_product = []
                    product_get = execute_sql_query(filter_sql, fetch_results=True)
                    for product_data in product_get:
                        product = {
                            "id": product_data.get("id"),
                            "prod_name": product_data.get("prod_name"),
                            "brand": product_data.get("brand"), 
                            "cpu": product_data.get("cpu"), 
                            "gpu": product_data.get("gpu"),
                            "ram": product_data.get("ram"), #
                            "battery": product_data.get("battery"), #
                            "price_range": product_data.get("price_range"),
                            "screen_size": product_data.get("screen_size"),
                            "screen_resolution": product_data.get("screen_resolution"),
                            "screen_fresh_rate": product_data.get("screen_fresh_rate"), #
                            "os": product_data.get("os"), 
                            "battery_capacity": product_data.get("battery_capacity"),
                            "weight": product_data.get("weight"), #
                            "cpu_score": product_data.get("cpu_score"), # it will not be printed, used for evaluating
                            "gpu_score": product_data.get("gpu_score"), # used for evaluating
                            "screen_resolution_score": product_data.get("screen_score"), # used to evaluate
                            "evaluate_score": None
                        }    
                        tmp_product.append(product)
            
                    updateWeight(dialogue_state)
                    for product in tmp_product:
                        product['evaluate_score'] = evaluate(dialogue_state, product)
                    top_products = sorted(tmp_product, key=lambda x: x['evaluate_score'], reverse=True)[:5]
                    for product in top_products:
                        dialogue_state['product_in_current_message'].append(product)
                        if product['id'] not in dialogue_state['product_in_conversation']:
                            dialogue_state['product_in_conversation'].append(product)
                    reset_weight(weights)
                    printProduct(dialogue_state)     
                    dialogue_state['product_in_current_message'] = [] # reset the state for the next message  

    elif predicted_intent == 'Return Policy Inquiry':
        refund_rate = 70
        print(
            f"Với mọi sản phẩm của chúng tôi, quý khách có thể trải nghiệm và trả lại nếu cảm thấy không phù hợp mà không mất phí trong vòng 7 ngày đầu, "
            f"và nhận lại {refund_rate}% số tiền trong vòng 1 tháng nếu quý khách muốn đổi trả. Sau 30 ngày, chúng tôi không nhận bất cứ yêu cầu đổi trả nào, "
            f"mà sẽ mua lại tùy theo tình trạng của sản phẩm."   
        )
    elif predicted_intent == 'Feature Confirmation':
        product_requirement = execute_purchase(dialogue_state)
        print(product_requirement)
        product_chosen = getProduct(product_requirement, dialogue_state)
        dialogue_state['positional_product'] = extract_positional_phrases_vietnamese(user_input)
        cur_position = dialogue_state['positional_product']
        if cur_position != -10:
            product_need = getProductFromFilter(product_chosen, cur_position)
        else:
            product_need = product_chosen[0]
        #print(product_chosen)
        dialogue_state['product_in_current_message'].append(product_need)
        printProduct(dialogue_state)
        dialogue_state['product_in_current_message'] = []
        updateWeight(dialogue_state) # update each time perform calculating
        product_chosen['evaluate_score'] = evaluate(dialogue_state, product_chosen)
        #print(weights)
        #print(product_chosen['evaluate_score'])
        reset_weight(weights)
        check_for_performance(product_chosen['evaluate_score'])
    dialogue_state['positional_product'] = -10 # reset

            
    
# Reset the dialogue state for a new conversation
def reset_dialogue_state(dialogue_state):
    dialogue_state["intent"] = None
    for key in dialogue_state["entities_in_conversation"]:
        dialogue_state["entities_in_conversation"][key]["values"] = []
        dialogue_state["entities_in_conversation"][key]["selected"] = None
    for key in dialogue_state["entities_for_specific_need"]:
        dialogue_state["entities_for_specific_need"][key] = False
    dialogue_state['product_in_conversation'] = []


In [35]:
list_quit = ['bye', 'quit']
while(1):
    user_input = input("User: ")
    print(f"User: {user_input}")
    if(user_input.lower() in list_quit):
        print('Bye')
        reset_dialogue_state(dialogue_state)
        reset_weight(weights)
        break
    update_dialogue_state(user_input, dialogue_state, id_to_label, num_to_label) 

User: Tôi cần tìm laptop khoảng 25 triệu để chơi game và làm về đồ họa 
Updated Dialogue State:
Intent: Specific Need

Product Specifications:
Price range: 25000000.0

Specific Needs:
  Game task: True
Generated SQL Query:

    SELECT laptop.id, brand, prod_name, ram, cpu, gpu, screen_resolution, 
           cpu_info.score AS cpu_score, gpu_info.score AS gpu_score, 
           screen_resolution_info.score AS screen_score, battery, price_range, 
           screen_size, screen_fresh_rate, os, battery_capacity, weight 
    FROM laptop 
    JOIN cpu_info USING (cpu_id) 
    JOIN gpu_info USING (gpu_id) 
    JOIN screen_resolution_info USING (screen_id)
     WHERE (price_range <= 25000000.0)

List of Products: 
Product 1:
prod_name: Laptop Asus TUF Gaming A16 FA617NSR (RL100W)
brand: Asus
cpu: AMD Ryzen 7 - 7435HS
gpu: AMD Radeon RX 7600S, 8 GB
ram: 16
battery: 4
price_range: 24990000.0
screen_size: 16.0
screen_resolution: WUXGA
screen_fresh_rate: 144
os: Windows 11 Home SL
battery_capacity

AttributeError: 'list' object has no attribute 'get'

In [29]:
import re
def extract_positional_phrases_vietnamese(text):
    positional_keywords = {
        "đầu tiên": 0,
        "đầu": 0,
        "cuối": -1,
        "cuối cùng": -1,
        "sau cùng": -1,
        "vừa được": -1,
        "mới được": -1,
        "vừa rồi": -1,
        "thứ nhất": 0,
        "thứ hai": 1
    }

    regex = r'\b(đầu tiên|đầu|cuối cùng|sau cùng|cuối|vừa được|mới được|vừa rồi|thứ nhất|thứ hai|thứ (\d+))\b'
    match = re.search(regex, text.lower(), re.UNICODE)

    if match:
        matched_word = match.group(1)  
        if "thứ" in matched_word:
            if matched_word in positional_keywords:
                return positional_keywords[matched_word]  
            else:
                number = int(match.group(2))  
                return number - 1
        else:
            return positional_keywords[matched_word]
    else:
        return -10  # Default value for no match

# Example usage
print(extract_positional_phrases_vietnamese("đây là sản phẩm thứ nhất"))  # Output: 0
print(extract_positional_phrases_vietnamese("đây là sản phẩm thứ hai"))   # Output: 1
print(extract_positional_phrases_vietnamese("sản phẩm cuối cùng"))        # Output: -1
print(extract_positional_phrases_vietnamese("sản phẩm thứ 3"))            # Output: 2
print(extract_positional_phrases_vietnamese("không có gì phù hợp"))       # Output: -10

0
1
-1
2
-10
