In [11]:
# Imports used by the rest of the notebook (data loading, tokenizer, trained model)
import torch
import pandas as pd
from extended_function import *

from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the dataset from disk; later cells read its 'service' column and
# pass the whole frame to extract_conversation_pairs.
df = pd.read_csv(filepath_or_buffer="../data/combine_df.csv")

In [12]:
# Tokenizer for the 'bert-base-uncased' checkpoint; predict_intent_bert uses it
# to turn raw text into input ids and an attention mask.
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path='bert-base-uncased',
)

In [None]:
# Load the trained classifier that was saved as a whole pickled model object.
#
# SECURITY: weights_only=False unpickles arbitrary Python objects, which can run
# code embedded in the file. Only load checkpoints that come from a trusted source.
#
# map_location="cpu" makes the load succeed on machines without CUDA even when the
# checkpoint was saved from a GPU; the model is moved to the selected device by the
# later `model.to(device)` cell.
print("\nLoading model...")
model = torch.load(
    "../model/model_25epochs.pth",
    map_location="cpu",
    weights_only=False,
)
print("\nModel Load Completed")


Loading model...

Model Load Completed


In [19]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fit the encoder on the 'service' column so predicted class indices can be
# decoded back into label names by predict_intent_bert.
# NOTE(review): this assumes the column produces the same class ordering that was
# used when the model was trained -- confirm.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df['service'])

# Kept as the last expression of the cell: moves the model to the device and
# displays the returned module.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [25]:
# Predict the intent of a message with the loaded BERT classifier
def predict_intent_bert(text, return_confidence=False):
    """Predict the intent label for ``text`` using the notebook's BERT model.

    Relies on the notebook-level ``model``, ``tokenizer``, ``device`` and
    ``label_encoder`` objects created in earlier cells.

    Args:
        text: Input handed to the tokenizer. A single message is expected: the
            predicted index is read with ``.item()``, which only works when
            exactly one prediction is produced.
        return_confidence: When True, also return the softmax probability of
            the predicted class. Defaults to False, which keeps the original
            label-only return value.

    Returns:
        The decoded label, or a ``(label, confidence)`` tuple when
        ``return_confidence`` is True.
    """
    model.eval()  # switch the model to evaluation mode before inference

    encoding = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=64,
        return_tensors='pt'
    )

    # Inputs must live on the same device as the model.
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # No gradients are needed for prediction.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
        confidence, predicted = torch.max(probabilities, 1)

    # Map the class index back to its original label name.
    predicted_label = label_encoder.inverse_transform([predicted.cpu().item()])[0]

    if return_confidence:
        return predicted_label, confidence.cpu().item()
    return predicted_label

In [21]:
# Build the conversation store that generate_response passes to
# find_similar_conversation when looking for similar past exchanges.
# NOTE(review): extract_conversation_pairs is presumably provided by the star
# import of extended_function; its return structure is not visible here -- confirm.
conversation_db = extract_conversation_pairs(df)

In [22]:
# Checklist appended to the reply for intents that need extra details.
_INTENT_FOLLOW_UPS = {
    'training_request': "Please provide:\n- Number of participants\n- Preferred dates\n- Specific topics\n- Budget constraints",
    'it_issue_report': "To help resolve this quickly, please share:\n- Device details\n- Error messages\n- When the issue started\n- Steps already taken",
    'access_request': "To process your request, I need:\n- System/application name\n- Required access level\n- Business justification\n- Manager approval",
    'time_off_report': "Please confirm:\n- Exact dates\n- Type of leave\n- Handover plan",
}

# (entity key, sentence template) pairs, appended in this order when the key is present.
_ENTITY_SENTENCES = (
    ('training_topic', "\n\nI see you're interested in {} training."),
    ('issue_type', "\n\nI understand you're experiencing {} issues."),
)


def _parse_entities(entities):
    """Return ``entities`` as a mapping, or an empty dict if it cannot be interpreted."""
    # Imported locally so the helper does not depend on `json` leaking in through
    # the notebook's star import.
    import ast
    import json
    from collections.abc import Mapping

    if isinstance(entities, Mapping):
        return entities
    if not isinstance(entities, str):
        return {}

    # Try strict JSON first, then a Python literal (handles single-quoted dicts and
    # apostrophes inside values), and finally the legacy quote-swapping fallback.
    parsers = (
        json.loads,
        ast.literal_eval,
        lambda raw: json.loads(raw.replace("'", '"')),
    )
    for parse in parsers:
        try:
            parsed = parse(entities)
        except (ValueError, SyntaxError, TypeError, RecursionError):
            continue
        if isinstance(parsed, Mapping):
            return parsed
    return {}


def generate_response(user_message, intent, entities=None):
    """Generate a response using similar conversations and custom formatting.

    Args:
        user_message: The user's message, forwarded to ``find_similar_conversation``.
        intent: Intent name as a string; underscores are shown as spaces in the
            reply, and it selects the follow-up checklist when one is defined.
        entities: Optional mapping, or a string holding a JSON object / Python
            dict literal. Entities that cannot be parsed are ignored (best effort).

    Returns:
        The reply text. When no similar conversation is found, a short request
        for more details is returned instead.
    """
    # Find similar conversations in the notebook-level conversation store.
    similar_convs = find_similar_conversation(user_message, intent, conversation_db)
    readable_intent = intent.replace('_', ' ')

    if not similar_convs:
        return f"I understand this is a {readable_intent}. Could you please provide more details?"

    # NOTE(review): the LAST match is used; this assumes find_similar_conversation
    # returns results ordered with the best match last -- confirm.
    best_response = similar_convs[-1]['bot_response']

    # Structured reply: acknowledgement, retrieved answer, then a blank line.
    response = f"I understand your {readable_intent}. "
    response += best_response + "\n\n"

    # Intent-specific checklist (nothing is added for intents without one).
    response += _INTENT_FOLLOW_UPS.get(intent, "")

    # Entity-specific sentences, if any entities were supplied.
    if entities:
        entities_dict = _parse_entities(entities)
        for key, template in _ENTITY_SENTENCES:
            if key in entities_dict:
                response += template.format(entities_dict[key])

    return response

In [None]:
# Demo: run a few sample messages through intent prediction and response generation.
print("Advanced Response Generator Examples:", "-" * 70, sep="\n")

# Each case pairs a user message with the entities passed to generate_response.
test_cases = [
    (
        "I need to arrange machine learning training for my team",
        {'training_topic': 'machine learning', 'number_of_participants': '5'},
    ),
    (
        "My laptop keeps crashing every time I open email",
        {'issue_type': 'system_crash', 'affected_application': 'email'},
    ),
    (
        "I'd like to request vacation days for next month",
        {'leave_type': 'vacation', 'dates': 'next month'},
    ),
]

for message, entities in test_cases:
    print(f"User: {message}")
    # The intent comes from the classifier; the reply is built from it and the entities.
    predicted_intent = predict_intent_bert(message)
    response = generate_response(message, predicted_intent, entities)
    print(f"Bot: {response}", "-" * 70, sep="\n")

Advanced Response Generator Examples:
----------------------------------------------------------------------
User: I need to arrange machine learning training for my team on topic machine learning
Bot: I understand your training request. Certainly, improving your skills is important. What kind of outcomes are you hoping to achieve through the training?', "That's a great goal to have. Do you have an estimated budget in mind for the training?

Please provide:
- Number of participants
- Preferred dates
- Specific topics
- Budget constraints

I see you're interested in machine learning training.
----------------------------------------------------------------------
User: My laptop keeps crashing every time I open email
Bot: I understand your it issue report. No problem, can you tell me a bit more about what's going on so I can try to help resolve it?

To help resolve this quickly, please share:
- Device details
- Error messages
- When the issue started
- Steps already taken

I understand y