In [124]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.metrics.pairwise import cosine_similarity

In [125]:
# Load dataset
df = pd.read_json('LLM_Hallucination_Dataset.json', lines=True)

In [126]:
df.head()

Unnamed: 0,ID,user_query,chatgpt_response,hallucination_label
0,1,Produce a list of common words in the English ...,"the, a, and, to, in, that, is, it, of, for, wi...",no
1,2,Provide a few examples of homophones.,1. allowed (permitted) and aloud (out loud)\n2...,yes
2,3,Create a chart outlining the world's populatio...,"Unfortunately, as an AI language model, I cann...",yes
3,4,Design a shape with 10 vertices (corners).,Here is an example of a shape with 10 vertices...,yes
4,5,Automatically generate a 10 by 10 multiplicati...,"Sure, here is a 10 by 10 multiplication table:...",yes


In [127]:
# Drop missing values
df = df.dropna()

In [128]:
# Use relevant columns
df = df[['user_query', 'chatgpt_response', 'hallucination_label']]

In [129]:
# Map labels if necessary (e.g., True/False to 0/1)
df['hallucination_label'] = df['hallucination_label'].map({'yes': 1, 'no': 0})

In [130]:
df['user_query'] = df['user_query'].str.strip()
df['chatgpt_response'] = df['chatgpt_response'].str.strip()

In [131]:
df['chatgpt_response'] = df['chatgpt_response'].str.replace(r'\n', ' ', regex=True)
df['chatgpt_response'] = df['chatgpt_response'].str.replace(r'```', '', regex=True)

In [132]:
# Feature Engineering: Combine query and answer for vectorization
df['input_text'] = df['user_query'] + " " + df['chatgpt_response']

In [133]:
# Vectorize text
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(df['input_text'])
y = df['hallucination_label']

In [134]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [135]:
# Model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [136]:
# Prediction & Evaluation
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]

print(classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))

              precision    recall  f1-score   support

           0       0.79      0.98      0.88       783
           1       0.56      0.07      0.12       217

    accuracy                           0.79      1000
   macro avg       0.67      0.53      0.50      1000
weighted avg       0.74      0.79      0.71      1000

ROC AUC: 0.610672646267752


In [137]:
def report_confidence(query, llm_output, model, vectorizer):
    """
    Print the confidence score that rates how likely a hallucination occurred,
    for a given LLM output and query. Uses multiple thresholds for nuanced ratings.
    """
    input_text = query + " " + llm_output
    features = vectorizer.transform([input_text])
    hallucinate = model.predict_proba(features)[0, 1]
    confidence_score = 1 - hallucinate  # Higher = less likely hallucination

    # Multi-level rating
    if confidence_score >= 0.9:
        rating = "Extremely unlikely to be a hallucination."
    elif confidence_score >= 0.75:
        rating = "Very unlikely to be a hallucination."
    elif confidence_score >= 0.55:
        rating = "Probably accurate, but double-check recommended."
    elif confidence_score >= 0.35:
        rating = "Possibly a hallucination. Review carefully."
    else:
        rating = "Likely to be a hallucination. Exercise caution!"

    print(f"Query: {query}")
    print(f"LLM Answer: {llm_output}")
    print(f"Confidence Score: {confidence_score:.2f} (higher = less likely hallucination)")
    print(f"Assessment: {rating}\n")

In [138]:
# Example usage for report_confidence
report_confidence("Who is the president of France?", "Emmanuel Macron is the president of France.", clf, vectorizer)

Query: Who is the president of France?
LLM Answer: Emmanuel Macron is the president of France.
Confidence Score: 0.71 (higher = less likely hallucination)
Assessment: Probably accurate, but double-check recommended.



In [139]:
import json
import torch
from transformers import BertTokenizer, BertModel
from pprint import pprint

# ====== Load the pre-trained BERT model and tokenizer ======
tokenizer = BertTokenizer.from_pretrained(r"C:\Users\Aedan\Downloads\bert-based-uncased")
model = BertModel.from_pretrained(r"C:\Users\Aedan\Downloads\bert-based-uncased")

# Keys in the dictionary
keys = [
    'Administrative',
    'IT / Technical Support',
    'Policies & Compliance',
    'Project-Related Queries',
    'HR & Performance',
    'Collaboration & Communication Tools'
]

encoding = tokenizer(
    keys,
    padding=True,           # Pads to the longest sequence in the batch
    truncation=True,        # Optional: truncates if longer than BERT max length
    return_tensors='pt'     # Returns PyTorch tensors (can use 'tf' for TensorFlow)
)

# 3. Inspect results
print("\nToken IDs (padded):")
pprint(encoding['input_ids'])

print("\nAttention Masks:")
pprint(encoding['attention_mask'])

print("\nTokenized and Padded Keys:")
for key, token_ids in zip(keys, encoding['input_ids']):
    print(f"{key:<40} {token_ids.tolist()}")


Token IDs (padded):
tensor([[  101,  3831,   102,     0,     0,     0,     0],
        [  101,  2009,  1013,  4087,  2490,   102,     0],
        [  101,  6043,  1004, 12646,   102,     0,     0],
        [  101,  2622,  1011,  3141, 10861,  5134,   102],
        [  101, 17850,  1004,  2836,   102,     0,     0],
        [  101,  5792,  1004,  4807,  5906,   102,     0]])

Attention Masks:
tensor([[1, 1, 1, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 0]])

Tokenized and Padded Keys:
Administrative                           [101, 3831, 102, 0, 0, 0, 0]
IT / Technical Support                   [101, 2009, 1013, 4087, 2490, 102, 0]
Policies & Compliance                    [101, 6043, 1004, 12646, 102, 0, 0]
Project-Related Queries                  [101, 2622, 1011, 3141, 10861, 5134, 102]
HR & Performance                         [101, 17850, 1004, 2836, 102, 0, 0]
Colla

In [140]:
import json
import re

# Read and parse the JSON file
with open('package.json', 'r', encoding='utf8') as file:
    json_content = file.read()
    data = json.loads(json_content)

# Extract the original category keys
original_keys = list(data.keys())

print("Original category keys:")
for idx, key in enumerate(original_keys):
    print(f"{idx + 1}. {key}")

# Find the maximum number of tokens to determine padding length
max_token_count = 0
all_tokenized = []

for key in original_keys:
    # Replace punctuation and special chars with spaces
    cleaned_key = re.sub(r'[^\w\s]', ' ', key.lower())
    # Split on whitespace and remove empty tokens
    tokens = [token for token in cleaned_key.split() if len(token) > 0]
    max_token_count = max(max_token_count, len(tokens))
    all_tokenized.append(tokens)

print(f"\nMaximum token count found: {max_token_count}")

# Create tokenized and padded versions
tokenized_padded_keys = []

for key in original_keys:
    # Replace punctuation and special chars with spaces
    cleaned_key = re.sub(r'[^\w\s]', ' ', key.lower())
    # Split on whitespace and remove empty tokens
    tokens = [token for token in cleaned_key.split() if len(token) > 0]
    
    # Pad to max_token_count with empty strings
    padded_tokens = tokens.copy()
    while len(padded_tokens) < max_token_count:
        padded_tokens.append('')
    
    tokenized_padded_keys.append(padded_tokens)

print("\nTokenized and padded keys:")
for idx, tokens in enumerate(tokenized_padded_keys):
    formatted_tokens = ', '.join([f'"{t}"' for t in tokens])
    print(f"{idx + 1}. [{formatted_tokens}]")

Original category keys:
1. How do I request vacation time?
2. When is payroll processed?
3. Where can I find my pay stubs?
4. How do I change my direct deposit information?
5. Can I carry over unused vacation days?
6. How do I update my mailing address?
7. Can I request an advance on my paycheck?
8. How do I request unpaid leave?
9. How do I update my emergency contact?
10. What is the process for maternity leave?
11. Where do I find company holidays?
12. How do I request a flexible work schedule?
13. How do I enroll in direct deposit?
14. Can I be paid out for unused vacation?
15. Where can I find my hire date?
16. How do I opt out of paper paychecks?
17. How do I request a copy of my contract?
18. How do I check my remaining sick days?
19. How do I request a change in job title?
20. How do I submit overtime hours?
21. What is the policy for clocking in late?
22. Can I work part-time temporarily?
23. How do I report an absence?
24. What happens if I forget to clock in?
25. How do I re

In [141]:
employee_faq = {
    "How do I request vacation time?": "Submit your request through the HR portal under 'Leave Requests'. Manager approval is required before it is finalized.",
    "When is payroll processed?": "Payroll is processed on the 15th and last day of each month. Direct deposits typically post by the following business day.",
    "Where can I find my pay stubs?": "Pay stubs are available in the Payroll section of the HR portal.",
    "How do I change my direct deposit information?": "Update your bank details in the Payroll section of the HR portal. Changes made after payroll processing will apply to the next cycle.",
    "Can I carry over unused vacation days?": "Up to 5 days can be carried over into the next calendar year, subject to manager approval.",
    "How do I update my mailing address?": "Update your personal profile in the HR portal to ensure payroll and tax forms are sent to the correct address.",
    "Can I request an advance on my paycheck?": "Payroll advances are not standard but can be considered for emergencies with HR and finance approval.",
    "How do I request unpaid leave?": "Submit the request in the HR portal and specify 'unpaid leave'. Manager and HR approval are required.",
    "How do I update my emergency contact?": "Log in to the HR portal and update the emergency contact information in your personal profile.",
    "What is the process for maternity leave?": "Contact HR to begin the process and provide the necessary medical documentation.",
    "Where do I find company holidays?": "The holiday calendar is posted on the HR portal.",
    "How do I request a flexible work schedule?": "Submit your proposal to your manager for review and approval.",
    "How do I enroll in direct deposit?": "Go to the Payroll section of the HR portal and enter your banking details.",
    "Can I be paid out for unused vacation?": "Yes, upon termination, in accordance with company policy and state law.",
    "Where can I find my hire date?": "Your hire date is listed in your HR portal profile.",
    "How do I opt out of paper paychecks?": "Enroll in direct deposit through the Payroll section of the HR portal.",
    "How do I request a copy of my contract?": "Contact HR for a digital or printed copy of your employment contract.",
    "How do I check my remaining sick days?": "Sick leave balances are listed in the HR portal under 'Leave Balances'.",
    "How do I request a change in job title?": "Discuss with your manager and submit a formal request to HR.",
    "How do I submit overtime hours?": "Record overtime in the timekeeping system for manager approval.",
    "What is the policy for clocking in late?": "Refer to the attendance policy in the employee handbook.",
    "Can I work part-time temporarily?": "Yes, if approved by your manager and HR.",
    "How do I report an absence?": "Notify your manager as early as possible and log it in the HR portal.",
    "What happens if I forget to clock in?": "Inform your manager and submit a time adjustment request.",
    "How do I request a salary adjustment?": "Submit your request to your manager with supporting documentation.",
    "What is the process for retirement plan enrollment?": "Sign up during benefits enrollment or after your eligibility period.",
    "How do I request a pay raise?": "Discuss with your manager during your performance review.",
    "What happens to my benefits if I leave the company?": "You may be eligible for COBRA coverage. HR will provide details.",
    "Can I cash out my retirement plan?": "Yes, but penalties may apply. Contact the plan administrator for details.",
    "When will I receive my final paycheck?": "Final pay is issued on your last day or the next scheduled payroll date.",
    "How do I reset my password?": "Use the self-service password reset tool on the IT helpdesk portal or contact IT support.",
    "My computer is running slowly, what should I do?": "Restart your computer, close unused applications, and run a virus scan. If the issue persists, contact IT.",
    "How do I access the VPN?": "Install the company VPN client from the IT resources page and log in with your network credentials.",
    "Can I install my own software on my work computer?": "Only software approved by IT can be installed. Submit a software request if you need additional tools.",
    "How do I connect to the office Wi-Fi?": "Select the secure corporate network, enter your credentials, and accept the security certificate.",
    "How do I set up my email on my phone?": "Follow the instructions in the Mobile Email Setup guide on the IT portal.",
    "Why can’t I access a shared folder?": "You may not have the correct permissions. Submit a file access request through the IT helpdesk.",
    "How do I request a new laptop?": "Submit a hardware request ticket through the IT helpdesk. Approval from your manager is required.",
    "How do I update my software?": "Updates are pushed automatically. If you need to update manually, use the company’s software center.",
    "What should I do if my monitor is flickering?": "Check the cable connections and try another port. If the issue persists, contact IT.",
    "How do I connect a second monitor?": "Plug it into the available port on your computer and configure the display settings.",
    "How do I request additional RAM or storage?": "Submit a hardware upgrade request through the IT helpdesk.",
    "What do I do if my email inbox is full?": "Archive old emails, delete unnecessary messages, and empty your trash folder.",
    "How do I back up my files?": "Use the company’s cloud storage or backup system to save important files regularly.",
    "What is the process for replacing a damaged laptop?": "Report the damage to IT, submit an incident ticket, and arrange for replacement equipment.",
    "Can I connect my personal phone to company email?": "Yes, if your device meets security requirements. Follow the mobile setup guide in the IT portal.",
    "How do I request a software license?": "Submit a request through the IT helpdesk with details about the software and its intended use.",
    "What should I do if I receive a suspicious link?": "Do not click it. Forward it to the IT security team immediately.",
    "How do I access my work desktop remotely?": "Use the remote desktop tool provided by IT and connect via the VPN.",
    "How do I encrypt a file before sending it?": "Use the encryption tool available in the IT resources section.",
    "Why can’t I print to the office printer?": "Check if you are connected to the correct printer and if it has paper and toner. If not resolved, contact IT.",
    "How do I install the approved antivirus software?": "Download it from the IT software library and follow the installation guide.",
    "Can I use my own USB drive at work?": "Only encrypted, IT-approved USB drives may be used.",
    "How do I request access to a restricted system?": "Submit a system access request form via the IT helpdesk.",
    "Why can’t I log in to the HR portal?": "Ensure you are using the correct credentials. If locked out, contact IT support.",
    "How do I request admin rights on my computer?": "Admin rights are granted only for specific needs. Submit a justification to IT.",
    "How do I report a broken headset or webcam?": "Log a ticket in the IT helpdesk for replacement or repair.",
    "What is the policy on using personal cloud storage?": "Personal cloud storage is not allowed for work files.",
    "Can I use third-party cloud storage for work files?": "No, only company-approved cloud services may be used.",
    "How do I update my browser?": "Updates are pushed automatically, but you can manually update via the browser’s settings.",
    "How do I clear my browser cache?": "Go to the browser’s settings menu and select 'Clear browsing data'.",
    "What should I do if my keyboard stops working?": "Check the connection or replace the batteries if wireless. Contact IT if unresolved.",
    "How do I connect to the office projector?": "Use an HDMI or VGA cable and select the correct display source.",
    "How do I change my network password?": "Log in to the password management tool and follow the prompts.",
    "Why is my VPN connection unstable?": "Check your internet connection and restart the VPN client. Contact IT if the issue continues.",
    "How do I get training on a new software tool?": "Check the IT training resources page or request a training session.",
    "What should I do if my laptop won’t turn on?": "Check the power connection, try another outlet, and contact IT if it still will not power on.",
    "How do I request a shared mailbox?": "Submit a request through the IT helpdesk with the mailbox name and required users.",
    "How do I install approved software?": "Use the software center to install approved applications.",
    "What should I do if I encounter a phishing email?": "Do not click any links. Report it to IT immediately.",
    "Can I install beta software for testing?": "Only with IT approval and on designated test devices.",
    "What should I do if I forget my security badge?": "Contact the front desk for a temporary badge and inform IT security if needed.",
    "How do I report a cybersecurity incident?": "Notify the IT security team immediately and follow incident reporting procedures.",
    "How do I secure my laptop when traveling?": "Use a locking cable, never leave it unattended, and enable disk encryption.",
    "How do I connect to the guest Wi-Fi?": "Select the guest network and follow the on-screen registration instructions.",
    "Can I use my work email for personal matters?": "Limited personal use is allowed if it does not interfere with work and complies with policy.",
    "What should I do if my mouse is not working?": "Check the battery or cable, try another USB port, and contact IT if unresolved.",
    "How do I request a conference phone?": "Submit a request via the IT helpdesk, specifying the date and location.",
    "What is the policy on workplace searches?": "The company reserves the right to inspect property on its premises where legally permitted.",
    "How do I troubleshoot slow internet at my desk?": "Restart your computer, disconnect unused devices, and contact IT if the issue persists.",
    "Why can’t I join a video conference?": "Check your camera and microphone settings, ensure you have internet access, and restart the conferencing tool.",
    "How do I restore deleted files?": "Check the recycle bin or request recovery from IT backups.",
    "How do I request multi-factor authentication setup?": "Submit a ticket through the IT helpdesk to enable MFA on your account.",
    "How do I join a project channel in the chat platform?": "Search for the channel in the chat platform and click 'Join'.",
    "Where can I find meeting recordings?": "Recordings are stored in the meeting platform's 'Recordings' section.",
    "How do I schedule a video conference?": "Use the calendar integration to create a meeting and add video call details.",
    "How do I share my screen during a call?": "Click the 'Share Screen' icon in the meeting toolbar.",
    "How do I mute notifications during focus time?": "Enable 'Do Not Disturb' mode in your collaboration tool settings.",
    "How do I upload documents to the shared drive?": "Navigate to the shared folder and use the upload button.",
    "How do I search past chat messages?": "Use the search bar in the chat platform to look up keywords.",
    "How do I set my status to away?": "Click your profile and select 'Away' from the status menu.",
    "How do I tag someone in a message?": "Type '@' followed by their username in the chat.",
    "How do I create a poll in the chat platform?": "Use the poll feature or integration available in the chat tool.",
    "How do I share large files with the team?": "Upload them to the shared drive and send the link.",
    "How do I integrate third-party apps with the chat platform?": "Go to settings and add integrations from the app store.",
    "Where do I find the meeting dial-in number?": "It is included in the meeting invite sent to your calendar.",
    "How do I record a meeting?": "Click 'Record' in the meeting toolbar and ensure participants are notified.",
    "How do I pin a message in a chat channel?": "Hover over the message and click the 'Pin' option.",
    "How do I create a task list in the project board?": "Click 'Add Task' and assign it to the relevant member.",
    "How do I request admin permissions for a shared folder?": "Submit a request to the folder owner or IT support.",
    "How do I set up recurring meetings?": "Select 'Repeat' when creating a meeting in the calendar tool.",
    "How do I use the whiteboard feature in meetings?": "Click the 'Whiteboard' icon and collaborate in real-time.",
    "How do I forward a chat message to email?": "Use the 'Forward to Email' option available in message settings.",
    "How do I restore a deleted file from the shared drive?": "Check the 'Recycle Bin' or request a restore from IT.",
    "How do I add guests to a team meeting?": "Include their email addresses when sending the meeting invite.",
    "How do I set up chat notifications for specific keywords?": "Use keyword alerts in your chat tool settings.",
    "How do I see who has read my message?": "Check the read receipts feature in your chat platform.",
    "How do I share meeting notes with the team?": "Post them in the project channel or shared document.",
    "How do I use mentions to alert an entire team?": "Type '@team' or '@channel' depending on the platform.",
    "How do I enable captions in video meetings?": "Turn on captions from the meeting toolbar.",
    "How do I blur my background during a video call?": "Enable background effects in the meeting settings.",
    "How do I lock a meeting to prevent new participants?": "Use the 'Lock Meeting' option in the meeting controls.",
    "How do I download shared files from the chat platform?": "Click the file link and select 'Download'.",
    "How do I archive an inactive chat channel?": "Use the 'Archive' function from the channel settings menu.",
    "How do I check shared document version history?": "Right-click the document and select 'Version History'.",
    "How do I assign a task to a colleague in the project tool?": "Click 'Assign' and choose their name from the list.",
    "How do I get notifications for file changes?": "Enable file change alerts in the shared drive settings.",
    "How do I search across all shared documents?": "Use the global search bar in the document management system.",
    "How do I react to a message in chat?": "Hover over the message and select a reaction emoji.",
    "How do I request training on collaboration tools?": "Submit a request to HR or IT for tool-specific training.",
    "How do I bookmark important messages?": "Click the 'Save' or 'Bookmark' option on the message.",
    "How do I sync my chat status with my calendar?": "Enable calendar integration in the chat settings.",
    "How do I transfer host controls in a meeting?": "Select 'Make Host' in the participant list menu.",
    "How do I upload a profile picture in collaboration tools?": "Go to profile settings and upload an image.",
    "How do I add someone to a private chat group?": "Open group settings and invite the new participant.",
    "How do I check who has access to a shared folder?": "Right-click the folder and select 'Share Settings'.",
    "How do I connect my mobile device to collaboration tools?": "Download the mobile app and sign in with your work account.",
    "How do I set up a project board for a new team?": "Create a new board in the project management tool and invite members.",
    "How do I link a chat conversation to a task?": "Use the task creation shortcut from the chat message.",
    "What is the company's dress code?": "Business casual is standard unless otherwise specified for an event or meeting.",
    "How do I report harassment?": "Contact HR directly or submit a confidential report through the Ethics Hotline.",
    "Can I work from home?": "Remote work is allowed with manager approval, subject to role requirements.",
    "What is the policy on gifts from clients?": "Gifts over $50 must be reported to compliance and may need to be declined.",
    "How do I get approval for overtime?": "Request approval from your manager before working any overtime hours.",
    "Are personal devices allowed for work purposes?": "Yes, if they meet security requirements and are approved by IT.",
    "Where can I find safety procedures?": "Safety procedures are in the 'Health & Safety' section of the employee portal.",
    "What is the policy on smoking at work?": "Smoking is only allowed in designated outdoor areas.",
    "How do I report a safety hazard?": "Report hazards immediately to your manager and log it in the safety reporting system.",
    "Can I record conversations at work?": "Recording requires consent from all parties and must comply with applicable laws.",
    "What is the company’s stance on workplace relationships?": "Relationships must be disclosed if they could create a conflict of interest.",
    "Can I bring my pet to work?": "Pets are not allowed unless they are certified service animals.",
    "What is the acceptable use policy for company equipment?": "Company equipment is for work purposes only, unless otherwise authorized.",
    "What is the policy on alcohol at company events?": "Alcohol may be served at approved events but must be consumed responsibly.",
    "What is the policy on workplace violence?": "Any threats or acts of violence must be reported immediately to HR or security.",
    "Can I bring visitors to the office?": "Visitors must be registered in advance and check in at reception.",
    "What is the company’s stance on discrimination?": "Discrimination of any kind is prohibited and will result in disciplinary action.",
    "What is the policy on remote work outside the country?": "Working outside the country requires HR and legal approval.",
    "How do I request time off for religious observances?": "Submit your request in advance to your manager for approval.",
    "What is the policy on charitable donations?": "Donations in the company’s name must be approved by corporate social responsibility.",
    "What is the procedure for evacuations?": "Follow posted evacuation routes and the instructions of emergency wardens.",
    "What is the company policy on accessibility?": "The company is committed to providing reasonable accommodations as required by law.",
    "What is the policy on remote monitoring of employees?": "Company systems may be monitored in compliance with applicable laws.",
    "Can I refuse unsafe work?": "Yes, report unsafe conditions to your manager immediately.",
    "What is the company’s stance on retaliation?": "Retaliation against individuals who report concerns in good faith is prohibited.",
    "What is the policy on posting about the company online?": "Any public statements must be approved by corporate communications.",
    "How do I join a project team?": "Speak with your manager about openings in projects relevant to your skills.",
    "How do I submit a project update?": "Use the project management platform to log updates in the weekly status report section.",
    "What should I do if I miss a project deadline?": "Inform your project lead immediately and provide a revised timeline.",
    "How do I request additional resources for a project?": "Submit a resource request to your project manager for review.",
    "Can I work on more than one project at once?": "Yes, with manager approval and as long as deadlines are met.",
    "Where do I find the project scope document?": "Scope documents are stored in the project’s shared folder.",
    "Who do I contact for project approvals?": "Approvals are managed by the project sponsor or steering committee.",
    "How do I escalate a project issue?": "Escalate to your project manager first, then to the program director if unresolved.",
    "What tool is used for project management?": "The company uses a designated project management platform listed on the intranet.",
    "How do I get access to a project workspace?": "Request access from the project administrator through the IT helpdesk.",
    "Can I change a project deadline?": "Only the project sponsor or manager can authorize changes to deadlines.",
    "How do I submit project deliverables?": "Upload them to the designated folder in the project management tool.",
    "What happens if project scope changes?": "Document the changes and seek approval from the project sponsor.",
    "How are project roles assigned?": "Roles are assigned by the project manager based on skills and availability.",
    "How do I log my project hours?": "Record them in the time tracking section of the project management platform.",
    "Who reviews project progress?": "Progress is reviewed by the project sponsor and stakeholders at regular intervals.",
    "What is the process for closing a project?": "The project manager completes final documentation and conducts a closure meeting.",
    "Can I work remotely on a project?": "Yes, if approved by the project manager and the work can be done remotely.",
    "What is the procedure for project budget changes?": "Submit a change request to the finance team and project sponsor for approval.",
    "How do I find out who is on a project team?": "Check the project team roster in the project management platform.",
    "How do I handle project conflicts?": "Address them directly with the involved parties or escalate to the project manager.",
    "What should I do if a project member misses deadlines?": "Notify the project manager and document the impact on timelines.",
    "How are project risks managed?": "Risks are tracked in the project risk log and reviewed during status meetings.",
    "Can I take over a task from another project member?": "Only with project manager approval and coordination.",
    "How do I create a project plan?": "Use the company’s project planning template and follow the planning guidelines.",
    "How do I get assigned as project lead?": "Managers select leads based on experience, performance, and availability.",
    "What is the process for adding a new project task?": "Submit a task addition request to the project manager.",
    "How do I access archived project documents?": "Request them from the records department or project archive folder.",
    "What is the process for changing project objectives?": "Submit a formal change request to the project steering committee.",
    "How do I request additional project funding?": "Provide a justification and revised budget to the finance team and project sponsor.",
    "Can I work flexible hours for a project?": "Yes, with project manager approval and as long as deadlines are met.",
    "How do I set my goals for the year?": "Work with your manager to define measurable goals in the performance system.",
    "Can I request a salary increase?": "Yes, during annual review discussions or with documented justification outside of cycle.",
    "How do I give feedback to my manager?": "Feedback can be provided through the anonymous feedback tool in the HR portal.",
    "Where do I find training opportunities?": "Training schedules and courses are listed in the Learning Management System (LMS).",
    "How do I request mentoring?": "Sign up for the mentoring program through the HR development page.",
    "How do I report concerns about performance ratings?": "Raise the concern with HR or your department head.",
    "How do I know if I am eligible for promotion?": "Review the promotion criteria in the HR portal and speak with your manager.",
    "Can I change my career path within the company?": "Yes, discuss with your manager and apply for internal roles that match your skills.",
    "How do I access my performance metrics?": "They are displayed in your HR portal performance dashboard.",
    "How do I request flexible work arrangements for performance improvement?": "Submit a proposal to your manager for approval.",
    "How do I prepare for a performance review?": "Review your goals, achievements, and feedback in the HR portal.",
    "What is the process for talent assessments?": "HR schedules assessments during annual talent review periods.",
    "Can I request job shadowing?": "Yes, through the HR development program with manager approval.",
    "How are performance bonuses calculated?": "They are based on individual performance ratings and company results.",
    "How do I access HR policies on promotions?": "They are posted in the HR portal under 'Policies and Guidelines'.",
    "Can I request feedback from clients for performance reviews?": "Yes, coordinate with your manager to collect client feedback.",
    "What is the process for recognizing high-performing teams?": "Submit a team recognition request through the HR portal."
  }

In [142]:
# assumes: employee_faq is {question:str -> answer:str}
keys = list(employee_faq)  # snapshot so we can safely delete later

In [143]:
def question_to_token_ids(q, tokenizer, max_len=32):
    enc = tokenizer(
        q,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    return tuple(enc['input_ids'].squeeze(0).tolist())  # tuple so it can be a dict key


In [144]:
new_dict = {}

for i in range(len(keys)):            # or range(min(N, len(keys))) if you want only first N
    q = keys[i]
    a = employee_faq[q]
    token_ids = question_to_token_ids(q, tokenizer)

    # optional: guard against collisions (rare, but truncation can do it)
    if token_ids in new_dict:
        print(f"⚠️ collision at index {i}; overwriting existing value")

    new_dict[token_ids] = a

print("New dict length:", len(new_dict))
print("Old dict length (still full):", len(employee_faq))


New dict length: 203
Old dict length (still full): 203


In [145]:
def preview(new_dict, tokenizer, n=3):
    pad = tokenizer.pad_token_id
    items = list(new_dict.items())[:n]
    for token_ids, ans in items:
        ids = list(token_ids)
        if pad is not None:
            while ids and ids[-1] == pad:
                ids.pop()
        decoded = tokenizer.decode(ids, skip_special_tokens=True)
        print("Decoded Q:", decoded)
        print("Answer   :", ans)
        print("-" * 50)

preview(new_dict, tokenizer, n=3)


Decoded Q: how do i request vacation time?
Answer   : Submit your request through the HR portal under 'Leave Requests'. Manager approval is required before it is finalized.
--------------------------------------------------
Decoded Q: when is payroll processed?
Answer   : Payroll is processed on the 15th and last day of each month. Direct deposits typically post by the following business day.
--------------------------------------------------
Decoded Q: where can i find my pay stubs?
Answer   : Pay stubs are available in the Payroll section of the HR portal.
--------------------------------------------------


In [146]:
for k in keys:
    del employee_faq[k]
employee_faq.update(new_dict)


In [147]:
print("Final dict length:", len(employee_faq))
some_key = next(iter(employee_faq))  # one token-id key
print("Sample key length:", len(some_key))
print("Sample answer:", employee_faq[some_key][:120], "...")


Final dict length: 203
Sample key length: 32
Sample answer: Submit your request through the HR portal under 'Leave Requests'. Manager approval is required before it is finalized. ...


In [149]:
import torch.nn.functional as F

def find_best_match(user_q, tokenizer, model, faq_embeddings, faq_answers):
    # embed user query
    enc = tokenizer(user_q, add_special_tokens=True, max_length=32,
                    padding='max_length', truncation=True, return_tensors='pt')
    with torch.no_grad():
        user_emb = model(**enc).pooler_output.squeeze(0)  # [hidden_size]

    # compute cosine similarities
    sims = F.cosine_similarity(user_emb.unsqueeze(0), faq_embeddings)  # [num_faqs]

    # get best match
    best_idx = torch.argmax(sims).item()
    return faq_answers[best_idx], sims[best_idx].item()


In [150]:
question = "Where do I see my paycheck?"
answer, score = find_best_match(question, tokenizer, model, faq_embeddings, faq_answers)

print("User Q:", question)
print("Best match:", answer)
print("Similarity score:", score)


TypeError: cosine_similarity(): argument 'x2' (position 2) must be Tensor, not list