In [20]:
from semantic_router import Route
from semantic_router.encoders import OpenAIEncoder, HuggingFaceEncoder
from semantic_router import RouteLayer
import pandas as pd

In [21]:
# Load the dataframes json files
df_synthetic = pd.read_json("synthetic_intetions.json")

X_syn = df_synthetic[['Id','Message']]
y_syn = df_synthetic['Intention'].to_list()

In [22]:
y_syn

['find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_therapist',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_support_group',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'find_hotline',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'habit_alternatives',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_mood',
 'insert_journal',
 

In [23]:
from sklearn.model_selection import train_test_split

# Split the dataset with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X_syn, y_syn, test_size=0.2, random_state=0
)

In [24]:
# Replace "None" with None
y_train = [None if i == "None" else i for i in y_train]
y_test = [None if i == "None" else i for i in y_test]

In [25]:
# Initialize lists for each intention
find_therapist_messages = []
find_support_group_messages = []
find_hotline_messages = []
habit_alternatives_messages = []
insert_mood_messages = []
insert_journal_messages = []
ask_missionvalues_messages = []
ask_features_messages = []
review_user_memory_messages = []
update_journal_messages = []
chat_about_journal_messages = []
gratitude_messages = []

# Categorize messages based on labels
for message, label in zip(X_train["Message"], y_train):
    if label == "find_therapist":
        find_therapist_messages.append(message)
    elif label == "find_support_group":
        find_support_group_messages.append(message)
    elif label == "find_hotline":
        find_hotline_messages.append(message)
    elif label == "habit_alternatives":
        habit_alternatives_messages.append(message)
    elif label == "insert_mood":
        insert_mood_messages.append(message)
    elif label == "insert_journal":
        insert_journal_messages.append(message)
    elif label == "ask_missionvalues":
        ask_missionvalues_messages.append(message)
    elif label == "ask_features":
        ask_features_messages.append(message)
    elif label == "review_user_memory":
        review_user_memory_messages.append(message)
    elif label == "update_journal":
        update_journal_messages.append(message)
    elif label == "chat_about_journal":
        chat_about_journal_messages.append(message)
    elif label == "entry_gratitude":
        gratitude_messages.append(message)    

# Define routes for each intention
find_therapist = Route(
    name="find_therapist",
    description="The user wants to receive a personalized recommendation for a healthcare professional.",
    utterances=find_therapist_messages,
)

find_support_group = Route(
    name="find_support_group",
    description="The user wants to find support groups in their vicinity.",
    utterances=find_support_group_messages,
)

find_hotline = Route(
    name="find_hotline",
    description="The user wants to access the contact information for emergency or non-emergency hotlines.",
    utterances=find_hotline_messages,
)

habit_alternatives = Route(
    name="habit_alternatives",
    description="The user wants to find a healthier or more sustainable alternative to a habit they currently have.",
    utterances=habit_alternatives_messages,
)

insert_mood = Route(
    name="insert_mood",
    description="The user wants to record their thoughts, feelings, or reflections by making an entry in their journal or mood board.",
    utterances=insert_mood_messages,
)

insert_journal = Route(
    name="insert_journal",
    description="The user wants to contribute a message of gratitude or positivity to the community gratitude banner.",
    utterances=insert_journal_messages,
)

ask_missionvalues = Route(
    name="ask_missionvalues",
    description="The user wants to learn about the mission, vision, and values of Squeak to Speak.",
    utterances=ask_missionvalues_messages,
)

ask_features = Route(
    name="ask_features",
    description="The user wants an overview of the features and functionalities of Squeak to Speak.",
    utterances=ask_features_messages,
)

review_user_memory = Route(
    name="review_user_memory",
    description="The user wants to review the data that Squeak to Speak has collected about them.",
    utterances=review_user_memory_messages,
)

update_journal = Route(
    name="update_journal",
    description="The user wants to modify an existing entry in their journal or mood board.",
    utterances=update_journal_messages,
)

chat_about_journal = Route(
    name="chat_about_journal",
    description="The user wants to engage in a conversation with the chatbot, leveraging the knowledge of their past journal entries.",
    utterances=chat_about_journal_messages,
)

insert_gratitude = Route(
    name="insert_gratitude",
    description="The user wants to insert their gratitude message into the database",
    utterances=gratitude_messages,
)


In [26]:
routes = [
        review_user_memory,
        find_therapist,
        find_support_group,
        find_hotline,
        habit_alternatives,
        insert_mood,
        insert_journal,
        ask_missionvalues,
        ask_features,
        review_user_memory,
        update_journal,
        chat_about_journal,
        insert_gratitude
        ]
encoder = HuggingFaceEncoder()

In [27]:
hf_rl = RouteLayer(encoder=encoder, routes=routes) #aggregation = "mean", "max" or "sum". #top_k = 5

In [28]:
# Call the fit method
hf_rl.fit(X=X_train["Message"].to_list(), y=y_train, max_iter=500)

Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]
Training: 100%|██████████| 500/500 [01:20<00:00,  6.19it/s, acc=0.84]


In [29]:
from collections import defaultdict


# Initialize counters for each intention
results = defaultdict(lambda: {"Test Inputs": 0, "Correct": 0, "Incorrect": 0})

# Iterate through test data to manually evaluate predictions
for message, true_label in zip(X_test["Message"], y_test):
    # Evaluate the single message to get the prediction
    single_accuracy = hf_rl.evaluate(X=[message], y=[true_label])
    
    # Increment the total test inputs for the true label
    results[true_label]["Test Inputs"] += 1
    
    # Increment correct or incorrect based on single message evaluation accuracy
    if single_accuracy == 1.0:  # Perfect match means prediction was correct
        results[true_label]["Correct"] += 1
    else:
        results[true_label]["Incorrect"] += 1

# Calculate accuracy for each intention
for intention, data in results.items():
    data["Accuracy (%)"] = round((data["Correct"] / data["Test Inputs"]) * 100, 2)

# Display the results in table format
print(f"{'Intention':<25}{'Test Inputs':<15}{'Correct':<10}{'Incorrect':<10}{'Accuracy (%)':<15}")
for intention, data in results.items():
    print(f"{intention:<25}{data['Test Inputs']:<15}{data['Correct']:<10}{data['Incorrect']:<10}{data['Accuracy (%)']:<15}")

# Calculate overall accuracy
total_inputs = sum(data["Test Inputs"] for data in results.values())
total_correct = sum(data["Correct"] for data in results.values())
overall_accuracy = round((total_correct / total_inputs) * 100, 2)

print(f"{'Average Accuracy':<25}{total_inputs:<15}{total_correct:<10}{total_inputs - total_correct:<10}{overall_accuracy:<15}")



Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 14.80it/s]
Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  9.23it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.75it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  5.83it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  9.23it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  4.04it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  5.13it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  7.61it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  5.13it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  7.38it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  5.97it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  8.61it/s]
Generating embeddings: 100%|██████

Intention                Test Inputs    Correct   Incorrect Accuracy (%)   
find_therapist           3              3         0         100.0          
insert_journal           3              2         1         66.67          
insert_mood              3              1         2         33.33          
find_hotline             2              2         0         100.0          
ask_missionvalues        2              2         0         100.0          
update_journal           6              0         6         0.0            
insert_gratitude         1              0         1         0.0            
find_support_group       2              2         0         100.0          
ask_features             1              1         0         100.0          
review_user_memory       2              0         2         0.0            
Average Accuracy         25             13        12        52.0           





In [30]:
hf_rl.to_json("layer.json")

[32m2025-01-03 16:15:16 INFO semantic_router.utils.logger Saving route config to layer.json[0m
