In [52]:
from semantic_router import Route
from semantic_router.encoders import OpenAIEncoder, HuggingFaceEncoder
from semantic_router import RouteLayer
import pandas as pd

In [53]:
# Load the dataframes json files
df_synthetic = pd.read_json("synthetic_intetions.json")

X_syn = df_synthetic[['Id','Message']]
y_syn = df_synthetic['Intention'].to_list()

In [54]:
y_syn

['rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_therapist',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_group',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'rec_hotline',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'alt_habit',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_journal_mood',
 'entry_banner',
 'entry_banner',
 'entry_banner',
 'entry_banner',
 'entry_banner',
 'entry_banner',
 'entry_banner',
 'entry_banner',
 'entry_banner

In [55]:
from sklearn.model_selection import train_test_split

# Split the dataset with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X_syn, y_syn, test_size=0.1, random_state=0
)

In [56]:
# Replace "None" with None
y_train = [None if i == "None" else i for i in y_train]
y_test = [None if i == "None" else i for i in y_test]

In [57]:
# Initialize lists for each intention
rec_therapist_messages = []
rec_group_messages = []
rec_hotline_messages = []
alt_habit_messages = []
entry_journal_mood_messages = []
entry_banner_messages = []
know_mission_messages = []
know_services_messages = []
know_data_messages = []
alter_entry_messages = []
recall_entry_messages = []

# Categorize messages based on labels
for message, label in zip(X_train["Message"], y_train):
    if label == "rec_therapist":
        rec_therapist_messages.append(message)
    elif label == "rec_group":
        rec_group_messages.append(message)
    elif label == "rec_hotline":
        rec_hotline_messages.append(message)
    elif label == "alt_habit":
        alt_habit_messages.append(message)
    elif label == "entry_journal_mood":
        entry_journal_mood_messages.append(message)
    elif label == "entry_banner":
        entry_banner_messages.append(message)
    elif label == "know_mission":
        know_mission_messages.append(message)
    elif label == "know_services":
        know_services_messages.append(message)
    elif label == "know_data":
        know_data_messages.append(message)
    elif label == "alter_entry":
        alter_entry_messages.append(message)
    elif label == "recall_entry":
        recall_entry_messages.append(message)

# Define routes for each intention
rec_therapist = Route(
    name="rec_therapist",
    description="The user wants to receive a personalized recommendation for a healthcare professional.",
    utterances=rec_therapist_messages,
)

rec_group = Route(
    name="rec_group",
    description="The user wants to find support groups in their vicinity.",
    utterances=rec_group_messages,
)

rec_hotline = Route(
    name="rec_hotline",
    description="The user wants to access the contact information for emergency or non-emergency hotlines.",
    utterances=rec_hotline_messages,
)

alt_habit = Route(
    name="alt_habit",
    description="The user wants to find a healthier or more sustainable alternative to a habit they currently have.",
    utterances=alt_habit_messages,
)

entry_journal_mood = Route(
    name="entry_journal_mood",
    description="The user wants to record their thoughts, feelings, or reflections by making an entry in their journal or mood board.",
    utterances=entry_journal_mood_messages,
)

entry_banner = Route(
    name="entry_banner",
    description="The user wants to contribute a message of gratitude or positivity to the community gratitude banner.",
    utterances=entry_banner_messages,
)

know_mission = Route(
    name="know_mission",
    description="The user wants to learn about the mission, vision, and values of Squeak to Speak.",
    utterances=know_mission_messages,
)

know_services = Route(
    name="know_services",
    description="The user wants an overview of the features and functionalities of Squeak to Speak.",
    utterances=know_services_messages,
)

know_data = Route(
    name="know_data",
    description="The user wants to review the data that Squeak to Speak has collected about them.",
    utterances=know_data_messages,
)

alter_entry = Route(
    name="alter_entry",
    description="The user wants to modify an existing entry in their journal or mood board.",
    utterances=alter_entry_messages,
)

recall_entry = Route(
    name="recall_entry",
    description="The user wants to engage in a conversation with the chatbot, leveraging the knowledge of their past journal entries.",
    utterances=recall_entry_messages,
)


In [58]:
routes = [
    rec_therapist,
    rec_group,
    rec_hotline,
    alt_habit,
    entry_journal_mood,
    entry_banner,
    know_mission,
    know_services,
    know_data,
    alter_entry,
    recall_entry,
]
encoder = HuggingFaceEncoder()



In [59]:
hf_rl = RouteLayer(encoder=encoder, routes=routes) #aggregation = "mean", "max" or "sum". #top_k = 5

In [60]:
# Call the fit method
hf_rl.fit(X=X_train["Message"].to_list(), y=y_train, max_iter=500)

Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
Training: 100%|██████████| 500/500 [00:40<00:00, 12.47it/s, acc=0.92]


In [61]:
accuracy = hf_rl.evaluate(X=X_test["Message"].to_list(), y=y_test)
print(f"Accuracy: {accuracy*100:.2f}%")

Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 10.98it/s]

Accuracy: 83.33%





In [62]:
hf_rl.to_json("layer.json")

[32m2024-12-29 16:15:29 INFO semantic_router.utils.logger Saving route config to layer.json[0m
