In [111]:
import os
from getpass import getpass
from semantic_router import RouteLayer
from semantic_router.encoders import HuggingFaceEncoder
from transformers import RobertaTokenizer, RobertaForSequenceClassification

In [112]:
# Hugging Face model id used to embed route utterances and incoming queries.
# Swap in any other Hugging Face model id here.
model_name = "FacebookAI/roberta-base"

# HuggingFaceEncoder receives the model id through its `name` field; it has no
# `model_name` field. NOTE(review): the previous `model_name=...` keyword was
# presumably ignored, leaving the encoder on its built-in default model --
# confirm against the installed semantic-router version.
encoder = HuggingFaceEncoder(name=model_name)

In [113]:
from semantic_router import Route

# Route for university-wide policy questions (admission, residency, majors,
# financial aid, housing, ...). Its route name is "handbook".
handbook_route = Route(
    name="handbook",
    utterances=[
        "What are the academic requirements for undergraduate admission to the University of Michigan?",
        "How does the University of Michigan define residency for tuition purposes?",
        "What are the procedures for undergraduate students to declare or change their major?",
        "Can students pursue double majors or minors at the University of Michigan, and if so, what are the requirements?",
        "What types of financial aid are available to undergraduate students at the University of Michigan, and how can students apply for them?",
        "What academic support services does the University of Michigan offer to students, such as tutoring, advising, or study groups?",
        "What are the policies and procedures for academic probation and dismissal at the University of Michigan?",
        "Are there opportunities for undergraduate students to participate in research or internships, and how can they get involved?",
        "What are the requirements and options for housing and dining plans for students living on campus at the University of Michigan?",
        "How does the University of Michigan address issues related to diversity, equity, and inclusion on campus, and what resources are available to support students from underrepresented backgrounds?"
    ],
)

# Route for course- and programme-level questions (curriculum, assignments,
# marks, electives, ...). Its route name is "syllabus".
syllabus_route = Route(
    name="syllabus",
    utterances=[
        'what fundamental concepts and tools are covered, and how do they lay the groundwork for more advanced coursework?',
        'Can you provide details about the curriculum and assignments?',
        'How does the course integrate statistical theory with practical applications in data analysis, and what programming languages are utilized?',
        'In the "Big Data Processing and Analytics" course, what technologies and platforms are explored for managing and analyzing large-scale datasets, and what hands-on projects are included?',
        'Can you describe the syllabus?',
        'How much is the mark for assignment 2 ?',
        'How are the assignments distributed ? ',
        'How does the course address challenges specific to processing unstructured text data, and what real-world applications are discussed?',
        'In the "Advanced Topics in Data Science" course, what cutting-edge methodologies or research areas are explored, and how do they contribute to the field of data science?',
        'Can you provide an overview of the elective courses available within the Applied Data Science program, and how do they allow students to tailor their studies to their interests or career goals?',
        'How does the program incorporate practical experience through internships, capstone projects, or industry collaborations, and what opportunities are available for students to gain hands-on experience?',
        'How does the curriculum address ethical considerations and societal implications of data science, particularly in courses like "Ethics in Data Science" or "Privacy and Security in Data"?'
    ],
)

# Earlier names for the two routes. They did not match the route contents
# ("handbook" / "syllabus"), so they are kept only as aliases in case another
# cell still refers to them.
politics = handbook_route
chitchat = syllabus_route

# Both routes are collected into a single list.
routes = [handbook_route, syllabus_route]

In [114]:
from semantic_router.layer import RouteLayer

# Build the route layer from the encoder and the list of routes defined above.
rl = RouteLayer(
    routes=routes,
    encoder=encoder,
)

2024-03-22 16:11:26 INFO semantic_router.utils.logger local


In [115]:
# Send one sample question through the route layer; the cell's displayed
# value is the `name` of whatever the layer returned for it.
sample_choice = rl("Describe the syllabus for siads-523")
sample_choice.name


'syllabus'

In [116]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the sequence-classification checkpoint and its matching tokenizer.
model = AutoModelForSequenceClassification.from_pretrained(
    'cross-encoder/ms-marco-MiniLM-L-12-v2'
)
tokenizer = AutoTokenizer.from_pretrained(
    'cross-encoder/ms-marco-MiniLM-L-12-v2'
)

# The tokenizer gets two parallel lists: the same question twice, and two
# different candidate passages to pair it with.
features = tokenizer(
    [
        'How many people live in Berlin?',
        'How many people live in Berlin?',
    ],
    [
        'Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.',
        'New York City is famous for the Metropolitan Museum of Art.',
    ],
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Switch to eval mode and run the forward pass without gradient tracking.
model.eval()
with torch.no_grad():
    scores = model(**features).logits

# One logit row per (question, passage) pair.
print(scores)

tensor([[  9.4506],
        [-11.1605]])


In [120]:
from sentence_transformers import CrossEncoder

model = CrossEncoder(
    'cross-encoder/ms-marco-MiniLM-L-12-v2',
    max_length=512,
)

# Handbook-style questions that the query is scored against.
handbook = [
    "What are the academic requirements for undergraduate admission to the University of Michigan?",
    "How does the University of Michigan define residency for tuition purposes?",
    "What are the procedures for undergraduate students to declare or change their major?",
    "Can students pursue double majors or minors at the University of Michigan, and if so, what are the requirements?",
    "What types of financial aid are available to undergraduate students at the University of Michigan, and how can students apply for them?",
    "What academic support services does the University of Michigan offer to students, such as tutoring, advising, or study groups?",
    "What are the policies and procedures for academic probation and dismissal at the University of Michigan?",
    "Are there opportunities for undergraduate students to participate in research or internships, and how can they get involved?",
    "What are the requirements and options for housing and dining plans for students living on campus at the University of Michigan?",
    "How does the University of Michigan address issues related to diversity, equity, and inclusion on campus, and what resources are available to support students from underrepresented backgrounds?",
]

query = 'how is the tuition ?'

# Despite its name, `predictions` holds the model *inputs*: one
# (query, handbook question) pair per handbook entry.
predictions = [(query, element) for element in handbook]

scores = model.predict(predictions)

# Show the per-pair scores, then their arithmetic mean.
print(scores)
average = sum(scores) / len(scores)
print("average score: ", average)


[ -9.715397  -6.222407 -10.756883 -10.353505  -9.864408 -10.829663
 -11.114125 -10.674078 -10.627038 -10.923809]
average score:  -10.108131265640258
