In [4]:
%pip install torch transformers faiss-cpu numpy rouge-score nltk sacrebleu
%pip install sentence-transformers
%pip install langchain
%pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.2-py3-none-any.whl.metadata (2.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.5.2-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloa

In [6]:
# import libraries
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import faiss
import time
import psutil
from functools import lru_cache
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [14]:
def clean_definitions(definitions):
    cleaned_definitions = []
    for definition in definitions:
        cleaned_definition = definition.strip().replace('\n', ' ').replace('&lt;', '<').replace('&gt;', '>')
        cleaned_definitions.append(cleaned_definition)
    return cleaned_definitions


In [7]:
# define class fit the format
class Document:
    def __init__(self, page_content, metadata=None, doc_id=None):
        self.page_content = page_content
        self.metadata = metadata or {}
        self.id = doc_id if doc_id is not None else hash(page_content)


# Load definition files
def load_definitions(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return [line.strip() for line in file if line.strip()]

In [8]:
# Generate embeddings for definitions
class Retriever:
    def __init__(self, definitions, num_retrieved_docs=5):
        all_documents = [Document(definition, doc_id=str(i)) for i, definition in enumerate(definitions)]
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        self.db = FAISS.from_documents(all_documents, embeddings)
        self.retriever = self.db.as_retriever(search_kwargs={"k": num_retrieved_docs})
        self.index = self.db.index

    def search(self, query):
        return self.retriever.get_relevant_documents(query)

In [36]:
# setup the generation model
class T5Assistant:
    def __init__(self, model_name='t5-small'):
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name)

    def create_prompt(self, query, retrieved_info):
       return (f"Answer the question clearly based on the relevant information provided.\n"
                f"Query: {query}\n"
                f"Relevant Information: {retrieved_info}\n"
                f"Response:")

    def generate_reply(self, query, retrieved_info):
        prompt = self.create_prompt(query, retrieved_info)
        input_ids = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).input_ids
        outputs = self.model.generate(input_ids, max_length=150, num_beams=5, top_k=50, temperature=0.7, early_stopping=True)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)




In [10]:
def cleanreplys(reply):
    cleaned_reply = reply.replace("Instruction:", "").replace("Relevant information:", "").strip()
    return cleaned_reply


In [11]:
def calculate_bleu(reference_list, candidate):
    references = [ref.split() for ref in reference_list]
    candidate = candidate.split()
    smoothie = SmoothingFunction().method4
    return sentence_bleu(references, candidate, smoothing_function=smoothie)

def print_memory_usage():
    process = psutil.Process()
    memory_info = process.memory_info()
    print(f"Memory Usage: {memory_info.rss / 1024 / 1024:.2f} MB")

@lru_cache(maxsize=10)
def cached_generate_reply(assistant, query, retrieved_info):
    return assistant.generate_reply(query, retrieved_info)

In [37]:
if __name__ == "__main__":
    definitions = load_definitions('ctx_pd.txt')
    definitions = clean_definitions(definitions)

    # initialized retriever model and agenerate model
    retriever = Retriever(definitions, num_retrieved_docs=5)
    assistant = T5Assistant(model_name='t5-small')

    # sample query
    generalquestions = [
    "What is the definition of Articulation?",
    "Which accreditation framework is mentioned under the definition of Doctoral Degree?",
    "Which organization is referenced in the definition of Assessment?",
    "What is the difference between a Bachelor Degree and a Bachelor Honours Degree with reference to qualification level in the AQF?",
    "If a student studies in a foreign educational institution and is not a citizen or permanent resident of Australia, what term would the dataset use to categorize them as per the definitions?",
    "If a student completes a Certificate III and intends to directly pursue a Bachelor Degree, which concept from this dataset would likely apply to their transition?",
    "Describe the relationship between Foundation Course and Pathway Course based on their respective definitions.",
    "What percentage of modules studied that received a pass grade is referred to in the dataset, and under what name is this metric captured?",
    "If a student qualifies under the Doctoral Degree (Research) category of the AQF, what learning outcome is significant in their qualification process?",
    "Based on the definitions provided, how would the process of Admission differ from the process of Application, and what criteria must a student meet to progress from one to the other?"]


    for query in generalquestions:
        start_time = time.time()

        # search related definition
        retrieved_docs = retriever.search(query)
        retrinfo = " ".join([doc.page_content for doc in retrieved_docs[:3]])
        referencess = [doc.page_content for doc in retrieved_docs]

        # generate the reply
        geberakreplyss = assistant.generate_reply(query, retrinfo)
        geberakreplyss = cleanreplys(geberakreplyss)

        # calculate the Bleu score
        bleu_score = calculate_bleu(referencess, geberakreplyss) if referencess else "N/A"

        # print the result
        print(f"Query: {query}")
        print(f"Generated Reply:\n{geberakreplyss}")
        print(f"Reference Answers:\n{referencess}")
        print(f"BLEU Score: {bleu_score}")

        print_memory_usage()

        end_time = time.time()
        print(f"Time taken for this query: {end_time - start_time:.2f} seconds\n")




Query: What is the definition of Articulation?
Generated Reply:
: What is the definition of Articulation? Relevant Information: Articulation is Arrangements enabling students to progress from a completed qualification to another with admission and/or credit in a defined qualification pathway Or qualifying to enter the Bachelor program. created by TEQSA Matriculation is b>Matriculation means being admitted to a university having met the entry requirements to do so. /b> created by NULL - an ability to present Response:
Reference Answers:
['Articulation is Arrangements enabling students to progress from a completed qualification to another with admission and/or credit in a defined qualification pathway  Or qualifying to enter the Bachelor program. created by TEQSA', 'Matriculation is <b>Matriculation means being admitted to a university having met the entry requirements to do so. </b> created by NULL', '- an ability to present', '(c)   a level of academic achievement to move from a lower 



Query: Which accreditation framework is mentioned under the definition of Doctoral Degree?
Generated Reply:
based on the relevant information provided. Query: Which accreditation framework is mentioned under the definition of Doctoral Degree? Query: Which accreditation framework is mentioned under the definition of Doctoral Degree? Query: Which accreditation framework is mentioned under the definition of Doctoral Degree?
Reference Answers:
['"Doctoral Degree is Course with major research component: comprised of two-thirds or more research leading to a thesis/dissertation OR qualifies individuals who apply a substantial body of knowledge to research, investigate and develop new knowledge, in one or more fields of investigation, scholarship or professional practice. Two forms of Doctoral Degree with the same descriptor within the Doctoral Degree qualification type: the Doctoral Degree (Research) and the Doctoral Degree (Professional). Research is the defining characteristic of all Doctor



Query: Which organization is referenced in the definition of Assessment?
Generated Reply:
: Which organization is referenced in the definition of Assessment? Query: Which organization is referenced in the definition of Assessment? Answer the question clearly based on the relevant information provided. Query: Which organization is referenced in the definition of Assessment?
Reference Answers:
['"Assessment is Assessment is the process of appraising, evaluating and making a judgment about someone\'s knowledge, skills and ability in order to determine if they have met with and achieved the designated learning outcomes of individual units of study.', '"Assessment may be formative or summative and occur via a number of tactics e.g. written and oral tests and examinations; essays; engagement in tutorials and group projects; interviews; observations; class quizzes etc created by TEQSA"""', 'Assessment Result is The mark given for an assessment item created by The University of Western Austral



Query: What is the difference between a Bachelor Degree and a Bachelor Honours Degree with reference to qualification level in the AQF?
Generated Reply:
: What is the difference between a Bachelor Degree and a Bachelor Honours Degree? Query: What is the difference between a Bachelor Degree and a Bachelor Honours Degree with reference to qualification level in the AQF?
Reference Answers:
['"Bachelor Honours Degree is Qualifies individuals who apply a body of knowledge in a specific context to undertake professional work and as a pathway for research and further learning. Bachelor Honours Degree qualifications are located at level 8 of the Australian Qualifications Framework. Qualifications must be designed and accredited to enable graduates to demonstrate the learning outcomes expressed as knowledge, skills and the application of knowledge and skills specified in the level 8 criteria and the Bachelor Degree descriptor. created by AQF (Australian Qualifications Framework)"', '"Bachelor D



Query: If a student studies in a foreign educational institution and is not a citizen or permanent resident of Australia, what term would the dataset use to categorize them as per the definitions?
Generated Reply:
students who study in foreign educational institutions. In Australia you are considered to be an overseas student if you’re not an Australian citizen "International Student is Any student who is not a domestic student OR a student who is NOT one of the following: Destination Provider home Location Citizen, Destination Provider home Location permanent, humanitarian visa holder, Destination Provider home Location holder of a permanent visa other than a permanent humanitarian visa" Response:
Reference Answers:
['Students who study in foreign educational institutions. In Australia you are considered to be an overseas student if you’re not an Australian citizen', '"International Student is Any student who is not a domestic student OR a student who is NOT one of the following: Dest



Query: If a student completes a Certificate III and intends to directly pursue a Bachelor Degree, which concept from this dataset would likely apply to their transition?
Generated Reply:
to the next year level; (b) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one
Reference Answers:
['"Certificate III is Qualifies individuals who apply a broad range of knowledge and skills in varied contexts to undertake skilled work and as a pathway for further learning. Designed and accredited to enable graduates to demonstrate the learning outcomes expressed as knowledge, skills and the application of knowledge and skills specifie



Query: Describe the relationship between Foundation Course and Pathway Course based on their respective definitions.
Generated Reply:
e.g. Foundation = 5.5 IELTS; Diploma = 6.0 IELTS OR A path or sequence of learning or experience that can be followed to achieve a final study goal. Query: Describe the relationship between Foundation Course and Pathway Course based on their respective definitions. Query: Describe the relationship between Foundation Course and Pathway Course based on their respective definitions.
Reference Answers:
['"Pathway Course is An award or non-award course/program that a student completes ahead of access to a subsequent course (e.g. ELICOS ahead of Foundation Program; diploma ahead of degree, undergraduate degree ahead of postgraduate degree etc).  Pathway courses/programs typically include: ELICOS, Foundation, Diploma, Pre-Masters, but may also include other sub-bachelor courses such as associate degrees. created by TEQSA"', '"Pathway is UNDERGRADUATE STUDIES in



Query: What percentage of modules studied that received a pass grade is referred to in the dataset, and under what name is this metric captured?
Generated Reply:
: What percentage of modules studied that received a pass grade is referred to in the dataset. Query: What percentage of modules studied that received a pass grade is referred to in the dataset, and under what name is this metric captured? Response:
Reference Answers:
['Assessment Result is The mark given for an assessment item created by The University of Western Australia', '"Retention Rate is Navitas Retention Rate (Census to Census):', '"Used as a standard measure of student load across the University for statistical and budgeting purposes. created by Australian Government Department of Education and Training (HEIMS)"""', '(a)   the number of credit points or particular subjects which must be completed at each year level of the course before proceeding to the next year level;', '"Unit Result is A code which identifies whet



Query: If a student qualifies under the Doctoral Degree (Research) category of the AQF, what learning outcome is significant in their qualification process?
Generated Reply:
in the Doctoral Degree (Research) category of the AQF, what learning outcome is significant in their qualification process? Query: Doctoral Degree is Course with major research component: comprised of two-thirds or more research leading to a thesis/dissertation OR qualifies individuals who apply a substantial body of knowledge to research, investigate and develop new knowledge. Designed and accredited to enable graduates to demonstrate learning outcomes expressed as knowledge, skills and the application of knowledge and skills specified in the level 5 criteria and
Reference Answers:
['"Doctoral Degree is Course with major research component: comprised of two-thirds or more research leading to a thesis/dissertation OR qualifies individuals who apply a substantial body of knowledge to research, investigate and develo



Query: Based on the definitions provided, how would the process of Admission differ from the process of Application, and what criteria must a student meet to progress from one to the other?
Generated Reply:
; (b) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement required to move from one year of a course to the next; (c) a level of academic achievement to move from a lower qualification to a
Reference Answers:
['(b)   a level of academic achievement required to move from one year of a course to the next;', '"Programme Field (Legacy) is Structured stream in a field of study or discipline within a course. Progression is defined as moving from one course stage in a ‘pathway’ to another course stage in the ‘pathway’ OR moving from first