In [11]:
from transformers import BertTokenizer, TFBertModel,BertForQuestionAnswering,BertForMaskedLM,BertModel
from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import pipeline
import logging
import tensorflow as tf
import torch
from nltk.tokenize import sent_tokenize
import faiss
import numpy as np

# Raise the threshold of the "transformers.modeling_utils" logger to WARNING so
# that lower-severity records from that logger are not emitted in the notebook.
logging.getLogger("transformers.modeling_utils").setLevel(logging.WARNING)

In [34]:
# Load the tokenizer and both BERT checkpoints from local directories; the
# paths are relative, so they resolve against the current working directory.
tokenizer = BertTokenizer.from_pretrained('./pretrained')
# Question-answering model; not referenced by the retrieval cells shown here.
QA = BertForQuestionAnswering.from_pretrained("./QA_model")
# Encoder that `embed` calls to turn text into vectors.
model = BertModel.from_pretrained("./model")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [35]:
def embed(text,pool=False):
    """Tokenize ``text`` and encode it with the module-level BERT ``model``.

    Args:
        text: Text handed to the module-level ``tokenizer``.
        pool: When True, return the model's ``pooler_output``; otherwise
            return ``last_hidden_state``.

    Returns:
        A ``(tokens, tensor)`` tuple: the tokenizer output and the selected
        model output.

    Notes:
        The forward pass runs under ``torch.no_grad()``: this helper is used
        for inference only, and without it every call keeps an autograd graph
        (and all intermediate activations) alive. The returned tensor
        therefore does not require grad; calling ``.detach()`` on it is still
        valid.
    """
    # Truncate to the encoder's position-embedding limit so over-long input
    # does not index past the position table inside the model.
    tokens = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )
    with torch.no_grad():
        outputs = model(**tokens)

    output_key = "pooler_output" if pool else "last_hidden_state"
    return tokens, outputs[output_key]

In [36]:
# Raw source text for the retrieval demo. Later cells split it into sentences
# with `sent_tokenize` and embed each sentence.
context= """The output of thesis course in BINUS is to publish the thesis paper at reputable international scholarly publication. To achieve this goal, thesis paper in the International Relations Program is structured as scholarly literature, like academic journal article or conference paper.

There are two thesis schemes that students can choose: (1) regular/non-class scheme (nonkelas) and (2) scholarly article scheme (artikel ilmiah).

There are two thesis course schemes that students can choose: (1) 6-credit Thesis course and (2) 2+4-credit Pre-Thesis and Thesis courses.

Output of Thesis Course: Scholarly Publication
Students are expected to submit their thesis paper at reputable international scholarly publication, either by co-authoring with their supervisors or as single author.

Examples of students’ thesis-based scholarly publication: download here

Format of Thesis Paper: Scholarly Literature
Thesis paper is structured as scholarly literature, like academic journal article or conference paper, not in chapters. The sections are:

Abstract
Introduction
Literature Review
Research Method
Findings, Analysis, and Discussion
Conclusion
Bibliography/References
Thesis word count is as a journal article or conference paper; approximately 5,000-8,000 words.

Thesis Schemes
There are two thesis schemes that students can choose: (1) regular/non-class scheme (nonkelas) and (2) scholarly article scheme (artikel ilmiah).

In the regular/non-class scheme (nonkelas), students finish writing their thesis, submit the thesis soft cover, undergo thesis defense, and finally submit the thesis paper to scholarly publication (academic journal or conference) after the process is complete. The thesis is graded based on the thesis content, supervisory process, and thesis defense process.

In the scholarly article scheme (artikel ilmiah), students finish writing their thesis and submit the thesis paper to scholarly publication (academic journal or conference) before submitting the thesis soft cover and undergo thesis defense. The thesis defense is in the form of poster presentation. The thesis is graded based on the status of submission at scholarly publication (submitted, under review, revised, or accepted).

Thesis Course Schemes
There are two thesis course schemes that students can choose: (1) 6-credit Thesis course and (2) 2+4-credit Pre-Thesis and Thesis courses.

The 6-credit Thesis course is the thesis course that students take in one semester.

The 2+4-credit Pre-Thesis and Thesis courses are two courses that students take in two different semesters but are considered as one thesis course. Students first take the 2-credit Pre-Thesis course with thesis proposal as the output. In the next semester, students take the 4-credit Thesis course. With this scheme, students can take thesis courses at the same semester as 3+1 enrichment programs or the Kampus Merdeka program, and outstanding students can aim to graduate in 7 semesters. The requirement to choose this scheme is a GPA greater than or equal to 3.25.

How to Sign Up for Thesis?
Students must sign up for thesis to the International Relations Program at the same time as registering to SSC. Students register to the International Relations Program to get supervisory and to SSC to get enrollment status.

For the 2023/2024 even semester, students sign up for thesis to the International Relations Program by filling out the Thesis Registration Form [https://bit.ly/regskripsiHIBNgenap2324].

After filling out the form, students contact the potential supervisors for their thesis to discuss the choice of thesis scheme, topics, and title.

If the lecturers agree to be their thesis supervisor, students fill out the Thesis Supervisory Form [https://bit.ly/bimbinganskripsiHIBNgenap2324].

Based on the form responses, the International Relations Program will issue a Thesis Supervisor Appointment Letter (Surat Penunjukan Pembimbing Skripsi/SPP).

Who Can Be Thesis Supervisors?
As of the 2023/2024 odd semester, thesis supervisors at the International Relations Program are:"""

In [37]:
# Split the source text into sentences, then embed each one. Only the second
# element of embed's return value (the pooled output, since pool=True) is kept.
contexts = sent_tokenize(context)
knowledge = [embed(sentence, pool=True)[1] for sentence in contexts]

In [49]:
# Combine the per-sentence tensors into one tensor: stack them along a new
# dim=1, then take element 0 of the leading axis.
# Assumes every list entry has a leading axis of size 1 (one text per embed
# call), which would make the result one row per sentence — TODO confirm.
# NOTE: this rebinds `knowledge` from a list to a tensor, so the cell is not
# idempotent; re-run the cell that builds the list before running it again.
knowledge = torch.stack(knowledge,dim=1)[0]

In [57]:
# Convert the tensor to a NumPy array, which is what is later passed to
# `index.add`. `detach()` returns a tensor without autograd tracking, which
# `.numpy()` requires.
# NOTE: rebinds `knowledge` again (tensor -> ndarray); re-running only this
# cell fails because an ndarray has no `.detach()`.
knowledge = knowledge.detach().numpy()

In [137]:
class XFaiss(faiss.IndexFlatL2):
    """``faiss.IndexFlatL2`` with a side table mapping stored vectors to text.

    The index behaviour itself is inherited unchanged. ``feed`` records which
    text belongs to which vector and ``get`` translates vectors back to text.
    """

    def __init__(self,d):
        """Create an empty index for vectors of dimension ``d``."""
        super(XFaiss,self).__init__(d)
        # Side table: value-based key of a vector (see `_key`) -> its text.
        self.dict = {}

    @staticmethod
    def _key(vector):
        """Return a hashable key that depends only on the vector's values.

        The vector is normalised to float32 and keyed by its raw bytes.
        A plain ``tuple(vector)`` is avoided because, for a torch tensor, it
        yields 0-d tensors that hash by identity, so an equal-valued vector
        would never be found again.
        """
        if torch.is_tensor(vector):
            vector = vector.detach().cpu().numpy()
        return np.asarray(vector, dtype=np.float32).tobytes()

    def feed(self,data):
        """Register texts for later lookup by vector.

        Args:
            data: Iterable of items where ``item[0]`` is the text and
                ``item[1]`` is its vector (NumPy array or torch tensor).
                A later item with the same vector overwrites an earlier one.
        """
        for item in data:
            self.dict[self._key(item[1])] = item[0]

    def get(self,tensors):
        """Return the texts registered for each vector in ``tensors``.

        Args:
            tensors: Iterable of vectors previously passed to ``feed``.

        Returns:
            List of texts, in the same order as ``tensors``.

        Raises:
            KeyError: If a vector was never registered through ``feed``.
        """
        return [self.dict[self._key(vector)] for vector in tensors]

In [138]:
# Size the index from the embeddings themselves (columns of `knowledge`)
# instead of a hard-coded width, so the cell keeps working if the encoder's
# hidden size changes.
index = XFaiss(knowledge.shape[1])
index.add(knowledge)

In [139]:
# Sanity check: print the sentence of the first (sentence, vector) pair, if any.
first_pair = next(zip(contexts, knowledge), None)
if first_pair is not None:
    print(first_pair[0])

The output of thesis course in BINUS is to publish the thesis paper at reputable international scholarly publication.


In [140]:
# Register each sentence under its embedding so `index.get` can map vectors
# back to text. Sentences and rows of `knowledge` are paired by position.
index.feed(zip(contexts,knowledge))

In [141]:
# Embed the question the same way as the knowledge sentences (pooled output,
# converted to NumPy), then ask the index for the 5 nearest stored vectors.
# `docs` is indexed below as docs[0] (distances) and docs[1] (neighbor ids).
q = "what is the format of thesis paper?"
qq = embed(q, pool=True)[1].detach().numpy()
docs = index.search(qq, k=5)

In [142]:
# `docs[1]` holds one row of neighbor ids per query. Faiss fills a row with -1
# when fewer than k neighbors exist; drop those ids so NumPy's negative
# indexing does not silently return the last stored vector for them.
nearest_neighbor_vectors = [knowledge[ids[ids >= 0]] for ids in docs[1]]

In [143]:
# Translate the neighbor vectors of the first query back into their sentences.
# NOTE(review): the recorded output does not include the "Format of Thesis
# Paper" sentence for this question; the pooled BERT output may be a weak
# sentence embedding for retrieval — worth evaluating alternatives.
index.get(nearest_neighbor_vectors[0])

['How to Sign Up for Thesis?',
 'Who Can Be Thesis Supervisors?',
 'Students register to the International Relations Program to get supervisory and to SSC to get enrollment status.',
 'The output of thesis course in BINUS is to publish the thesis paper at reputable international scholarly publication.',
 'The 6-credit Thesis course is the thesis course that students take in one semester.']

In [146]:
# Distances that accompany the neighbor ids in `docs[1]`, one row per query.
# NOTE(review): Faiss documents IndexFlatL2 distances as squared L2 — confirm
# before comparing them against a threshold.
docs[0]

array([[12.801563, 18.83541 , 22.850504, 22.960098, 23.374928]],
      dtype=float32)