In [1]:
from transformers import BertTokenizer, TFBertModel,BertForQuestionAnswering,BertForMaskedLM,BertModel
from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import pipeline
import logging
import tensorflow as tf
import torch
from nltk.tokenize import sent_tokenize
import faiss
import numpy as np

logging.getLogger("transformers.modeling_utils").setLevel(logging.WARNING)

In [2]:
tokenizer = BertTokenizer.from_pretrained('./pretrained')
QA = BertForQuestionAnswering.from_pretrained("./QA_model")
model = BertModel.from_pretrained("./model")

In [83]:
def embed(text,pool=False):
    tokens = tokenizer(text,return_tensors="pt", truncation=True, padding=True, max_length=2014)
    with torch.no_grad():
        outputs = model(**tokens)
        
    if pool:
        return tokens,outputs["pooler_output"].numpy()
    
    return tokens,outputs["last_hidden_state"].numpy()

In [81]:
context= """The output of thesis course in BINUS is to publish the thesis paper at reputable international scholarly publication. To achieve this goal, thesis paper in the International Relations Program is structured as scholarly literature, like academic journal article or conference paper.

There are two thesis schemes that students can choose: (1) regular/non-class scheme (nonkelas) and (2) scholarly article scheme (artikel ilmiah).

There are two thesis course schemes that students can choose: (1) 6-credit Thesis course and (2) 2+4-credit Pre-Thesis and Thesis courses.

Output of Thesis Course: Scholarly Publication
Students are expected to submit their thesis paper at reputable international scholarly publication, either by co-authoring with their supervisors or as single author.

Examples of students’ thesis-based scholarly publication: download here

Format of Thesis Paper: Scholarly Literature
Thesis paper is structured as scholarly literature, like academic journal article or conference paper, not in chapters. The sections are:

Abstract
Introduction
Literature Review
Research Method
Findings, Analysis, and Discussion
Conclusion
Bibliography/References
Thesis word count is as a journal article or conference paper; approximately 5,000-8,000 words.

Thesis Schemes
There are two thesis schemes that students can choose: (1) regular/non-class scheme (nonkelas) and (2) scholarly article scheme (artikel ilmiah).

In the regular/non-class scheme (nonkelas), students finish writing their thesis, submit the thesis soft cover, undergo thesis defense, and finally submit the thesis paper to scholarly publication (academic journal or conference) after the process is complete. The thesis is graded based on the thesis content, supervisory process, and thesis defense process.

In the scholarly article scheme (artikel ilmiah), students finish writing their thesis and submit the thesis paper to scholarly publication (academic journal or conference) before submitting the thesis soft cover and undergo thesis defense. The thesis defense is in the form of poster presentation. The thesis is graded based on the status of submission at scholarly publication (submitted, under review, revised, or accepted).

Thesis Course Schemes
There are two thesis course schemes that students can choose: (1) 6-credit Thesis course and (2) 2+4-credit Pre-Thesis and Thesis courses.

The 6-credit Thesis course is the thesis course that students take in one semester.

The 2+4-credit Pre-Thesis and Thesis courses are two courses that students take in two different semesters but are considered as one thesis course. Students first take the 2-credit Pre-Thesis course with thesis proposal as the output. In the next semester, students take the 4-credit Thesis course. With this scheme, students can take thesis courses at the same semester as 3+1 enrichment programs or the Kampus Merdeka program, and outstanding students can aim to graduate in 7 semesters. The requirement to choose this scheme is a GPA greater than or equal to 3.25.

How to Sign Up for Thesis?
Students must sign up for thesis to the International Relations Program at the same time as registering to SSC. Students register to the International Relations Program to get supervisory and to SSC to get enrollment status.

For the 2023/2024 even semester, students sign up for thesis to the International Relations Program by filling out the Thesis Registration Form [https://bit.ly/regskripsiHIBNgenap2324].

After filling out the form, students contact the potential supervisors for their thesis to discuss the choice of thesis scheme, topics, and title.

If the lecturers agree to be their thesis supervisor, students fill out the Thesis Supervisory Form [https://bit.ly/bimbinganskripsiHIBNgenap2324].

Based on the form responses, the International Relations Program will issue a Thesis Supervisor Appointment Letter (Surat Penunjukan Pembimbing Skripsi/SPP).

To complete a major in International Business with either a single or dual degrees (DD), students must complete a minimum of 146 SCUs. Available streaming courses are available in this program, namely: Business in China (DD with Ningbo University), Business in ASEAN (Single Degree), European Management and International Trade (DD with Cologne Business School), Business and Management (DD with Bournemouth University), Commerce, (DD with UNSW or VUW).
Overseas study is mandatory for international business students in order to complete the program.  Students may choose any of the following options:
The teaching and learning processes are conducted through lectures/tutorials, student centered learning, practical demonstration/presentation and activities, completed with students’ independent study. It is the responsibility of the lecturer of a particular course to facilitate all students’ learning on the course, who can be assisted by a mentor, if necessary. By having qualified lecturers and guest lecturers from professional industries, the students will be able to gain knowledge from both sides, i.e. theoretical and practical frameworks, through in-depth analysis of case studies, and individual/group work projects. For doing their research activities, students can access magazines, books, academic journal in a good quality library – including accessing the online library to get updated academic papers. All course-work are assessed through a variety of assessment tasks such as reports, presentations, assignments, individual and group projects, and thesis/final project report as well as mid-semester and final semester examinations. To further enrich our Bachelor  studies experience we are providing the Enrichment Track in semester 6 or 7, students are able to choose from these various options:-
Study Abroad
Internship (Local/International)
Entrepreneurship
Community Development
Research
A series of extra-curricular activities are compulsory in the International Business Program. These activities will allow students to develop their social awareness, competitive and soft skills needed to be prepared for their future work environment.

In addition, the innovation habit will be developed through course assessment that put weight on innovation and entrepreneurial skills. Project Hatchery and Design Driven Entrepreneurship are courses that reflect such competence designed by the Center of Innovation and Entrepreneurship in Binus University International. Students are required to translate their selected innovative ideas into a visible design to comprehend the end-to-end innovation process. This innovation thinking approach is implemented in the teaching, learning, and assessment process of several courses throughout the program.
"""

In [143]:
contexts = sent_tokenize(context)
knowledge = []
for i in contexts:
    knowledge.append(embed(i,pool=True)[1])

In [144]:
knowledge = np.vstack(knowledge)

In [139]:
class XFaiss(faiss.IndexIVFPQ):
    
    def __init__(self,**kwargs):
        super(XFaiss,self).__init__(**kwargs)
        self.dict = {}
        
    def feed(self,data:[[str,torch.tensor]]):
        for i in data:
            self.dict[tuple(i[1])]=i[0]
    
    def get(self,tensors):
        l = []
        for i in tensors:
            l.append(self.dict[tuple(i)])
        return l

In [109]:
knowledge.shape

(42, 1024)

In [146]:
quantizer = faiss.IndexFlatL2(1024) 
index.add(knowledge)

In [147]:
q = "what is the output of thesis course?"
qq = embed(q,pool=True)[1]
docs = index.search(qq,k=5)

In [148]:
docs

(array([[21.597832, 21.597832, 21.597832, 21.597832, 21.597832]],
       dtype=float32),
 array([[ 29,  71, 113, 155, 197]], dtype=int64))

In [114]:
contexts[41]

'This innovation thinking approach is implemented in the teaching, learning, and assessment process of several courses throughout the program.'

In [95]:
nearest_neighbor_vectors = [contexts[i] for i in docs[1]]

TypeError: only integer scalar arrays can be converted to a scalar index

In [76]:
nearest_neighbor_vectors

[array([[ 0.9359872 , -0.9998128 , -0.44052768, ...,  0.9979662 ,
         -0.99989575, -0.11708409],
        [ 0.8707633 , -0.9998219 , -0.5060261 , ...,  0.99803436,
         -0.9998381 , -0.15273362],
        [ 0.8707633 , -0.9998219 , -0.5060261 , ...,  0.99803436,
         -0.9998381 , -0.15273362],
        [ 0.8707633 , -0.9998219 , -0.5060261 , ...,  0.99803436,
         -0.9998381 , -0.15273362],
        [ 0.8707633 , -0.9998219 , -0.5060261 , ...,  0.99803436,
         -0.9998381 , -0.15273362]], dtype=float32)]

In [75]:
index.get(nearest_neighbor_vectors[0])

AttributeError: 'IndexIVFPQ' object has no attribute 'get'

In [146]:
docs[0]

array([[12.801563, 18.83541 , 22.850504, 22.960098, 23.374928]],
      dtype=float32)