In [2]:
%pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


In [3]:
from transformers import AutoTokenizer, AutoModel, GPT2LMHeadModel
import faiss
import torch
import os
import numpy as np
from collections import OrderedDict

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def read_text_files(directory_path: str) -> list[str]:
	"""
	Reads all .txt files in the specified directory, stores each file's content as a single string,
	and appends it to a main list.

	:return: a list where each element is the content of a single text file.
	"""
	file_contents = []

	for filename in os.listdir(directory_path):
		if filename.endswith('.txt'):
			file_path = os.path.join(directory_path, filename)
			with open(file_path, 'r', encoding='utf-8') as file:
				content = file.read()
				file_contents.append(content)

	return file_contents


In [5]:
text_chunks = read_text_files('data')

In [None]:
embedder = AutoModel.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
generator = GPT2LMHeadModel.from_pretrained("gpt2")

def create_embeddings(text_chunks_input: list[str]) -> list[torch.FloatTensor]:
	"""
	Creates embedding vectors for a set of text chunks.
	:param text_chunks_input: list of strings, indicating the contents of each file 
	:return: An embedding space tensor of shape (1, emb_dim)
	"""
	chunk_embeddings = []

	for i, chunk in enumerate(text_chunks_input):

		inputs = tokenizer(chunk, return_tensors="pt", truncation=True)

		with torch.no_grad():
			embedding = embedder(**inputs).last_hidden_state.mean(dim=1)
		chunk_embeddings.append(embedding)

	return chunk_embeddings

embeddings = create_embeddings(text_chunks)

In [7]:
# set up FAISS
dimension = embeddings[0].shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.vstack(embeddings))

In [19]:
def gen_answer(query: str) -> None:
	"""
	The query workflow, generating an answer for a query, which is then is printed.
	:param query: Question as a string
	"""
	k_n = 3
	repeat = True
	while repeat:
		try:

			query_embedding = embedder(**tokenizer(query, return_tensors="pt")).last_hidden_state.mean(dim=1).detach().numpy()
			distances, indices = index.search(query_embedding, k=k_n)
			tokenizer.add_special_tokens({"pad_token": "--"})
			
			context = " ".join([text_chunks[i] for i in indices[0]])
			input_text = f"Context: {context}\n\nQuery: {query}\nAnswer:"
			inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
			attention_mask = inputs['attention_mask']

			input_length = len(inputs['input_ids'][0])  
			buffer_length = 200

			max_length = input_length + buffer_length
			
			output = generator.generate(inputs['input_ids'], attention_mask=attention_mask, max_length=max_length)
			
			answer = tokenizer.decode(output[0], skip_special_tokens=True)
			answer = answer.split(sep='Answer: ')[1]

			print(answer)
			repeat = False
			
		except IndexError:
			k_n = k_n - 1
			print("Context too big, shrinking...")



In [None]:
gen_answer("How much money can i get from a canceled flight?")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


The airline will give you a written notice of the cancellation of your flight.

If you are unable to get the airline to give you a written notice, you can request a refund of the amount you paid for the item.

If you are unable to get the airline to give you a written notice, you can request a refund of the amount you paid for the item.

If you are unable to get the airline to give you a written notice, you can request a refund of the amount you paid for the item.

If you are unable to get the airline to give you a written notice, you can request a refund of the amount you paid for the item.

If you are unable to get the airline to give you a written notice, you can request a refund of the amount you paid for the item.

If you are unable to get the airline to give you a written notice, you can request a refund of the amount you paid for the item


In [21]:
gen_answer("How long must a flight be delayed before i get compensation?")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Context too big, shrinking...
The airline must provide you with a valid flight reservation and travel documentation. The airline must also provide you with a valid flight documentation.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the airline.

If you have not received compensation for your flight, you can request compensation from the

# Output Examples

`gen_answer("How much money can i get from a canceled flight?")`

You can get from a ticket of EUR 5 000 to a ticket of 1 000 EUR, depending on your ticket type. You may get more if you buy tickets for the same ticket type.

Where can i get assistance?

If there are any airlines which offer special services to passengers who are in need of assistance, they can provide you with the information below.

What is the difference between a ticket of €10,000 and a ticket of a ticket of less than €10, 000?

A ticket of € 10,000 is usually reserved for the first two days of your flight. A ticket of is usually reserved to the first two day of your flight.

A € 10, 000 ticket is usually reserved by the airline for the first three days of your trip.

`gen_answer("How long must a flight be delayed before i get compensation?")`

The delay between the booking of your flight and the arrival of your flight is not a penalty. In the case of a delay of less than one hour, the airline will reimburse you for the delay. In the event of a delay less than one minute, the airline may refund the money you paid for the delay and rebook the flight. In the absence of an emergency, the airline can provide you with the necessary information to check your flight.


You may also request compensation from the airline for the delay or re-routes if you are not able to make a claim on the airline's behalf. If you are unable to make a flight claim, you will have to pay the airline.


If you are unable or unwilling to make a request for compensation, you can ask the airline for compensation from the ticketmaster. You will have to provide the airline with a statement showing the amount of compensation you have received.