In [3]:
# Libraries

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama

from transformers import AutoTokenizer
from IPython.display import display, HTML
import json
import time
import pathlib
import os

In [4]:
# Reading txt files

# define what documents to load and where from
class UTF8TextLoader(TextLoader):
    """TextLoader subclass whose ``encoding`` argument defaults to ``'utf-8'``.

    It only forwards ``file_path`` and ``encoding`` to ``TextLoader``; every
    other behaviour is inherited unchanged.
    """

    def __init__(self, file_path: str, encoding: str = 'utf-8'):
        # Forward the encoding by name so the intent is explicit at the call site.
        super().__init__(file_path, encoding=encoding)
        
# Collect every *.txt file in the data folder, reading each one through UTF8TextLoader.
loader = DirectoryLoader(
    r"Z:\Uni-CE901\22-24_CE901-CE911-CF981-SU_stoyles_thomas\Data",  # Change this to your directory
    glob="*.txt",
    loader_cls=UTF8TextLoader,
)
documents = loader.load()

# Split the loaded documents into chunks using the splitter's default settings.
splitter = CharacterTextSplitter()
texts = splitter.split_documents(documents)

# Sentence-embedding model, run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
)

# Build the FAISS index from the chunks and write it to the "faiss" path.
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")



In [5]:
# Initialize the tokenizer that is passed to the token-counting and truncation helpers.
# NOTE(review): this loads the "gpt2" tokenizer while the LLM configured further down is
# "llama3", so token counts are presumably only an approximation of what the model sees — confirm.
tokenizer = AutoTokenizer.from_pretrained("gpt2") # Change if you wish to test other tokenizers

# Counting tokens in files and directory
def count_tokens_in_file(file_path, tokenizer):
    """Return the number of tokens ``tokenizer`` produces for a text file.

    Args:
        file_path: Path of the file to read; its contents are decoded as UTF-8.
        tokenizer: Object exposing ``encode(text)`` that returns a sized sequence.

    Returns:
        int: Length of the sequence returned by ``tokenizer.encode``.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return len(tokenizer.encode(contents))

def check_tokens_in_directory(directory_path, tokenizer, max_tokens):
    """Print the token count of each ``.txt`` file directly inside a directory.

    For every matching file a ``File: ..., Tokens: ...`` line is printed, followed
    by a ``WARNING`` line when the count is greater than ``max_tokens``.

    Args:
        directory_path: Directory to scan (not recursive).
        tokenizer: Tokenizer forwarded to ``count_tokens_in_file``.
        max_tokens: Threshold above which a warning is printed.

    Returns:
        None. The report is written to standard output.
    """
    # os.listdir() yields entries in arbitrary order; sorting keeps the report
    # identical from run to run.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith('.txt'):
            continue
        file_path = os.path.join(directory_path, file_name)
        # A sub-directory whose name happens to end in ".txt" cannot be read as text.
        if not os.path.isfile(file_path):
            continue
        num_tokens = count_tokens_in_file(file_path, tokenizer)
        print(f"File: {file_name}, Tokens: {num_tokens}")
        if num_tokens > max_tokens:
            print(f"WARNING: {file_name} exceeds the maximum token limit of {max_tokens}")

def set_and_check_token_limit(directory_path, tokenizer, max_tokens):
    """Announce a token limit, then report per-file token counts against it.

    Args:
        directory_path: Directory handed to ``check_tokens_in_directory``.
        tokenizer: Tokenizer handed to ``check_tokens_in_directory``.
        max_tokens: Limit that is printed and then used as the warning threshold.
    """
    banner = f"\nSetting token limit to: {max_tokens}\n"
    print(banner)
    check_tokens_in_directory(directory_path, tokenizer, max_tokens)


In [6]:
# Folder that holds the .txt files; this is the same location the DirectoryLoader
# in the index-building cell reads from.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
directory_path = r"Z:\Uni-CE901\22-24_CE901-CE911-CF981-SU_stoyles_thomas\Data"  # Change this to your directory

In [7]:
# Prompt template; {context} receives the retrieved chunks and {question} the user query.
template = """Context: {context}
Question: {question}
"""

# Generation settings for the language model (edit here when comparing models/limits).
config = {'max_new_tokens': 512, 'temperature': 0.01}

# Ollama does not read `config` on its own, so the settings are passed explicitly;
# otherwise the server defaults would silently be used.
llm = Ollama(
    model="llama3",  # Use Ollama website for more models https://ollama.com/library
    temperature=config['temperature'],
    num_predict=config['max_new_tokens'],
)

# Re-create the embedding model the index was built with so that queries are
# embedded the same way as the stored chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})

# SECURITY: allow_dangerous_deserialization is an option of FAISS.load_local (not of
# the LLM). Loading deserializes pickled data, which can execute arbitrary code, so
# only load a "faiss" folder that this notebook wrote itself.
db = FAISS.load_local("faiss", embeddings, allow_dangerous_deserialization=True)

# Retriever that returns the k most similar chunks for each query.
retriever = db.as_retriever(search_kwargs={'k': 5}) # Max number of documents to read (k)
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'question'])

# 'stuff' chain: the retrieved chunks are placed into the prompt's {context} slot.
QA_LLM = RetrievalQA.from_chain_type(llm=llm,
                                     chain_type='stuff',
                                     retriever=retriever,
                                     return_source_documents=True,
                                     chain_type_kwargs={'prompt': prompt})



In [8]:
# Define a function to truncate documents
def truncate_document(doc, max_tokens, tokenizer):
    """Return ``doc.page_content`` cut down to at most ``max_tokens`` tokens.

    Text that already fits within the limit is returned verbatim rather than
    being passed through encode/decode, because not every tokenizer reproduces
    its input exactly on a round trip.

    Args:
        doc: Object with a ``page_content`` string attribute.
        max_tokens: Maximum number of tokens to keep. ``None`` means no limit;
            zero or a negative value yields the decoding of an empty sequence.
        tokenizer: Object exposing ``encode(text)`` and ``decode(tokens)``.

    Returns:
        str: The original text, or the decoded first ``max_tokens`` tokens.
    """
    text = doc.page_content
    if max_tokens is None:
        return text

    tokens = tokenizer.encode(text)
    # Clamp so a negative limit cannot turn the slice into "drop from the end".
    limit = max(max_tokens, 0)
    if len(tokens) <= limit:
        return text
    return tokenizer.decode(tokens[:limit])

# Define the query function with manual token limit handling
def query_with_token_limit(model, question, max_tokens, tokenizer):
    """Run a retrieval-QA query and display the answer with truncated sources.

    ``max_tokens`` only limits how much of each source document is *displayed*;
    it is not passed to the model and does not change the prompt or the answer.

    Args:
        model: QA chain. It is called with ``{'query': question}`` and must
            return a mapping with a ``"result"`` entry and, optionally,
            ``"source_documents"``. Its LLM's ``model`` attribute is read via
            ``combine_documents_chain.llm_chain.llm`` to label the timing line.
        question: Question text sent to the chain.
        max_tokens: Token limit applied to each displayed source document.
        tokenizer: Tokenizer forwarded to ``truncate_document``.

    Returns:
        None. Everything is rendered through ``IPython.display``.
    """
    # Local import: used only to escape text before it is embedded in HTML.
    import html

    model_path = model.combine_documents_chain.llm_chain.llm.model
    model_name = pathlib.Path(model_path).name

    # Prefer Chain.invoke (calling the chain directly is deprecated); fall back
    # to calling the object itself for plain callables.
    run_chain = getattr(model, "invoke", model)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    time_start = time.perf_counter()
    output = run_chain({'query': question})
    time_elapsed = time.perf_counter() - time_start

    response = output["result"]
    source_documents = output.get('source_documents', [])

    # Escape all dynamic text: answers and documents may contain '<' or '&',
    # which would otherwise be interpreted as markup and garble the output.
    safe_name = html.escape(str(model_name))
    safe_question = html.escape(str(question))
    safe_response = html.escape(str(response))

    display(HTML(f'<code>{safe_name} response time: {time_elapsed:.02f} sec</code>'))
    display(HTML(f'<strong>Question:</strong> {safe_question}'))
    display(HTML(f'<strong>Answer:</strong> {safe_response}'))

    # Show every source document, truncated to the max token limit.
    for number, doc in enumerate(source_documents, start=1):
        truncated_text = html.escape(truncate_document(doc, max_tokens, tokenizer))
        display(HTML(f'<strong>Source Document {number} (truncated to {max_tokens} tokens):</strong> <pre>{truncated_text}</pre>'))


# Question sent to the QA chain on every iteration of the sweep below.
query_text = "What symptoms does Stress have?"

# Token limits to try, one full query per entry.
# NOTE(review): 124 and 4024 look like typos for 128 and 4096 — confirm.
token_limits = [1, 124, 256, 512, 1024, 2048, 4024]

# NOTE: query_with_token_limit sends the same question to the model each time;
# `limit` is only used when truncating the source documents for display, so every
# iteration repeats the full model call.
for limit in token_limits:
    print(f"\nRunning model with token limit: {limit}\n")
    query_with_token_limit(QA_LLM, query_text, limit, tokenizer)



Running model with token limit: 1



  warn_deprecated(



Running model with token limit: 124




Running model with token limit: 256




Running model with token limit: 512




Running model with token limit: 1024




Running model with token limit: 2048



KeyboardInterrupt: 