# In [None]:  (Jupyter cell marker left over from the notebook export)
## Importing libraries
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings, OpenAIEmbeddings
import os
from langchain_community.vectorstores import FAISS, Qdrant, Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
from datasets import load_dataset
import pandas as pd
import evaluate
from evaluate import load
import tensorflow_hub as hub
from scipy.spatial import distance
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
import pandas as pd

## Some evaluation functions
def embed(input, model):
    """Apply ``model`` to ``input`` and return the result unchanged.

    Args:
        input: Value handed to ``model`` as its single positional argument.
        model: Any callable accepting ``input``.

    Returns:
        Whatever ``model(input)`` produces.
    """
    encoded = model(input)
    return encoded

def SAS(preds, refs, model):
    """Return the mean cosine similarity between aligned predictions and references.

    Both sequences are encoded with ``model.encode`` and the i-th prediction
    embedding is compared with the i-th reference embedding.

    Args:
        preds: Sequence of predicted texts.
        refs: Sequence of reference texts, aligned index-by-index with ``preds``.
        model: Object exposing ``encode(texts)`` that yields one embedding per text.

    Returns:
        The arithmetic mean of the pairwise cosine similarities, as a float.

    Raises:
        ValueError: If ``preds`` and ``refs`` differ in length, or are empty
            (an average over zero pairs is undefined).
    """
    # Validate before encoding so misuse fails fast with a clear message
    # instead of an IndexError / ZeroDivisionError after the expensive encode.
    if len(preds) != len(refs):
        raise ValueError(
            f"preds and refs must have the same length, got {len(preds)} and {len(refs)}"
        )
    if len(preds) == 0:
        raise ValueError("preds and refs must not be empty")

    embeddings_preds = model.encode(preds)
    embeddings_refs = model.encode(refs)

    # pytorch_cos_sim returns a 1x1 matrix for a single pair; unwrap it to a float.
    similarities = [
        util.pytorch_cos_sim(pred_vec, ref_vec)[0][0].item()
        for pred_vec, ref_vec in zip(embeddings_preds, embeddings_refs)
    ]
    return sum(similarities) / len(similarities)

# Metric objects and encoder models are expensive to load and do not depend on
# the inputs, so they are built once and reused across evaluate_predictions calls.
_EVAL_RESOURCES = {}


def _get_eval_resource(key, factory):
    """Return the cached resource for ``key``, building it with ``factory`` on first use."""
    if key not in _EVAL_RESOURCES:
        _EVAL_RESOURCES[key] = factory()
    return _EVAL_RESOURCES[key]


def _as_text_list(data, extract):
    """Return ``data`` as a list: lists/tuples are copied, anything else goes through ``extract``."""
    if isinstance(data, (list, tuple)):
        return list(data)
    return list(extract(data))


def evaluate_predictions(references, predictions, text_splitter_name,embedding_name,db_name):
    """Score predicted answers against reference answers with several metrics.

    Computes BERTScore (``lang="nb"``), BLEU (``max_order=2``), ROUGE, a cosine
    similarity based on the TF-Hub Universal Sentence Encoder, and a cosine
    similarity based on a multilingual SentenceTransformer (via ``SAS``).

    Args:
        references: Either a plain list/tuple of reference answers, or an object
            whose ``["Answer"]`` entry holds them.
        predictions: Either a plain list/tuple of predicted answers, or an object
            whose ``["train"]["Text"]`` entry holds them.
        text_splitter_name: Label of the text splitter used for this run.
        embedding_name: Label of the embedding model used for this run.
        db_name: Label of the vector store used for this run.

    Returns:
        A dict with keys ``"Name_of_model"`` (the three labels above),
        ``"Metric"`` (seven metric names) and ``"Score"`` (seven values in the
        same order as ``"Metric"``). The BLEU and ROUGE entries are the raw
        results returned by ``compute``. Note that ``"Name_of_model"`` has three
        entries while the other two lists have seven.

    Raises:
        ValueError: If predictions and references differ in length or are empty.
    """
    references = _as_text_list(references, lambda data: data["Answer"])
    predictions = _as_text_list(predictions, lambda data: data["train"]["Text"])

    # Fail fast, before any model is loaded: every metric below pairs the i-th
    # prediction with the i-th reference and averages over the pairs.
    if len(predictions) != len(references):
        raise ValueError(
            "predictions and references must have the same length, "
            f"got {len(predictions)} and {len(references)}"
        )
    if not predictions:
        raise ValueError("predictions and references must not be empty")

    bertscore = _get_eval_resource("bertscore", lambda: load("bertscore"))
    bleu = _get_eval_resource("bleu", lambda: evaluate.load('bleu'))
    rouge = _get_eval_resource("rouge", lambda: evaluate.load('rouge'))

    bert_score = bertscore.compute(predictions=predictions, references=references, lang="nb")
    bleu_score = bleu.compute(predictions=predictions, references=references, max_order=2)
    rouge_score = rouge.compute(predictions=predictions, references=references)

    avg_precision = sum(bert_score['precision']) / len(bert_score['precision'])
    avg_recall = sum(bert_score['recall']) / len(bert_score['recall'])
    avg_f1 = sum(bert_score['f1']) / len(bert_score['f1'])

    ## SAS encoder score
    module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
    encoder_model = _get_eval_resource(module_url, lambda: hub.load(module_url))

    # scipy's cosine() is a distance, so 1 - distance gives the similarity.
    list_of_similarity_scores = [
        1 - distance.cosine(
            embed([prediction], encoder_model)[0, :],
            embed([reference], encoder_model)[0, :],
        )
        for prediction, reference in zip(predictions, references)
    ]
    average_score = sum(list_of_similarity_scores) / len(list_of_similarity_scores)

    ## SAS transformer score
    transformer_name = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
    transformer_model = _get_eval_resource(
        transformer_name, lambda: SentenceTransformer(transformer_name)
    )
    # SAS requires the encoder as its third argument.
    sas_transformer_score = SAS(predictions, references, transformer_model)

    data = {
        "Name_of_model": [text_splitter_name, embedding_name, db_name],
        "Metric": ["BLEU Score", "ROUGE Score", "Average Precision", "Average Recall", "Average F1 Score", "Average SAS encoder Score", "Average SAS transformer Score"],
        "Score": [bleu_score, rouge_score, avg_precision, avg_recall, avg_f1, average_score, sas_transformer_score]
    }
    return data


# Progress counter and accumulator for the per-configuration evaluation results.
counter = 0
all_data = []

## Input tokenizer and output model
tokenizer = AutoTokenizer.from_pretrained("RuterNorway/Llama-2-13b-chat-norwegian")
model = AutoModelForCausalLM.from_pretrained("RuterNorway/Llama-2-13b-chat-norwegian")

## Loading dataset
# Only the first 10 rows of the CSV are loaded (split="train[:10]").
dataset = load_dataset('csv', data_files=r'/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/question_with_answers.csv', split="train[:10]")

## OpenAI key
# SECURITY: the API key must be supplied through the environment and never be
# committed to source. Any key that was previously hard-coded in this file has
# to be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this script."
    )

## Defining document loader
# Recursively loads every PDF below data/ through UnstructuredFileLoader.
loader = DirectoryLoader('data/', glob="**/*.pdf", show_progress=True, loader_cls=UnstructuredFileLoader)
documents = loader.load()

## Split the documents to chunks of text
# Three (chunk_size, chunk_overlap) settings, each with both splitter classes.
Character_Text_Splitter_100_20 = CharacterTextSplitter(chunk_size=100, chunk_overlap=20)
Recursive_Text_Splitter_100_20 = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
Character_Text_Splitter_500_50 = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
Recursive_Text_Splitter_500_50 = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
Character_Text_Splitter_1000_100 = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
Recursive_Text_Splitter_1000_100 = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
splitters = [
    Character_Text_Splitter_100_20,
    Recursive_Text_Splitter_100_20,
    Character_Text_Splitter_500_50,
    Recursive_Text_Splitter_500_50,
    Character_Text_Splitter_1000_100,
    Recursive_Text_Splitter_1000_100,
]
## Defining embeddings
# Built once, outside the loops: the models do not depend on the splitter.
# The instances get their own names so the imported classes (OpenAIEmbeddings,
# FAISS, Chroma) are never rebound; rebinding OpenAIEmbeddings to an instance
# made the second splitter iteration call an instance instead of the class.
paraphrase = SentenceTransformerEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
e5 = SentenceTransformerEmbeddings(model_name='intfloat/multilingual-e5-large')
openai_embedding = OpenAIEmbeddings()
embeddings = [paraphrase, openai_embedding, e5]

# Pipeline prompting
# The generation pipeline only wraps the already-loaded model and tokenizer,
# so it is created once instead of once per question.
pipe = pipeline(
    "text-generation",
    model=model,
    do_sample=True,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15
)

instruction = "Svar på spørsmålet basert på det som står i 'context'"

# Grid over every splitter x embedding x vector store combination.
for split in splitters:
    docs = split.split_documents(documents)
    text_splitter_name = str(split._chunk_size)+split.__class__.__name__

    for embedding in embeddings:
        # SentenceTransformerEmbeddings exposes `model_name`; fall back to
        # `model` for embeddings that do not have it.
        if hasattr(embedding, 'model_name'):
            embedding_name = embedding.model_name
        else:
            embedding_name = embedding.model

        ## Defining vectorstores
        qdrant_store = Qdrant.from_documents(docs, embedding, location=":memory:", collection_name="my_documents")
        faiss_store = FAISS.from_documents(docs, embedding)
        # NOTE(review): without an explicit name every in-memory Chroma store
        # presumably shares one default collection, mixing chunks (and embedding
        # dimensions) from earlier configurations. `counter` is distinct for
        # each (splitter, embedding) pair, so it yields a unique name.
        chroma_store = Chroma.from_documents(docs, embedding, collection_name=f"my_documents_{counter}")
        vector_stores = [faiss_store, chroma_store, qdrant_store]

        for db in vector_stores:
            print(f"Starter med {counter}/54")
            answers_from_model = []
            # Iterate over every loaded question so the answers stay aligned
            # with dataset["Answer"] regardless of how many rows were loaded.
            for query in dataset["Question"]:
                found_docs = db.similarity_search(query)
                # Only the top-ranked chunk is used as context.
                context = found_docs[0].page_content
                model_input = f"Spørsmål: {query} context: {context}"
                prompt_template=f'''### Instruction: {instruction}
                ### Input: {model_input}
                ### Response:
                '''
                inputs = tokenizer(prompt_template, return_tensors="pt")

                # NOTE(review): this direct generation is only printed; the
                # answer that gets evaluated comes from the pipeline below.
                out = model.generate(**inputs, max_new_tokens=200)
                print(tokenizer.decode(out[0], skip_special_tokens=True))

                # The pipeline output starts with the prompt; keep only the continuation.
                generated_text = pipe(prompt_template)[0]['generated_text']
                answers_from_model.append(generated_text[len(prompt_template):])
            preds = answers_from_model
            # evaluate_predictions reads its predictions via ["train"]["Text"],
            # so the plain list is wrapped in that shape. The vector store is
            # identified by its class name rather than by passing the object.
            results = evaluate_predictions(
                dataset,
                {"train": {"Text": preds}},
                text_splitter_name,
                embedding_name,
                type(db).__name__,
            )
            all_data.append(results)
            counter+=1