## Load data and preprocess

In [9]:
import pandas as pd

# Load the raw Reddit export. Later cells read the columns 'aware_post_type',
# 'reddit_title', 'reddit_text', 'reddit_subreddit', 'reddit_name',
# 'reddit_parent_id' and 'index' from this frame.
# NOTE(review): later cells feed 'index' values into `data.loc[...]`, so they
# presumably mirror the default row labels — confirm.
data = pd.read_csv('../data/reddit.csv')

In [10]:
# Keep only submission rows and re-label them 0..n-1 so the loop below can
# address neighbouring submissions by position.
subs = data.loc[data['aware_post_type'] == 'submission'].reset_index(drop=True)

# Build both text fields column-wise instead of row by row. `map(str)`
# stringifies every cell the same way the element-wise `str(...)` did, so a
# missing title/body still ends up as the literal 'nan'.
title_str = subs['reddit_title'].map(str)
body_str = subs['reddit_text'].map(str)
subreddit_str = subs['reddit_subreddit'].map(str)
subs['text_title'] = title_str + ' [SEP] ' + body_str
subs['text_title_subreddit'] = title_str + ' ' + body_str + ' #' + subreddit_str

# For every submission collect the 'index' values of the rows that lie between
# it and the next submission and whose parent id equals this submission's name.
# NOTE(review): assumes the 'index' column matches `data`'s row labels — confirm.
sub_row_labels = subs['index'].tolist()
sub_names = subs['reddit_name'].tolist()
comment_lists = []
for i, (sub_index, sub_name) in enumerate(zip(sub_row_labels, sub_names)):
    next_sub_index = len(data) if i == len(subs) - 1 else sub_row_labels[i + 1]
    # `.loc` label slices include both end points, hence the `- 1`.
    window = data.loc[sub_index + 1:next_sub_index - 1]
    is_reply = window['reddit_parent_id'] == sub_name
    comment_lists.append(window.loc[is_reply, 'index'].tolist())
subs['comment_indices'] = comment_lists

# Discard submissions that received no matching comment. Row labels are left
# untouched here, exactly as before.
subs = subs[subs['comment_indices'].map(len) > 0]

## Train test split

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 100 submissions for evaluation. The fixed seed makes the split, and
# therefore every retrieval result and metric computed from it, reproducible
# across kernel restarts.
subs_train, subs_test = train_test_split(subs, test_size=100, random_state=42)

# Re-label both frames 0..n-1; later cells look rows up by these labels.
subs_train = subs_train.reset_index(drop=True)
subs_test = subs_test.reset_index(drop=True)

## Generate test questions and ground truth

In [18]:
# The question for each held-out submission is its 'text_title' value
# (title and body joined by ' [SEP] '), kept as a Series with subs_test's labels.
question = subs_test['text_title']

In [19]:
# Reference answer per test question: the 'reddit_text' of the first comment
# recorded for that submission.
ground_truth = [
    data.loc[comment_ids[0], 'reddit_text']
    for comment_ids in subs_test['comment_indices']
]

## Generate contexts

In [20]:
import torch

# Pick the compute backend in order of preference: CUDA, then Apple's MPS,
# falling back to the CPU. The MPS check only runs when CUDA is unavailable.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [21]:
from sentence_transformers import SentenceTransformer

# `device` has to be passed by keyword: the second positional parameter of
# SentenceTransformer is `modules`, so the positional form never applied the
# backend selected above.
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
# Keep a local copy under ../emb/ so the same weights can be loaded by path.
model.save(path='../emb/all-MiniLM-L6-v2', model_name='all-MiniLM-L6-v2')

In [22]:
# Embed the 'text_title_subreddit' text of every training submission with
# normalisation switched on, then store each embedding on the frame as a
# plain Python list.
train_texts = subs_train['text_title_subreddit'].tolist()
vectors = model.encode(
    train_texts,
    convert_to_numpy=True,
    normalize_embeddings=True,
)
subs_train['vector'] = vectors.tolist()

In [23]:
import lancedb

# Connect to the LanceDB database kept at ../.lancedb.
db = lancedb.connect("../.lancedb")
# Store the training submissions (including their 'vector' column) as the
# "reddit_submissions" table; mode="overwrite" is requested so a table of the
# same name from an earlier run does not block re-execution.
table = db.create_table("reddit_submissions", subs_train, mode="overwrite")
# Disabled alternative: build the table from all submissions (`subs`) instead
# of the training split only.
# table = db.create_table("reddit_submissions", subs, exist_ok=True)

In [24]:
context_num = 4
# One row per test question, pre-filled with '' so every row keeps exactly
# `context_num` entries even when the search yields fewer hits.
contexts = [['' for _ in range(context_num)] for _ in range(len(subs_test))]

for i in range(len(subs_test)):
    # Embed the question the same way the indexed submissions were embedded.
    query = model.encode(question[i],
                         convert_to_numpy=True,
                         normalize_embeddings=True).tolist()
    response = table.search(query).limit(context_num).to_pandas()
    # Walk over the hits that were actually returned rather than assuming there
    # are always `context_num` of them; the old `response.loc[j, ...]` lookup
    # raised KeyError on a short result set.
    hit_comment_lists = response['comment_indices'].iloc[:context_num]
    for j, hit_comment_indices in enumerate(hit_comment_lists):
        # The context is the first recorded comment of the retrieved submission.
        contexts[i][j] = data.loc[hit_comment_indices[0], 'reddit_text']

## Generate answers using llama3

In [26]:
from llama_cpp import Llama

n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Make sure the model path is correct for your system!
generating_llm = Llama(
    model_path="../llm/llama-3-8b.Q5_K_M.gguf",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    # The generation cell builds prompts from up to 2500 characters of context
    # plus up to 2500 characters of question and then asks for 32 new tokens.
    # That does not fit into the previous 850-token window for longer posts, in
    # which case llama.cpp refuses the request. 4096 stays well inside the
    # 8192-token context length reported in the model's GGUF metadata.
    n_ctx=4096,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    verbose=True,
)

llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from ../llm/llama-3-8b.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = .
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   7:                 llama.rope.dimension_count u32         

In [None]:
answer = []

for context_row, question_text in zip(contexts, question):
    # Only the top-ranked context is used; context and question are each capped
    # at 2500 characters before being spliced into the prompt.
    prompt = "".join([
        "Summarize the following, ",
        str(context_row[0][:2500]),
        "Then answer question: ",
        str(question_text[:2500]),
        "Answer: ",
    ])
    completion = generating_llm(
        prompt,
        max_tokens=32,  # generate at most 32 tokens
        stop=["\n"],    # end the completion at the first newline
        echo=False,     # return the completion only, without the prompt
    )
    answer.append(completion['choices'][0]['text'])

## Evaluation

In [1]:
from langchain_community.llms import LlamaCpp
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Judge model for ragas: the GGUF checkpoint is loaded through LangChain's
# LlamaCpp and handed straight to the ragas wrapper, so `evaluating_llm` only
# ever refers to the wrapped object.
# Make sure the model path is correct for your system!
# NOTE(review): n_ctx=850 is a small window for evaluation prompts that embed
# whole questions and contexts — confirm it is large enough.
evaluating_llm = LangchainLLMWrapper(
    LlamaCpp(
        model_path="../llm/llama-3-8b.Q5_K_M.gguf",
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        n_ctx=850,
        f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
        verbose=True,
    )
)

llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from ../llm/llama-3-8b.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = .
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   7:                 llama.rope.dimension_count u32         

In [61]:
# Import from langchain_community, like the LlamaCpp import in this notebook;
# the `langchain.embeddings` path is a deprecated alias of the same class.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load the sentence-transformer copy saved under ../emb/ and wrap it for ragas
# in one step, so `evaluating_embeddings` is only ever the wrapped object.
evaluating_embeddings = LangchainEmbeddingsWrapper(
    HuggingFaceEmbeddings(
        model_name="../emb/all-MiniLM-L6-v2",  # Provide the pre-trained model's path
        model_kwargs={'device': device},  # Pass the model configuration options
        encode_kwargs={'normalize_embeddings': True},  # Pass the encoding options
    )
)

In [62]:
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)

In [64]:
from datasets import Dataset

# One column per field that ragas reads. 'answer' holds the generated replies:
# answer_relevancy and faithfulness score the answer, so evaluation cannot run
# without it. `question` is converted from a pandas Series to a plain list so
# that every column is an ordinary Python list of equal length.
data_samples = {
    'question': list(question),
    'answer': answer,
    'contexts': contexts,
    'ground_truth': ground_truth,
}
dataset = Dataset.from_dict(data_samples)

In [None]:
# Score the dataset with ragas, using the local Llama model as the judge LLM
# and the local sentence-transformer for embeddings. Four metrics are
# requested: two about the answer and two about the retrieved contexts.
result = evaluate(
    dataset,
    llm = evaluating_llm,
    embeddings=evaluating_embeddings,
    metrics=[
        answer_relevancy,
        faithfulness,
        context_recall,
        context_precision,
    ])