In [12]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
import torch
from tqdm.auto import tqdm

device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'

In [14]:
!nvidia-smi
print(device)

Mon Feb 26 14:42:51 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A4000               On  | 00000000:00:05.0  On |                  Off |
| 41%   41C    P8              12W / 140W |   3864MiB / 16376MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A4000               On  | 00000000:00:07.0 Off |  

In [15]:
from huggingface_hub import login, HfApi
hf_token = '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'
login(token=hf_token)
api = HfApi(token=hf_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/paperspace/.cache/huggingface/token
Login successful


In [16]:
from langchain.embeddings import HuggingFaceEmbeddings
from datasets import load_dataset, Dataset

path = '/home/paperspace/certibot/data/certibot-data.jsonl'

raw_dataset = load_dataset('json', data_files=path, split='train')
raw_dataset

Using custom data configuration default-dd3eae9dd2681203
Reusing dataset json (/home/paperspace/.cache/huggingface/datasets/json/default-dd3eae9dd2681203/0.0.0/a3e658c4731e59120d44081ac10bf85dc7e1388126b92338344ce9661907f253)


Dataset({
    features: ['topic', 'article'],
    num_rows: 97
})

In [17]:
data = raw_dataset.to_pandas()

data = data.reset_index()
data['context'] = data['topic'] + ': ' + data['article']
data['index'] = data['index'].astype((str))

dataset = Dataset.from_pandas(data)

In [18]:
# checkpoint = "Cohere/Cohere-embed-multilingual-v3.0"
checkpoint = "intfloat/multilingual-e5-large-instruct"
embed_dim = 1024
batch_size = 4

embedding = HuggingFaceEmbeddings(model_name=checkpoint)

You try to use a model that was created with version 2.4.0.dev0, however, your version is 2.3.1. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [19]:
import pinecone

api_key = 'a344a187-8b52-422d-97c2-96628ef67ef6'

pc = pinecone.Pinecone(api_key=api_key)
index_name = 'certibot-rag'

add = True

In [20]:
import time

# check if index already exists (it shouldn't if this is first time)
if index_name not in pc.list_indexes().names():
    # if does not exist, create index
    pc.create_index(
        index_name,
        dimension=embed_dim,
        metric='cosine',
        spec=pinecone.PodSpec('gcp-starter')
    )
    # wait for index to be initialized
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pc.Index(index_name)
# view index stats
index.describe_index_stats()

{'dimension': 1024,
 'index_fullness': 0.00097,
 'namespaces': {'': {'vector_count': 97}},
 'total_vector_count': 97}

In [21]:
if add:
  progress_bar = tqdm(range(0, len(data), batch_size))

  for i in range(0, len(data), batch_size):
    end = min(len(data), i + batch_size)
    batch = data[i:end]

    ids = batch['index']
    texts = batch['context']

    embeds = embedding.embed_documents(texts)

    metadata = [{
        'context': x['context'],
        'topic': x['topic']
        } for _, x in batch.iterrows()]

    vectors = list(zip(ids, embeds, metadata))
    index.upsert(vectors=vectors)
    progress_bar.update(1)
else:
  print('No data to upsert')

  0%|          | 0/25 [00:00<?, ?it/s]

In [22]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
mistral_checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(mistral_checkpoint, padding_side='left')

model = AutoModelForCausalLM.from_pretrained(
    mistral_checkpoint,
    torch_dtype=torch.float16,
    device_map='auto',
    )

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [23]:
system_instruction = """Ce qui suit est une conversation entre une IA et un humain. La conversation est en français.
Tu es un assistant pour les clients de CertiDeal. CertiDeal vends des smartphones reconditionnés.
Ci-dessous est une requête d'un utilisateur ainsi que quelques contextes qui peuvent être pertinents.
Réponds à la question en fonction des informations contenues dans ces contextes.
Si tu ne trouve pas la réponse à la question, dis "Je ne sais pas".
Si les contextes ne sont pas en relation avec la question et n'apporte aucun élément de réponse dis "Je ne sais pas".

N'oublie pas l'historique de la conversation. Toute les réponses doivent être en Français
"""

In [24]:
def RAG_retrieve(query:str, top_k:int=5) -> tuple:
  """
  Embeds a user query, retrieves top_k relevant contexts and returns them for
  use by the LLM.
  """

  query_embed = embedding.embed_query(query)
  res = index.query(vector=query_embed, top_k=top_k, include_metadata=True, include_values=True)
  contexts = [x['metadata']['context'] for x in res['matches']]
  scores = [x['score'] for x in res['matches']]

  return contexts, scores


def RAG_generate(query:str, contexts:list, conversation_history:list[dict]) -> str:
    
    # format retrieved contexts
    context = ""
    for i, text in enumerate(contexts):
        context += f'*{i}: {text}\n\n'
    
    history_prompt = ""
    if conversation_history != []:
        for exchange in conversation_history:
            history_prompt += f">Q: {exchange['query']}\n>A: {exchange['response']}\n"

    # prompt template based on the model

    prompt = f"""<s>[INST] Instruction du système
    {system_instruction}
    ---------------------------------------------------------------------
    Historique de la conversation:
    {history_prompt}
    ---------------------------------------------------------------------
    Contextes pour inspiration:
    {context}
    ---------------------------------------------------------------------
    
    Question: {query}
    Compte tenu des informations contextuelles et non des connaissances préalables, réponds à la requête.
    Crée une réponse informative sans recopier la requête. Montre que tu as compris la demande en apportant une réponse précise et pertinente. Réponds exclusivement en Français.
    Si la requête est amicale et ne demande pas de question, n'utilise pas le contexte!
    [/INST]
    Réponse:</s>"""
    eos = '[/INST]'
        
    # tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # generate answer
    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        temperature=0.5,
        max_new_tokens=1024,
        )
    
    # decode the output
    output = tokenizer.decode(outputs[0].to(device), skip_special_tokens=True)

    # keep the generated part
    idx = output.index(eos) + len(eos)
    answer = output[idx:].strip()
    answer = answer.replace('Réponse:', '').strip()

    return answer

In [25]:
conversation_history = []

query = "que pense tu de backmarket?"
contexts, scores = RAG_retrieve(query, top_k=4)
out = RAG_generate(query=query, contexts=contexts, conversation_history=conversation_history)
conversation_history.append({'query':query, 'response':out})
print(out)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


I'm an assistant for CertiDeal's clients. Based on the context provided, I cannot find any information about "Backmarket" in the given contexts. Therefore, I cannot provide a precise answer to your question.


# Eval

In [102]:
questions = [
    'qui est certideal?',
    'quelles sont les modalités de livraison?',
    'quel est le delai de livraison?',
    "comment beneficier de l'offre free mobile?",
    "quelles garanties proposez-vous sur les iphones?",
    "combien coute l'iphone 12 pro?",
    "est ce que je peux payer sur plusieurs fois?",
    "d'ou vienne vos smartphones?",
    "que pense tu de backmarket?",
    "vos smartphones sont-ils meilleurs que backmarket",
    "quels sont les états de smartphones?",
    "j'ai un problème avec ma commande",
    "iphone 12 mini ou iphone 11 pro max?"
]

eval_dict = {'query':[], 'answer':[], 'context':[]}

In [123]:
progress_bar = tqdm(range(len(questions)))

conversation_history = []
for question in questions:
    context = RAG_retrieve(question, top_k=3)
    out = RAG_generate(question, context, conversation_history)
    conversation_history.append({'query':query, 'response':out})

    eval_dict['query'].append(question)
    eval_dict['context'].append(context)
    eval_dict['answer'].append(out)

    progress_bar.update(1)

  0%|          | 0/13 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token

In [140]:
import pandas as pd
eval_df = pd.DataFrame(eval_dict)

i = 12
print(questions[i])
print('----------------------------------------------------------------------------')
print('************ MISTRAL *************')
print(eval_df.loc[i, 'answer'])
print('----------------------------------------------------------------------------')
eval_df.loc[i, 'context']


iphone 12 mini ou iphone 11 pro max?
----------------------------------------------------------------------------
************ MISTRAL *************
both the iPhone 12 mini and the iPhone 11 Pro Max are great options, but they have some differences. The iPhone 11 Pro Max was released in 2019 and has a larger screen and battery compared to the iPhone 12 mini. The iPhone 12 mini, on the other hand, is more compact and has a smaller battery but still offers good battery life. Both models have high-quality OLED screens and powerful processors. If you're looking for a smaller device without sacrificing features, the iPhone 12 mini might be the better choice. However, if a larger screen and longer battery life are priorities, the iPhone 11 Pro Max would be a good fit.

Regarding your question, you've asked for a comparison between the iPhone 12 mini and the iPhone 11 Pro Max. Based on the context provided, I've outlined the main differences between these two models. I hope this information h

(["iPhone 14 Pro Max: Grâce à sa taille, à la vitesse de son processeur, à l'autonomie de sa batterie et à sa triple caméra arrière, l'iPhone 14 Pro Max, lancé en Septembre 2022, est le téléphone idéal pour tous ceux qui recherchent un smartphone à usage professionnel. En général, les plus grandes différences par rapport à la version Pro se situent au niveau de la taille de l'écran et de l'autonomie de la batterie",
  "iPhone 13 Mini: L'iPhone 13 Mini, lancé en Septembre 2021, offre des performances exceptionnelles et un écran OLED de haute qualité dans un format compact. L'appareil photo a été considérablement amélioré et la batterie a une plus grande autonomie. Si vous recherchez un petit iPhone sans sacrifier les fonctionnalités, le 13 mini est le bon choix.\nDe plus, si vous achetez un smartphone reconditionné chez CertiDeal, vous pourrez profiter de toutes ses fonctionnalités à moindre coût. Nos appareils sont contrôlés par des experts et garantis pour une durée équivalente à cell

# Guardrails

In [8]:
colang_content = """
define user ask backmarket
    "qui est backmarket?"
    "est ce que backmarket est meilleure?"
    "que pense tu de backmarket?"
    "backmarket"

define bot answer backmarket
    "Je suis un assistant pour les clients de CertiDeal. Je peux répondre à toutes les questions concernant CertiDeal, ses produits, ses services et ses partenariats. Je ne peux pas parler de nos compétiteurs."

define flow backmarket
    user ask backmarket
    bot answer backmarket
    bot offer help
"""

yaml_content = """
models:
  - type: main
    engine: custom_llm
    model: mistralai/Mistral-7B-Instruct-v0.2
"""

In [9]:
from nemoguardrails import LLMRails, RailsConfig

config = RailsConfig.from_content(
  	yaml_content=yaml_content,
    colang_content=colang_content
)

In [10]:
from langchain.base_language import BaseLanguageModel
from nemoguardrails.llm.providers import register_llm_provider


class CustomLLM(BaseLanguageModel):
    """A custom LLM."""

register_llm_provider("custom_llm", CustomLLM)

In [11]:
rails = LLMRails(config=config)

TypeError: Can't instantiate abstract class CustomLLM with abstract methods agenerate_prompt, apredict, apredict_messages, generate_prompt, invoke, predict, predict_messages