In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import numpy as np
import pandas as pd
import torch

from src.RAG_Calculater import RAG, get_top_n_articles
from src.Massege_Factory import massage_factory
from src.Case_Builder import (device,
                              bert_version,
                              bert_model_name,
                              genai_version,
                              genai_model_name,
                              prompt_strategy_used,
                              dataset_name,
                              massage_strategy
                              )

In [2]:
data_train = pd.read_json(f'src/dataset/clean/{dataset_name}/{bert_version}_train.json')
data_val = pd.read_json(f'src/dataset/clean/{dataset_name}/{bert_version}_validation.json')
data_test = pd.read_json(f'src/dataset/clean/{dataset_name}/{bert_version}_test.json')

In [3]:
data_train['rag_sentences'] = data_train['sentences_similarity'].apply(RAG)
data_val['rag_sentences'] = data_val['sentences_similarity'].apply(RAG)
data_test['rag_sentences'] = data_test['sentences_similarity'].apply(RAG)

In [4]:
model = AutoModelForCausalLM.from_pretrained(genai_model_name, 
                                              torch_dtype=torch.bfloat16,
                                              low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(genai_model_name)

In [5]:
model.eval()

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
     

In [6]:
chatbot = pipeline("text-generation", model="BioMistral/BioMistral-7B", max_new_tokens=512, repetition_penalty=1.2, no_repeat_ngram_size=3)

results = []
summaries = []

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [None]:
for idx in range(len(data_test)):
    print(f"\n {idx+1} / {len(data_test)}", end="")
    
    target_row = data_test.loc[idx]
    
    if massage_strategy == "few_shot": 
        ref_rows_indexes = get_top_n_articles(data_train['title_embedding'], target_row['title_embedding'], n=3)
        ref_rows = data_train.loc[ref_rows_indexes].reset_index(drop=True)
        
    else: 
        ref_rows = None
    
    massage = massage_factory(massage_strategy, target_row, ref_rows)
    summary = " ".join(data_test.loc[idx, 'summary'])
    
    answer = chatbot(massage)[0]['generated_text'][-1]['content']
    
    results.append(answer)
    summaries.append(summary)
    


 1 / 241

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 2 / 241

In [None]:
model_results = pd.DataFrame({
    'reference': summaries,
    'prediction': results
})

In [None]:
model_results.to_csv(f'results/{genai_version}_{massage_strategy}_summaries_{bert_version}_{dataset_name}_{prompt_strategy_used}.csv', index=False)