In [None]:
!huggingface-cli login --token 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

In [None]:
!pip install -U bitsandbytes
!pip install -U accelerate

In [None]:
import warnings
from pathlib import Path

import pandas as pd
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Suppress every Python warning for the rest of the session.
warnings.simplefilter('ignore')

# Switch off both the flash and the memory-efficient scaled-dot-product-attention
# backends in PyTorch. NOTE(review): presumably a hardware/driver compatibility
# workaround — confirm it is still required.
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)

In [None]:
class Generator:
    """Wrapper around a Hugging Face causal LM for batched chat-style generation.

    The model and tokenizer are loaded as soon as the object is constructed.
    `generate_responses` then samples one completion per user prompt.
    """

    def __init__(self, model_name, quantize_4bit=True, use_flash_attention=False):
        """Store the configuration and load the model and tokenizer.

        Args:
            model_name: Model identifier passed to `from_pretrained`.
            quantize_4bit: When True, load the weights with a 4-bit NF4
                bitsandbytes quantization config.
            use_flash_attention: When True, request the "flash_attention_2"
                attention implementation.
        """
        self.model_name = model_name
        self.quantize_4bit = quantize_4bit
        self.use_flash_attention = use_flash_attention
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None
        self.tokenizer = None
        self._load_model()

    def _load_model(self):
        """Instantiate `self.model` and `self.tokenizer` from `self.model_name`."""
        quantization_config = None
        if self.quantize_4bit:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
            )

        attn_implementation = None
        if self.use_flash_attention:
            attn_implementation = "flash_attention_2"

        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            quantization_config=quantization_config,
            attn_implementation=attn_implementation,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

        # generate_responses strips the prompt by slicing off the first
        # `prompt_padded_len` tokens of every row. That is only correct when
        # padding is on the left, so enforce it instead of relying on the
        # tokenizer's default.
        self.tokenizer.padding_side = "left"
        # Batched encoding with padding=True needs a pad token; fall back to
        # EOS for tokenizers that do not define one.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print("Model and tokenizer loaded successfully.")

    def get_message_format(self, system_prompt, user_prompts):
        """Build one chat conversation per user prompt.

        Args:
            system_prompt: Text for the system turn. When empty or None the
                system turn is omitted, so the chat template does not render
                an empty system message.
            user_prompts: Iterable of user message strings.

        Returns:
            A list with one conversation (list of role/content dicts) per
            user prompt, in the same order.
        """
        formatted_prompts = []
        for user_prompt in user_prompts:
            conversation = []
            if system_prompt:
                conversation.append({"role": "system", "content": system_prompt})
            conversation.append({"role": "user", "content": user_prompt})
            formatted_prompts.append(conversation)
        return formatted_prompts

    def generate_responses(self, system_prompt, user_prompts, temperature=0.3, top_p=0.75, top_k=0, max_new_tokens=1024):
        """Sample one completion for each user prompt in a single batch.

        Args:
            system_prompt: System message shared by every conversation
                (omitted when empty).
            user_prompts: Iterable of user message strings.
            temperature: Sampling temperature forwarded to `generate`.
            top_p: Nucleus sampling threshold forwarded to `generate`.
            top_k: Top-k value forwarded to `generate`.
            max_new_tokens: Maximum number of tokens generated per prompt.

        Returns:
            A list of decoded completions (prompt tokens and special tokens
            removed), one per user prompt, in input order. An empty input
            yields an empty list without invoking the model.
        """
        user_prompts = list(user_prompts)
        if not user_prompts:
            return []

        messages = self.get_message_format(system_prompt, user_prompts)
        # return_dict=True also returns the attention mask, so padded positions
        # are explicitly masked during generation instead of being inferred.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            padding=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.device)
        input_ids = encoded["input_ids"]
        prompt_padded_len = input_ids.shape[1]

        gen_tokens = self.model.generate(
            input_ids,
            attention_mask=encoded["attention_mask"],
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,
        )
        # Every row shares the same (left-padded) prompt length, so everything
        # after that offset is newly generated text.
        gen_tokens = [gt[prompt_padded_len:] for gt in gen_tokens]
        return self.tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)

In [None]:
def load_datasets(data_dir='Dataset'):
    """Read the training and result CSV files into DataFrames.

    Args:
        data_dir: Directory containing `train.csv` and `result.csv`.
            Defaults to the relative `Dataset` folder.

    Returns:
        A `(train_df, result_df)` tuple of pandas DataFrames.
    """
    data_dir = Path(data_dir)
    train_df = pd.read_csv(data_dir / 'train.csv')
    result_df = pd.read_csv(data_dir / 'result.csv')
    return train_df, result_df

In [None]:
def generate_results(generator, result_df, column_name, batch_size, target_number,
                     output_path='result.csv', save_every=5):
    """Fill `<column_name>_Translated` in `result_df` with generated text, batch by batch.

    Processing resumes at the first row whose output cell is still missing and
    continues while the batch start row is <= `target_number` and inside the
    frame. `result_df` is modified in place and periodically written to
    `output_path`.

    Assumes `result_df` has the default integer index (0..n-1), as produced by
    `pd.read_csv`.

    Args:
        generator: Object exposing `generate_responses(system_prompt, texts)`
            that returns one string per input text.
        result_df: DataFrame holding the source column; the output column is
            created if it does not exist yet.
        column_name: Name of the source text column.
        batch_size: Number of rows sent to the generator per call.
        target_number: Last row label at which a batch may start (inclusive).
        output_path: CSV checkpoint path. NOTE(review): the default
            `result.csv` is not the `Dataset/result.csv` file the notebook
            loads from, so a restarted kernel will not resume from these
            checkpoints unless this is pointed at the loaded file — confirm
            intent.
        save_every: Write a checkpoint after this many batches. A final
            checkpoint is always written when unsaved batches remain.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    column_name_to_write = f'{column_name}_Translated'
    if column_name_to_write not in result_df.columns:
        result_df[column_name_to_write] = None
    # A column read from CSV that is entirely empty has a float dtype; writing
    # strings into it is deprecated in pandas, so switch to object up front.
    result_df[column_name_to_write] = result_df[column_name_to_write].astype(object)

    pending_rows = result_df.index[result_df[column_name_to_write].isna()]
    if len(pending_rows) == 0:
        # Nothing left to do (previously this raised IndexError).
        print(f'Column Name = {column_name_to_write}, nothing left to process')
        return
    start_row = int(pending_rows[0])
    num_rows = len(result_df)

    # Most recently generated text and its result, reused when a following
    # batch consists solely of that same text.
    last_query, last_result = None, None
    # NOTE(review): the system prompt is empty, so the model only receives the
    # raw cell text as the user turn — confirm an instruction is not missing.
    system_prompt = ''

    print(f'Column Name = {column_name_to_write}, Start row = {start_row}')

    batches_done = 0
    unsaved_changes = False
    while start_row <= target_number and start_row < num_rows:
        print(f"test_df_counter is {start_row}")

        # Exclusive end, clamped so the final batch cannot run past the frame.
        end_row = min(start_row + batch_size, num_rows)
        target_texts = result_df.loc[start_row:end_row - 1, column_name].tolist()

        if last_query is not None and all(text == last_query for text in target_texts):
            # Every row repeats the previous text: reuse its result rather
            # than calling the model again.
            results = [last_result] * len(target_texts)
        else:
            results = generator.generate_responses(system_prompt, target_texts)
            torch.cuda.empty_cache()
            last_query, last_result = target_texts[-1], results[-1]

        result_df.loc[start_row:end_row - 1, column_name_to_write] = results
        batches_done += 1
        unsaved_changes = True

        if save_every > 0 and batches_done % save_every == 0:
            result_df.to_csv(output_path, index=False)
            unsaved_changes = False
            print(f"Dataframe saved to csv file at iteration {start_row}")

        start_row = end_row

    # Persist the batches completed since the last periodic checkpoint.
    if unsaved_changes:
        result_df.to_csv(output_path, index=False)
        print(f"Dataframe saved to csv file at iteration {start_row}")

In [None]:
# Load both CSV files; result_df is the frame that generate_results later fills in place.
train_df, result_df = load_datasets()

In [None]:
# Hugging Face model identifier handed to Generator.
MODEL_NAME = "CohereForAI/aya-23-8B"
# Constructing Generator loads the model and tokenizer immediately (its __init__
# calls _load_model); the defaults are 4-bit quantization on, flash attention off.
generator = Generator(MODEL_NAME)

In [None]:
# Source columns to process; each one is written to a matching '<column>_Translated' column.
column_names = ['Context', 'Response']
for column_name in column_names:
    # Rows are processed 4 at a time while the batch start row is <= 100.
    generate_results(generator, result_df, column_name, batch_size=4, target_number=100)