In [1]:

%load_ext autoreload
%autoreload 2

import pandas as pd
from torch.utils.data import Dataset
from datasets import load_dataset
import torch
import os
import random
import numpy as np
from torch import nn
from typing import Dict, Optional, Tuple, List
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import Dataset, random_split
from datasets import Dataset as _Dataset, DatasetDict
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report

import time
import math
import matplotlib
matplotlib.rcParams.update({'figure.figsize': (16, 12), 'font.size': 14})
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import clear_output

import gc
import re

os.environ['WANDB_DISABLED'] = 'true'


In [2]:
# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
from unsloth import FastLanguageModel
import torch
# --- Base model loading settings ---
# Maximum sequence length handed to the loader (RoPE scaling is handled by Unsloth).
max_seq_length = 2048
# Compute dtype: None lets the loader auto-detect
# (float16 on Tesla T4 / V100, bfloat16 on Ampere and newer).
dtype = None
# Load 4-bit quantized weights to reduce memory usage; may be set to False.
load_in_4bit = True

# Alternative local checkpoint that can be used as model_name instead:
#   "./models/llama_pretrain_ruen"
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit = load_in_4bit,
    dtype = dtype,
    max_seq_length = max_seq_length,
    model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",
)

==((====))==  Unsloth: Fast Llama patching release 2024.4
   \\   /|    GPU: NVIDIA GeForce RTX 4080 SUPER. Max memory: 15.992 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.2. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# LoRA adapter settings, collected in one place and unpacked into the call below.
# NOTE(review): the captured log reports that a non-zero dropout disables
# Unsloth's fast patching for the LoRA matrices -- confirm 0.1 is wanted.
lora_settings = dict(
    # Adapter rank; any value > 0 works (8, 16, 32, 64, 128 are suggested).
    r = 16,
    lora_alpha = 16,
    # Any value is supported, but 0 is the optimized setting.
    lora_dropout = 0.1,
    # Any value is supported, but "none" is the optimized setting.
    bias = "none",
    # Attention projections followed by the MLP projections.
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # True or "unsloth"; "unsloth" uses less VRAM and suits very long context.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    # Rank-stabilized LoRA is available but switched off here.
    use_rslora = False,
    # LoftQ is available but not configured.
    loftq_config = None,
)

model = FastLanguageModel.get_peft_model(model, **lora_settings)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.4 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [5]:
# Run a garbage-collection pass, then ask PyTorch to release its unused cached
# CUDA memory, so later cells start with as much free GPU memory as possible.
gc.collect()
torch.cuda.empty_cache()

In [6]:
# Alpaca-style prompt skeleton with three positional `{}` slots, filled in
# order by `str.format`: the instruction, the input, and the response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [7]:
# Read the tab-separated submission template into a DataFrame.
submit_df = pd.read_csv(
    '../data/sample_submission_test.tsv',
    delimiter='\t',
)

In [8]:
# Few-shot instruction text that is placed in the first (instruction) slot of
# `alpaca_prompt`. Its single `{}` placeholder is filled with a language name
# taken from `lang_map`.
# NOTE(review): `<|eot_id|>` sits before the "Examples:" section, i.e. in the
# middle of the instruction -- confirm this marker is intentional.
# NOTE(review): the second example ("have good advisers => ...") looks
# reversed or truncated compared with the other pairs -- confirm.
expert_instruction = """
Transform statements to remove unnecessary or vulgar expressions while preserving the original meaning. Ensure clarity and politeness.
Write only neutral sentence in {}.
Answer in format => *neutral sentence*.
<|eot_id|>

Examples:

he had steel balls too ! => he was brave too!

have good advisers => i dont believe the saudis do not have good advisers

solo para ser reemplazado por otro consenso sin cerebro => solo para ser reemplazado por otro consenso

Weil er eine linke Bazille ist....😃 => Weil er ein Linker ist....😃

дальневосточные армия разграмила фашистов пусть помнят суки путинские => дальневосточные армия разграмила фашистов пусть помнят
 
"""

# Template for the input slot: a leading newline followed by the sentence.
input_template = """
{}"""

# Maps the two-letter codes looked up from a row's `lang` field to the
# language name that is inserted into `expert_instruction`.
lang_map = {
    'uk': 'ukrainian',
    'en': 'english',
    'ru': 'russian',
    'hi': 'hindi',
    'de': 'german',
    'es': 'spanish',
    'am': 'amharic',
    'zh': 'chinese',
    'ar': 'arabic'
}


# End-of-sequence token of the loaded tokenizer (not referenced by the
# prompt-building code below).
EOS_TOKEN = tokenizer.eos_token


def prepare_input_pred(df):
    """Build the complete generation prompt for a single row.

    Args:
        df: One row of the submission frame (despite the name). It must
            provide a ``'lang'`` code present in ``lang_map`` and a
            ``'toxic_sentence'`` string.

    Returns:
        ``alpaca_prompt`` filled with the language-specific instruction, the
        sentence to rewrite, and ``'=>'`` as the start of the response.

    Raises:
        KeyError: If the row's language code is not in ``lang_map``.
    """
    language_name = lang_map[df['lang']]
    instruction = expert_instruction.format(language_name)
    sentence_block = input_template.format(df['toxic_sentence'])
    return alpaca_prompt.format(instruction, sentence_block, '=>')


# One prompt per row, stored next to the source sentence.
submit_df['text'] = submit_df.apply(prepare_input_pred, axis=1)

### Worked example

In [9]:
from transformers import TextStreamer

# Put the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Tokenize one prepared prompt (row 222) as a batch of size one.
inputs = tokenizer(
    [submit_df['text'][222]],
    return_tensors = "pt",
).to("cuda")

# Stream the generated text to stdout as it is produced.
# Sampling is an option of `generate`, so `do_sample` is passed there: extra
# keyword arguments given to `TextStreamer` are only forwarded to the
# tokenizer's decode call and would not enable sampling.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(
    **inputs,
    streamer = text_streamer,
    max_new_tokens = 150,
    do_sample = True,
)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:

Transform statements to remove unnecessary or vulgar expressions while preserving the original meaning. Ensure clarity and politeness.
Write only neutral sentence in ukrainian.
Answer in format => *neutral sentence*.
<|eot_id|>

Examples:

he had steel balls too! => he was brave too!

have good advisers => i dont believe the saudis do not have good advisers

solo para ser reemplazado por otro consenso sin cerebro => solo para ser reemplazado por otro consenso

Weil er eine linke Bazille ist....😃 => Weil er ein Linker ist....😃

дальневосточные армия разграмила фашистов пусть помнят суки путинские => дальневосточные армия разграмила фашистов пусть помнят
 


### Input:

І рибку, і на хуй, та?

### Response:
=> І рибку, і на море, та?<|eot_id|>


In [10]:
decoded_answers2 = []

def rem(inp, out):
    """Extract the generated answer from one decoded model output.

    The decoded text is expected to hold the prompt followed by the
    completion: everything up to a ``### Response:`` header line, then
    ``=>``, the answer, and finally end-of-turn / padding tokens.

    Args:
        inp: The prompt that produced ``out``. Not used; kept so that
            existing call sites keep working.
        out: Decoded generation, special tokens included.

    Returns:
        The text between the first and second ``=>`` of the response (or the
        whole response when it contains no ``=>``), cut at the first
        ``<|eot_id|>`` / ``<|end_of_text|>`` and stripped of surrounding
        whitespace. An empty string is returned when ``out`` has no
        ``### Response:`` header.
    """
    bot = '<|begin_of_text|>'
    end_markers = ('<|eot_id|>', '<|end_of_text|>')
    header = '### Response:\n'

    sections = out.replace(bot, '').split(header)
    if len(sections) < 2:
        # Nothing can be attributed to the model without the header; report it
        # instead of raising IndexError in the middle of a long batch run.
        print('rem: no response header found in: ' + repr(out))
        return ''
    res = sections[1]

    # Whatever follows the first end-of-turn or padding token is not part of
    # the answer, whether or not the '=>' marker is present.
    for marker in end_markers:
        res = res.split(marker)[0]

    pieces = res.split('=>')
    if len(pieces) > 1:
        res = pieces[1]
    else:
        # Unexpected format: keep the whole response and make it visible.
        print('rem: no "=>" marker found in response: ' + repr(res))

    # Trim so that a response such as "=>\nanswer" is not reduced to an empty
    # first line by later newline-based clean-up.
    return res.strip()

def run_batch_gen_new(input_texts, BS = 64, max_new_tokens = 150):
    """Generate an answer for every prompt in ``input_texts``, batch by batch.

    Side effects: sets the tokenizer's pad token and padding side, switches
    the model to Unsloth's inference mode, and rebinds the module-level
    ``decoded_answers2`` list, which is filled as batches complete so that
    partial results remain available if the run is interrupted.

    Args:
        input_texts: List of fully formatted prompt strings.
        BS: Number of prompts per generation batch.
        max_new_tokens: Upper bound on tokens generated per prompt.

    Returns:
        The list of extracted answers (the same object as
        ``decoded_answers2``), one per prompt, in input order.
    """
    global decoded_answers2

    # Prompts in a batch have different lengths; pad on the left so that each
    # prompt ends right where generation starts.
    tokenizer.pad_token = "<|end_of_text|>"
    tokenizer.padding_side = "left"
    FastLanguageModel.for_inference(model)

    # Start every run from an empty accumulator: appending to the list left by
    # a previous call would duplicate answers and break row alignment.
    decoded_answers2 = []

    for batch_start in tqdm(range(0, len(input_texts), BS)):
        batch_texts = input_texts[batch_start:batch_start + BS]
        inputs = tokenizer(batch_texts, return_tensors = "pt", padding = True).to("cuda")
        # Sampling-based generation; `early_stopping` is a beam-search option
        # and is therefore not passed.
        outputs = model.generate(
            **inputs,
            max_new_tokens = max_new_tokens,
            use_cache = True,
            do_sample = True,
        )
        decoded = tokenizer.batch_decode(outputs)

        decoded_answers2.extend(
            rem(prompt, text) for prompt, text in zip(batch_texts, decoded)
        )

        # Release per-batch tensors before the next batch is tokenized.
        gc.collect()
        torch.cuda.empty_cache()

    return decoded_answers2

In [None]:
# Free unused Python objects and cached CUDA memory before the full run.
gc.collect()
torch.cuda.empty_cache()

# Generate an answer for every prepared prompt, 16 prompts per batch.
sumit_result = run_batch_gen_new(submit_df['text'].tolist(), BS=16)

In [None]:
# Clean the raw generations into single-line answers.
# `cmb_sum` is built from `sumit_result` (the output of the generation cell) so
# that this cell works on a fresh kernel instead of relying on a `cmb_sum`
# left over from an earlier session.
# 1) Drop any trailing "(Note: ...)" commentary added by the model.
cmb_sum = [s.split('(Note:')[0] for s in sumit_result]
# 2) Keep only the first line of each answer.
cmb_sum = [s.split('\n')[0] for s in cmb_sum]
# 3) Remove any special tokens that survived answer extraction.
cmb_sum = [s.replace('<|end_of_text|>', '').replace('<|eot_id|>', '') for s in cmb_sum]

In [None]:
# Attach the cleaned answers to their source rows and write the submission.
submit_df['neutral_sentence'] = cmb_sum

# NOTE(review): the directory name is spelled 'sumbissions'; it is kept as-is
# because other tooling may rely on this path -- confirm.
submission_path = './sumbissions/llama_prompt_4.tsv'
# Create the output directory up front so the write cannot fail on a missing
# folder after the long generation run.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submit_df[['toxic_sentence', 'neutral_sentence', 'lang']].to_csv(submission_path, sep='\t', index=False)