In [None]:
import os
# Configure CUDA through os.environ before torch is imported below, so the
# values are already present in this process when torch loads.
# A `!export VAR=...` shell line would only change a short-lived subshell and
# never this kernel, which is why the variables are set from Python instead.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3"
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['TORCH_USE_CUDA_DSA'] = '1'
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM



In [None]:
# Hub identifier shared by the model weights and the tokenizer.
checkpoint = "EleutherAI/gpt-j-6B"

# 8-bit loading is explicitly switched off; device placement is left to
# device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    load_in_8bit=False,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [None]:
# Freeze every existing weight; adapters added later are the only trainable part.
for param in model.parameters():
  param.requires_grad_(False)
  if param.ndim != 1:
    continue
  # One-dimensional tensors (e.g. layernorm) are held in fp32 for stability.
  param.data = param.data.to(torch.float32)

# Store fewer activations during the forward pass.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """nn.Sequential whose forward result is converted to torch.float32."""

  def forward(self, x):
    result = super().forward(x)
    return result.to(torch.float32)
# Wrap the LM head so that whatever it outputs is returned as float32.
model.lm_head = CastOutputToFloat(model.lm_head)

In [1]:
from peft import LoraConfig, get_peft_model

# LoRA hyper-parameters. target_modules is not passed here, so the library's
# own selection of layers applies (TODO confirm it matches what is wanted for GPT-J).
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,               # rank of the low-rank update matrices
    lora_alpha=32,      # alpha scaling
    lora_dropout=0.05,  # dropout on the LoRA branch
    bias="none",
)

# Replace `model` with its PEFT-wrapped version.
model = get_peft_model(model, config)

ModuleNotFoundError: No module named 'peft'

In [None]:
def get_trainable_params(model):
    """
    Print how many of the model's parameters are trainable.

    Args:
        model: object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs, e.g. a ``torch.nn.Module``.

    Returns:
        None. A single summary line is printed with the trainable count,
        the total count and the trainable percentage. A model without any
        parameters reports 0.0 percent instead of raising ZeroDivisionError.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: with zero parameters the division would fail.
    percent = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent}"
    )
# Report trainable vs. total parameter counts for the current `model`.
get_trainable_params(model)

In [None]:
import transformers
from datasets import load_dataset
# Location of the WikiSQL CSV. Set the WIKISQL_CSV environment variable to
# point at the file on another machine; when it is unset the path below is used.
data = load_dataset(
    "csv",
    data_files=os.environ.get(
        "WIKISQL_CSV", "/home/sai_aswath_pathora_in/data/wikisql_total_.csv"
    ),
)

In [None]:
# Display the loaded dataset object to check what was read from the CSV.
data

In [None]:
# def merge_cols(data):
#     data["prediction"] = "user question\n"+''.join(data["instruction"]) + "context \n" + data["context"] + "sql output\n"+ data["response "]
#     return data

# data['train'] = data['train'].map(merge_cols)

In [None]:
#test
prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request"


def _as_text(value):
    """Return value as text; None (a missing cell) becomes an empty string."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        # A sequence of pieces is concatenated without a separator.
        return "".join(str(piece) for piece in value)
    return str(value)


def merge_cols(data):
    """
    Build the training text for one row and store it under "prediction".

    The text is the module-level ``prompt`` followed by three sections
    ("### Instruction:", "### Input:", "### Response: "), each one starting
    on its own line after a blank line so that a header is never fused to
    the text that precedes it.

    Args:
        data: mapping with "instruction", "context" and "response" entries.
            None values are treated as empty text.

    Returns:
        The same mapping, with the "prediction" entry added or replaced.
    """
    data["prediction"] = (
        prompt
        + "\n\n### Instruction:\n" + _as_text(data["instruction"])
        + "\n\n### Input:\n" + _as_text(data["context"])
        + "\n\n### Response: \n" + _as_text(data["response"])
    )
    return data

# Run merge_cols over the train split so each row gains a "prediction" entry.
data['train'] = data['train'].map(merge_cols)

In [None]:
def _tokenize_predictions(samples):
    """Run the tokenizer over the "prediction" entries of a batch."""
    return tokenizer(samples['prediction'])


# batched=True hands the function a batch of rows per call.
data = data.map(_tokenize_predictions, batched=True)

# Use token id 0 for padding.
tokenizer.pad_token_id = 0

In [None]:
# Length of the attention mask of training row 768.
len(data["train"][768]["attention_mask"])  # 518 (value noted by the author)

In [None]:
# NOTE(review): repeats the pad id assignment made in the tokenization cell above —
# confirm whether both are needed.
tokenizer.pad_token_id = 0

In [None]:
# Optimisation settings: batches of 6 per device with 6 accumulation steps,
# 100 steps in total of which 25 are warm-up, fp16 enabled, logging every step.
training_args = transformers.TrainingArguments(
    output_dir='outputs',
    per_device_train_batch_size=6,
    gradient_accumulation_steps=6,
    warmup_steps=25,
    max_steps=100,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
)

# Collator built with masked-language-modelling switched off (mlm=False).
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=data['train'],
    data_collator=causal_lm_collator,
)


In [None]:
# Ask PyTorch to release cached GPU memory it is not currently using.
torch.cuda.empty_cache()

In [None]:
# use_cache is switched off for training, which silences the warnings.
# Re-enable it before running inference.
model.config.use_cache = False
# Start the training run configured on `trainer`.
trainer.train()

In [None]:
# Save the trained, PEFT-wrapped `model` under models/wikisql.
# (`model` is the object produced by get_peft_model and trained by `trainer`.)
model.save_pretrained("models/wikisql")

In [None]:
# NOTE(review): compiles main.cu to main.cu.o with TORCH_USE_CUDA_DSA defined; nothing
# else in the visible notebook creates main.cu or uses the object file — confirm this
# cell is still needed.
!nvcc -arch=sm_70 -O3 -std=c++11 -c -o main.cu.o main.cu -DTORCH_USE_CUDA_DSA



# inference