In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

 ### Running the model on a CPU

In [None]:
checkpoint = "google/gemma-2b"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

In [None]:
input_text = "Write a function to sort a list "
input_ids = tokenizer(input_text, return_tensors="pt")
print(input_ids)

In [None]:
outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))

 ### Running the model on a GPU

In [2]:
checkpoint = "google/gemma-2b"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
input_text = "def print_hello_world(): "
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

In [14]:
outputs = model.generate(**input_ids, max_new_tokens=200, do_sample=True)
print(tokenizer.decode(outputs[0]))

<bos>def print_hello_world(): 
   print("Hello, World!") 

def get_name(): 
   name = input("What is your name? ") 
   return name 


if __name__=="__main__": 
   print_hello_world() 
   my_name = get_name() 
   print(f"Hello {my_name}")
<eos>


### Running on GPU Using 4-bit precision and GPU

In [3]:
checkpoint = "google/gemma-2b"                # change the model name to the one you want to use

In [6]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map ="auto",
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
input_text = "write a function that takes a list of integers and returns the sum of the list. "
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

In [16]:
outputs = model.generate(**input_ids, do_sample=True, max_new_tokens=200)
print(tokenizer.decode(outputs[0]))

<bos>write a function that takes a list of integers and returns the sum of the list. 1. no other list functions can be used in your function 2. your function signature must be sumList(list)

Answer:

Step 1/3
1. First, we need to check if the list is empty. If it is, we can return 0. Otherwise, we can continue with the next step.

Step 2/3
2. We need to take the first element of the list and add it to the sum. This is because the list has only one element, so we need to sum it up.

Step 3/3
3. We need to repeat step 2 for all elements of the list. This is because we want to continue summing up elements as long as the list is not empty. Here is the code: def sumList(myList): if len(myList) == 0: return 0 # initialize the sum to 0 sum = 0 for num in myList: sum += num return sum


### Inference for LLM for software Model Completion

In [None]:
from datasets import load_dataset

In [None]:
checkpoint = "D:\\LLM\\thesisPractical\\fine_tuned_models\\gemma\\results\\checkpoint-500"

In [None]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map ="auto",
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

The dataset from processed 4000

In [None]:
org_path = "D:\LLM\\thesisPractical\\datasets\\structural_removal_non_contiguous\\processed_4000"

test_dataset_url = org_path + "\\test.jsonl"

data_files = {
    'test' : test_dataset_url
}

dataset = load_dataset('json', data_files=data_files)
test_dataset = dataset['test']

In [None]:
data = test_dataset[0]['input']
output = test_dataset[0]['output']
print(data)

In [None]:
input_ids = tokenizer(data, padding=True,return_tensors='pt', truncation=True, max_length=3500).to("cuda")

In [None]:
outputs = model.generate(**input_ids, max_length=3500)
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_output)