In [1]:
import torch
from prompt_toolkit.contrib.regular_languages.regex_parser import tokenize_regex
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

 ### Running the model on a GPU

In [2]:
# Model identifier passed to `from_pretrained` for both the model and the tokenizer below.
checkpoint = "google/gemma-2-2b-it"  

In [3]:
# 4-bit NF4 quantisation with double quantisation; float16 is the compute dtype.
bnb_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
    "bnb_4bit_use_double_quant": True,
}
quantization_config = BitsAndBytesConfig(**bnb_settings)

# Load the quantised causal LM, letting `device_map="auto"` decide placement,
# with the eager attention implementation.
model_load_kwargs = {
    "quantization_config": quantization_config,
    "device_map": "auto",
    "attn_implementation": "eager",
}
model = AutoModelForCausalLM.from_pretrained(checkpoint, **model_load_kwargs)

# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Applying chat template

In [4]:
# Defined at notebook scope but not used by this cell.
instruction = "You are a helpful programmer instruction"

# Single-turn conversation: one user message asking for a Java function.
user_request = "write a function that takes a list of integers and returns the sum of all the integers in the list in java."
messages = [{"role": "user", "content": user_request}]

# Render the conversation as a string (tokenize=False) so the template can be inspected.
text = tokenizer.apply_chat_template(messages, tokenize=False)
print(text)

<bos><start_of_turn>user
write a function that takes a list of integers and returns the sum of all the integers in the list in java.<end_of_turn>



In [24]:
# Tokenise the same conversation, this time with the generation prompt appended,
# request PyTorch tensors, and move the result to the GPU.
encoded_chat = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
)
text = encoded_chat.to("cuda")
print(text)

tensor([[     2,    106,   1645,    108,   4086,    476,   1411,    674,   5548,
            476,   1889,    576,  45047,    578,   8753,    573,   2707,    576,
            832,    573,  45047,    575,    573,   1889,    575,   1821, 235265,
            107,    108,    106,   2516,    108]], device='cuda:0')


In [25]:
# Sample up to 200 new tokens from the chat-formatted prompt, then decode and
# print the first (only) returned sequence, special tokens included.
sampling_kwargs = {"do_sample": True, "max_new_tokens": 200}
outputs = model.generate(input_ids=text, **sampling_kwargs)
chat_completion = tokenizer.decode(outputs[0])
print(chat_completion)

<bos><start_of_turn>user
write a function that takes a list of integers and returns the sum of all the integers in the list in java.<end_of_turn>
<start_of_turn>model
```java
import java.util.List;

public class SumList {

    public static int sum(List<Integer> numbers) {
        int sum = 0;
        for (Integer number : numbers) { 
            sum += number; 
        }
        return sum;
    }

    public static void main(String[] args) {
        List<Integer> numberList = List.of(1, 2, 3, 4, 5);
        int totalSum = sum(numberList);

        System.out.println("The sum of the numbers is: " + totalSum);
    }
}
```

**Explanation:**

* **Function Definition:**  
    * `public static int sum(List<Integer> numbers)`: 
        * `public` means the method is accessible from other parts of your code.
        * `static` means it's


Without chat template

In [45]:
# Plain-text prompt that bypasses the chat template. It is named `prompt_text`
# rather than `input` so the built-in `input()` is not shadowed for the rest of
# the notebook session.
prompt_text = "Instruction : write a function that takes a list of integers and returns the sum of all the integers in the list in java. \n Response:"

# Tokenise to PyTorch tensors and move them to the GPU. The resulting encoding
# holds both `input_ids` and `attention_mask`.
input_ids = tokenizer(prompt_text, return_tensors="pt").to("cuda")
input_ids

{'input_ids': tensor([[     2,  37854,    865,   5598,    476,   1411,    674,   5548,    476,
           1889,    576,  45047,    578,   8753,    573,   2707,    576,    832,
            573,  45047,    575,    573,   1889,    575,   1821, 235265, 235248,
            108,  10567, 235292]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [46]:
# Unpack the whole tokenizer encoding so `generate` receives the attention mask
# together with the input ids, instead of dropping the mask and leaving
# `generate` to infer it.
outputs = model.generate(**input_ids, do_sample=True, max_new_tokens=200)

# Decode and print the first (only) returned sequence, special tokens included.
print(tokenizer.decode(outputs[0]))

<bos>Instruction : write a function that takes a list of integers and returns the sum of all the integers in the list in java. 
 Response: 
 
 ```java
public class Main {
  public static int sumOfList(int[] list) {
    int totalSum = 0;
    for (int i = 0; i < list.length; i++) {
      totalSum += list[i];
    }
    return totalSum;
  }
  
  public static void main(String[] args) {
    int[] numbers = {1, 2, 3, 4, 5};
    System.out.println(sumOfList(numbers));
  }
}
```
  
 **The function `sumOfList` does the following:**

1.  **Takes an integer array (list) as input using `int[] list`.**
2.  **Initializes a variable `totalSum` to 0.** This variable will store the cumulative sum. 
3.  **Loops through the


### LLM inference for software model completion

In [None]:
from datasets import load_dataset

##### Here is the model we use:

In [None]:
# Rebinds `checkpoint` to a local directory; the load cell below reads it.
# NOTE(review): absolute, machine-specific Windows path (by its name, a fine-tuned
# Gemma checkpoint) — adjust before running on another machine.
checkpoint = "D:\\LLM\\thesisPractical\\fine_tuned_models\\gemma\\results\\checkpoint-500"

##### The following functions format the input data for the model

In [None]:
def format_chat_template_to_print(input):
    """Render the UML-completion prompt for ``input`` as chat-formatted text.

    The incomplete model is embedded in a single user message, and the
    notebook-level ``tokenizer``'s chat template is applied without tokenising
    and with the generation prompt appended, so the result can be printed.

    Args:
        input: The incomplete UML model (interpolated into the prompt as-is).

    Returns:
        Whatever ``tokenizer.apply_chat_template`` yields for ``tokenize=False``.
    """
    prompt = f'You are an AI assistant that specializes in UML model completion. Given the following incomplete UML model in Json format, complete the model by finding the missing part. Incomplete model :\n{input}'
    conversation = [{"role": "user", "content": prompt}]
    return tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        tokenize=False,
    )

def format_chat_template(input):
    """Tokenise the UML-completion prompt for ``input`` and move it to the GPU.

    Builds the same single user message as ``format_chat_template_to_print``,
    applies the notebook-level ``tokenizer``'s chat template with tokenisation
    enabled, PyTorch tensors requested and the generation prompt appended.

    Args:
        input: The incomplete UML model (interpolated into the prompt as-is).

    Returns:
        The result of ``tokenizer.apply_chat_template(..., tokenize=True,
        return_tensors="pt")`` after ``.to("cuda")``.
    """
    prompt = f'You are an AI assistant that specializes in UML model completion. Given the following incomplete UML model in Json format, complete the model by finding the missing part. Incomplete model :\n{input}'
    conversation = [{"role": "user", "content": prompt}]
    encoded = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    )
    return encoded.to("cuda")

##### Load the model and tokenizer

In [None]:
# Same 4-bit NF4 / double-quantisation / float16-compute settings as before,
# now applied to the checkpoint selected above.
bnb_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
    "bnb_4bit_use_double_quant": True,
}
quantization_config = BitsAndBytesConfig(**bnb_settings)

# Rebind `model` and `tokenizer` to the objects loaded from `checkpoint`.
model_load_kwargs = {
    "quantization_config": quantization_config,
    "device_map": "auto",
    "attn_implementation": "eager",
}
model = AutoModelForCausalLM.from_pretrained(checkpoint, **model_load_kwargs)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

##### Load the dataset to test the model

In [None]:
# Directory that holds the processed dataset files. Every backslash is escaped
# explicitly: an unescaped `\L` is an invalid escape sequence that Python warns
# about, even though it currently evaluates to the same characters.
org_path = "D:\\LLM\\thesisPractical\\datasets\\structural_removal_non_contiguous\\processed_4000"

# JSON-lines file used as the test split.
test_dataset_url = org_path + "\\test.jsonl"

data_files = {
    'test' : test_dataset_url
}

# Load the file with the `json` builder and keep a handle on its `test` split.
dataset = load_dataset('json', data_files=data_files)
test_dataset = dataset['test']

Apply the chat template to the input data

In [None]:
# Take the `input` field of the first test example, show the rendered prompt,
# then build the tokenised version used for generation.
first_example = test_dataset[0]
data = first_example['input']

text_to_print = format_chat_template_to_print(data)
print(text_to_print)

input_ids = format_chat_template(data)

In [None]:
# Print the `output` field of the same first test example
# (presumably the expected completion — confirm against the dataset schema).
reference_example = test_dataset[0]
output = reference_example['output']
print(output)

Generate the output from the model

In [None]:
# `input_ids` comes from `format_chat_template`. An earlier cell's output shows
# that `apply_chat_template(..., return_tensors="pt")` yields a bare tensor here,
# and unpacking a tensor with `**` raises a TypeError. Build an explicit keyword
# mapping instead; a mapping-style encoding is also accepted and passed through.
if isinstance(input_ids, torch.Tensor):
    generation_inputs = {"input_ids": input_ids}
else:
    generation_inputs = dict(input_ids)

# `max_length` bounds the total sequence length (prompt plus generated tokens).
outputs = model.generate(**generation_inputs, max_length=3500)

# Decode the first (only) sequence without special tokens; the prompt is included.
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_output)