In [1]:
import torch
from prompt_toolkit.contrib.regular_languages.regex_parser import tokenize_regex
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

 ### Running the model on a GPU

In [2]:
# Model identifier handed to `from_pretrained` for both the model and the tokenizer.
checkpoint = "google/gemma-2-2b-it"

In [3]:
# Tokenizer that belongs to `checkpoint`.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# 4-bit NF4 weights with double quantization; matmuls are computed in float16.
# NOTE(review): confirm float16 (rather than bfloat16) is the intended compute
# dtype for this model family.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Quantized causal LM; device placement is delegated to `device_map="auto"`
# and attention uses the "eager" implementation.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",
    attn_implementation="eager",
    quantization_config=quantization_config,
)

Applying chat template

In [4]:
# NOTE(review): `instruction` is not referenced anywhere else in this cell —
# confirm whether it is still needed.
instruction = "You are a helpful programmer instruction"

# Single-turn conversation containing only the user's request.
messages = [
    {
        "role": "user",
        "content": "write a function that takes a list of integers and returns the sum of all the integers in the list in java.",
    },
]

# Render the conversation as a plain string (tokenize=False) so the template
# markup added by the tokenizer can be inspected.
text = tokenizer.apply_chat_template(messages, tokenize=False)
print(text)

<bos><start_of_turn>user
write a function that takes a list of integers and returns the sum of all the integers in the list in java.<end_of_turn>



In [24]:
# Tokenize the same conversation, this time appending the generation prompt
# (the opening of the model's turn), and return PyTorch tensors.
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
)
# Move the token ids onto the GPU before they are passed to the model.
text = text.to("cuda")
print(text)

tensor([[     2,    106,   1645,    108,   4086,    476,   1411,    674,   5548,
            476,   1889,    576,  45047,    578,   8753,    573,   2707,    576,
            832,    573,  45047,    575,    573,   1889,    575,   1821, 235265,
            107,    108,    106,   2516,    108]], device='cuda:0')


In [25]:
# Sample a continuation of at most 200 new tokens from the templated prompt.
outputs = model.generate(
    input_ids=text,
    max_new_tokens=200,
    do_sample=True,
)
# Decode the first returned sequence in full; the printed text therefore
# starts with the prompt and keeps the special tokens.
print(tokenizer.decode(outputs[0]))

<bos><start_of_turn>user
write a function that takes a list of integers and returns the sum of all the integers in the list in java.<end_of_turn>
<start_of_turn>model
```java
import java.util.List;

public class SumList {

    public static int sum(List<Integer> numbers) {
        int sum = 0;
        for (Integer number : numbers) { 
            sum += number; 
        }
        return sum;
    }

    public static void main(String[] args) {
        List<Integer> numberList = List.of(1, 2, 3, 4, 5);
        int totalSum = sum(numberList);

        System.out.println("The sum of the numbers is: " + totalSum);
    }
}
```

**Explanation:**

* **Function Definition:**  
    * `public static int sum(List<Integer> numbers)`: 
        * `public` means the method is accessible from other parts of your code.
        * `static` means it's


Without chat template

In [45]:
# Plain-text prompt built by hand, without the tokenizer's chat template.
# It is called `prompt` rather than `input` so that this cell does not shadow
# the `input()` builtin for the rest of the kernel session.
prompt = "Instruction : write a function that takes a list of integers and returns the sum of all the integers in the list in java. \n Response:"

# Tokenize to PyTorch tensors and move the whole encoding
# (input_ids + attention_mask) to the GPU.
input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
input_ids

{'input_ids': tensor([[     2,  37854,    865,   5598,    476,   1411,    674,   5548,    476,
           1889,    576,  45047,    578,   8753,    573,   2707,    576,    832,
            573,  45047,    575,    573,   1889,    575,   1821, 235265, 235248,
            108,  10567, 235292]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [46]:
# `input_ids` is the tokenizer's full encoding (input_ids + attention_mask).
# Unpack all of it so that `generate` receives the attention mask explicitly
# instead of having to infer one from the token ids.
outputs = model.generate(**input_ids, do_sample=True, max_new_tokens=200)

# Decode the first returned sequence in full (prompt followed by continuation).
print(tokenizer.decode(outputs[0]))

<bos>Instruction : write a function that takes a list of integers and returns the sum of all the integers in the list in java. 
 Response: 
 
 ```java
public class Main {
  public static int sumOfList(int[] list) {
    int totalSum = 0;
    for (int i = 0; i < list.length; i++) {
      totalSum += list[i];
    }
    return totalSum;
  }
  
  public static void main(String[] args) {
    int[] numbers = {1, 2, 3, 4, 5};
    System.out.println(sumOfList(numbers));
  }
}
```
  
 **The function `sumOfList` does the following:**

1.  **Takes an integer array (list) as input using `int[] list`.**
2.  **Initializes a variable `totalSum` to 0.** This variable will store the cumulative sum. 
3.  **Loops through the


### LLM inference for software model completion

In [2]:
from datasets import load_dataset

##### Model used for inference:

In [3]:
# Local directory of the fine-tuned model, passed to `from_pretrained` below.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
checkpoint = (
    "D:\\LLM\\thesisPractical\\fine_tuned_models"
    "\\gemma-2-2b-it-software-model_completion"
    "\\gemma-2-2b-it-software-model_completion"
)

##### The following functions format the input data for the model

In [4]:
def _build_user_messages(input):
    """Wrap an incomplete UML model in the single user turn sent to the model."""
    return [
        {"role": "user", "content": f'You are an AI assistant that specializes in UML model completion. Given the following incomplete UML model in Json format, complete the model by finding the missing part. Incomplete model :\n{input}'}
    ]


def format_chat_template_to_print(input):
    """Return the chat-templated prompt as a string, for inspection.

    Args:
        input: The incomplete UML model (JSON text) to embed in the prompt.
            The parameter name is kept as-is for keyword-call compatibility.

    Returns:
        Whatever the global `tokenizer.apply_chat_template` returns with
        `tokenize=False` and the generation prompt appended.
    """
    return tokenizer.apply_chat_template(
        _build_user_messages(input),
        tokenize=False,
        add_generation_prompt=True,
    )


def format_chat_template(input):
    """Return the chat-templated prompt as token ids moved to the GPU.

    Builds exactly the same conversation as `format_chat_template_to_print`,
    but tokenizes it to PyTorch tensors and moves the result to "cuda".

    Args:
        input: The incomplete UML model (JSON text) to embed in the prompt.

    Returns:
        The tokenized prompt, after `.to("cuda")`.
    """
    return tokenizer.apply_chat_template(
        _build_user_messages(input),
        tokenize=True,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to("cuda")

##### Load the model and tokenizer

In [5]:
# Tokenizer stored alongside the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# 4-bit NF4 quantization with double quantization and float16 compute.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the fine-tuned causal LM in quantized form; device placement is left
# to `device_map="auto"` and attention uses the "eager" implementation.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    attn_implementation="eager",
    device_map="auto",
    quantization_config=quantization_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

##### Load the dataset to test the model

In [6]:
# Directory holding the processed dataset splits.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
org_path = "D:\\LLM\\thesisPractical\\datasets_for_fine_tuning\\structural_removal_non_contiguous\\processed_4000"

# JSON Lines file containing the test split.
test_dataset_url = f"{org_path}\\test.jsonl"

# Only a "test" split is registered with the JSON loader.
data_files = {"test": test_dataset_url}
dataset = load_dataset("json", data_files=data_files)
test_dataset = dataset["test"]

Apply the chat template to the input data

In [7]:
# Take the `input` field of the first test example as the prompt payload.
data = test_dataset[0]["input"]

# Show the fully templated prompt as text ...
text_to_print = format_chat_template_to_print(data)
print(text_to_print)

# ... and build the tokenized version that is fed to `model.generate`.
input_ids = format_chat_template(data)

<bos><start_of_turn>user
You are an AI assistant that specializes in UML model completion. Given the following incomplete UML model in Json format, complete the model by finding the missing part. Incomplete model :
{"directed":true,"nodes":[{"viewpoint":null,"visibility":"PUBLIC_LITERAL","qualifiedName":"model","name":"model","id":0,"URI":null,"eClass":"Model"},{"isSingleExecution":false,"isReadOnly":false,"visibility":"PUBLIC_LITERAL","qualifiedName":"model::Activity","name":"Activity","id":2,"isActive":false,"isReentrant":true,"isLeaf":false,"isAbstract":false,"isFinalSpecialization":false,"eClass":"Activity"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow","name":"ControlFlow","id":3,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow2","name":"ControlFlow2","id":4,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow3","name":"ControlFlow3","id":5,"isLeaf":false,"eClass":"Contro

In [8]:
# `output` field of the same (first) test example, printed for comparison
# with what the model generates.
output = test_dataset[0]["output"]
print(output)

{"nodes":[{"visibility":"PUBLIC_LITERAL","id":1,"eClass":"PackageImport"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow10","name":"ControlFlow10","id":10,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow18","name":"ControlFlow18","id":17,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"DecisionNode","name":"DecisionNode","id":25,"isLeaf":false,"eClass":"DecisionNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ActivityFinalNode","name":"ActivityFinalNode","id":26,"isLeaf":false,"eClass":"ActivityFinalNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"DecisionNode2","name":"DecisionNode2","id":35,"isLeaf":false,"eClass":"DecisionNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ActivityFinalNode4","name":"ActivityFinalNode4","id":39,"isLeaf":false,"eClass":"ActivityFinalNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ActivityFinalNode5","name":"ActivityFinalNode5","i

Generate the output from the model

In [10]:
# Sample a completion for the templated prompt.
# NOTE(review): `max_length` bounds prompt tokens plus generated tokens, so the
# room left for new tokens shrinks as the prompt grows — confirm 4100 is enough
# for the longest test input.
outputs = model.generate(
    input_ids=input_ids,
    do_sample=True,
    max_length=4100,
)

# Decode the first returned sequence without special tokens; the decoded text
# still begins with the prompt itself.
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_output)

user
You are an AI assistant that specializes in UML model completion. Given the following incomplete UML model in Json format, complete the model by finding the missing part. Incomplete model :
{"directed":true,"nodes":[{"viewpoint":null,"visibility":"PUBLIC_LITERAL","qualifiedName":"model","name":"model","id":0,"URI":null,"eClass":"Model"},{"isSingleExecution":false,"isReadOnly":false,"visibility":"PUBLIC_LITERAL","qualifiedName":"model::Activity","name":"Activity","id":2,"isActive":false,"isReentrant":true,"isLeaf":false,"isAbstract":false,"isFinalSpecialization":false,"eClass":"Activity"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow","name":"ControlFlow","id":3,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow2","name":"ControlFlow2","id":4,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow3","name":"ControlFlow3","id":5,"isLeaf":false,"eClass":"ControlFlow"},{"visibility