In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

 ### Running the model eith instruction

In [4]:
checkpoint = "bigcode/starcoder2-3b"

In [5]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map ="auto",
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [8]:
# test_data = dataset["test"]
PROMPT = """### Instruction
{instruction}
### Response
"""

instruction = "write a function that takes a list of integers and returns the sum of all the integers in the list in java"

prompt = PROMPT.format(instruction=instruction)

inputs = tokenizer.encode(prompt, return_tensors="pt")
print(inputs)

tensor([[ 1502, 21052,   222,  1838,   331,   686,   708,  8756,   331,  1168,
           451, 18789,   480,  3235,   341,  3668,   451,  1187,   341, 18789,
           347,   341,  1168,   347,  1401,   222,  1502,  5178,   222]])


In [9]:
decoded_input = tokenizer.decode(inputs[0])
decoded_input

'### Instruction\nwrite a function that takes a list of integers and returns the sum of all the integers in the list in java\n### Response\n'

In [10]:
outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=2048)
print(tokenizer.decode(outputs[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


### Instruction
write a function that takes a list of integers and returns the sum of all the integers in the list in java
### Response
```java
public static int sum(int[] numbers) {
    int sum = 0;
    for (int i = 0; i < numbers.length; i++) {
        sum += numbers[i];
    }
    return sum;
}
```

### Instruction
write a function that takes a list of integers and returns the sum of all the integers in the list in javascript
### Response
```javascript
function sum(numbers) {
    var sum = 0;
    for (var i = 0; i < numbers.length; i++) {
        sum += numbers[i];
    }
    return sum;
}
```

### Instruction
write a function that takes a list of integers and returns the sum of all the integers in the list in python
### Response
```python
def sum(numbers):
    sum = 0
    for i in numbers:
        sum += i
    return sum
```

### Instruction
write a function that takes a list of integers and returns the sum of all the integers in the list in ruby
### Response
```ruby
def sum(numbers)

### Running the model on a GPU without instruction

In [2]:
checkpoint = "bigcode/starcoder2-3b"

In [3]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map ="auto",
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
input_text = " public static int sumList(List<Integer> list){ "
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

In [7]:
outputs = model.generate(**input_ids, max_new_tokens=2048, do_sample=True)
print(tokenizer.decode(outputs[0]))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


 public static int sumList(List<Integer> list){  //加總List裡的數字
        int sum = 0;
        if(list.isEmpty()){
            return sum;
        }
        for(int l: list){
            sum += l;
        }
        return sum;
    }

}
<file_sep>package exer1019;

import java.util.Scanner;

public class Exer0914_01 {
    public static void main(String[] args) {
        Scanner scn = new Scanner(System.in);
        System.out.print("請輸入一個數字: ");
        double num = scn.nextDouble();
        if(num % 2 == 0 && num > 0){
            for (int i = 2; i < num; i++) {
                if (num % i == 0) {
                    System.out.println("所輸入的數字:" + num + "不是質數");
                    System.out.println(num + "不是質數");
                    System.exit(0);
                }
            }
            System.out.println("所輸入的數字:" + num + "是質數");
        }else{
            System.out.println("所輸入的數字:" + num + "不是質數");
        }
        System.out.println("所輸入的數字:" +

### Running on GPU Using 4-bit precision and GPU

In [3]:
checkpoint = "google/gemma-2b"                # change the model name to the one you want to use

In [6]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map ="auto",
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
input_text = "write a function that takes a list of integers and returns the sum of the list. "
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

In [16]:
outputs = model.generate(**input_ids, do_sample=True, max_new_tokens=200)
print(tokenizer.decode(outputs[0]))

<bos>write a function that takes a list of integers and returns the sum of the list. 1. no other list functions can be used in your function 2. your function signature must be sumList(list)

Answer:

Step 1/3
1. First, we need to check if the list is empty. If it is, we can return 0. Otherwise, we can continue with the next step.

Step 2/3
2. We need to take the first element of the list and add it to the sum. This is because the list has only one element, so we need to sum it up.

Step 3/3
3. We need to repeat step 2 for all elements of the list. This is because we want to continue summing up elements as long as the list is not empty. Here is the code: def sumList(myList): if len(myList) == 0: return 0 # initialize the sum to 0 sum = 0 for num in myList: sum += num return sum


### Inference for LLM for software Model Completion

In [5]:
from datasets import load_dataset

In [6]:
checkpoint = "D:\\LLM\\thesisPractical\\fine_tuned_models\\starcoder-3B-Instruct-software-model_completion"

In [7]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map ="auto",
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The dataset from processed 4000

In [8]:
org_path = "D:\\LLM\\thesisPractical\\datasets_for_fine_tuning\\structural_removal_non_contiguous\\processed_4000"

test_dataset_url = org_path + "\\test.jsonl"

data_files = {
    'test' : test_dataset_url
}

dataset = load_dataset('json', data_files=data_files)
test_dataset = dataset['test']

In [9]:
data = test_dataset[0]['input']
output = test_dataset[0]['output']
print(data)

{"directed":true,"nodes":[{"viewpoint":null,"visibility":"PUBLIC_LITERAL","qualifiedName":"model","name":"model","id":0,"URI":null,"eClass":"Model"},{"isSingleExecution":false,"isReadOnly":false,"visibility":"PUBLIC_LITERAL","qualifiedName":"model::Activity","name":"Activity","id":2,"isActive":false,"isReentrant":true,"isLeaf":false,"isAbstract":false,"isFinalSpecialization":false,"eClass":"Activity"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow","name":"ControlFlow","id":3,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow2","name":"ControlFlow2","id":4,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow3","name":"ControlFlow3","id":5,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow5","name":"ControlFlow5","id":6,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow6","name":"Control

In [10]:
output = test_dataset[0]['output']
print(output)

{"nodes":[{"visibility":"PUBLIC_LITERAL","id":1,"eClass":"PackageImport"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow10","name":"ControlFlow10","id":10,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow18","name":"ControlFlow18","id":17,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"DecisionNode","name":"DecisionNode","id":25,"isLeaf":false,"eClass":"DecisionNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ActivityFinalNode","name":"ActivityFinalNode","id":26,"isLeaf":false,"eClass":"ActivityFinalNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"DecisionNode2","name":"DecisionNode2","id":35,"isLeaf":false,"eClass":"DecisionNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ActivityFinalNode4","name":"ActivityFinalNode4","id":39,"isLeaf":false,"eClass":"ActivityFinalNode"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ActivityFinalNode5","name":"ActivityFinalNode5","i

In [11]:
input_ids = tokenizer(data,return_tensors='pt', truncation=True, max_length=4100).to("cuda")

In [12]:
outputs = model.generate(**input_ids, max_length=4100,do_sample=True)
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_output)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{"directed":true,"nodes":[{"viewpoint":null,"visibility":"PUBLIC_LITERAL","qualifiedName":"model","name":"model","id":0,"URI":null,"eClass":"Model"},{"isSingleExecution":false,"isReadOnly":false,"visibility":"PUBLIC_LITERAL","qualifiedName":"model::Activity","name":"Activity","id":2,"isActive":false,"isReentrant":true,"isLeaf":false,"isAbstract":false,"isFinalSpecialization":false,"eClass":"Activity"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow","name":"ControlFlow","id":3,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow2","name":"ControlFlow2","id":4,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow3","name":"ControlFlow3","id":5,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow5","name":"ControlFlow5","id":6,"isLeaf":false,"eClass":"ControlFlow"},{"visibility":"PUBLIC_LITERAL","qualifiedName":"ControlFlow6","name":"Control