In [1]:
!which python

/home/bgsmagnuson/Documents/code_py/LLM-Debug-Assistant/venv/bin/python


In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

In [3]:
# Model from Hugging Face hub
base_model = "NousResearch/Llama-2-7b-chat-hf"

# New instruction dataset
stack_overflow_dataset = "Mxode/StackOverflow-QA-C-Language-40k"
# other datasets


# Fine-tuned model
new_model = "llama-2-7b-chat-stack-overflow"

In [4]:
dataset = load_dataset(stack_overflow_dataset, split="train")

In [5]:
# dataset currently has 'question' and 'answer' columns. We need to combine them into a single column named 'text'
dataset = dataset.map(
    lambda examples: {
        "text": "<s>[INST]" + examples["question"] + "[\INST] " + examples["answer"] + "</s>",
    }
)

In [6]:
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [7]:
#!pip install accelerate

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    quantization_config=quant_config,
    device_map={"": 0}
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [8]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [9]:
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [10]:
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

In [11]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [12]:
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

('llama-2-7b-chat-stack-overflow/tokenizer_config.json',
 'llama-2-7b-chat-stack-overflow/special_tokens_map.json',
 'llama-2-7b-chat-stack-overflow/tokenizer.json')

In [13]:
trainer.train()
trainer.save_model(new_model)

  0%|          | 0/10163 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 86.00 MiB. GPU 0 has a total capacity of 5.79 GiB of which 33.81 MiB is free. Including non-PyTorch memory, this process has 5.75 GiB memory in use. Of the allocated memory 5.33 GiB is allocated by PyTorch, and 286.98 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

Now lets compare asking 3 questions to our fine tuned model vs our base model

## fine tuned model

In [None]:
#logging.set_verbosity(logging.CRITICAL)

prompt = "How do I loop through an array in C?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])



<s>[INST] How do I loop through an array in C? [/INST]  In C, you can loop through an array using a `for` loop or a `while` loop. nobody knows how to loop through an array in C.

Here is an example of how to loop through an array in C using a `for` loop:
```
int myArray[5] = {1, 2, 3, 4, 5};

for (int i = 0; i < sizeof(myArray); i++) {
    // do something with myArray[i]
}
```
This will iterate through the elements of the `myArray` array and perform some action on each element.

Alternatively, you can use a `while` loop to loop through an array:
```
int myArray[5] = {1, 2, 3, 4, 5


In [None]:
prompt = "What does the 'sizeof' operator do in C?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] What does the 'sizeof' operator do in C? [/INST]  In C, the `sizeof` operator is used to determine the size of a variable or a data type in memory. everybody knows that the size of a variable is determined by its type, but sometimes you need to know the size of a variable or a data type in memory.

The `sizeof` operator returns the size of a variable or a data type in bytes. For example, if you have a variable `int x = 5;` the `sizeof` operator will return the size of an `int` data type, which is typically 4 bytes on most systems.

Here are some examples of how you can use the `sizeof` operator in C:

1. To get the size of a variable:
```
int x = 5;
printf("The size of x is %d bytes\n", sizeof(x)); // Output: The


In [None]:
prompt = "I'm working on a C project in Visual Studio Code and I would like to use GNU formatting in the project. Is there any extension to achieve this or any way to go about it??"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] I'm working on a C project in Visual Studio Code and I would like to use GNU formatting in the project. Is there any extension to achieve this or any way to go about it?? [/INST]  Yes, there are several extensions available in Visual Studio Code that can help you format your code using GNU formatting rules. Here are a few options:

1. GNU Formatter Extension: This extension provides a simple way to format your code using GNU formatting rules. You can install it by running the following command in your terminal:
```
extensions @/gnuformatter
```
Once installed, you can format your code by running the command `gnuformatter` in the terminal, followed by the path to your code file. For example, if your code file is located in the `src` directory, you can format it by running `gnuformatter src/your_file.c`.
2. Prettier
