In [36]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import pandas as pd
import torch
from peft import PeftModel, PeftModelForCausalLM, LoraModel
from transformers.models.llama.modeling_llama import LlamaForCausalLM, LlamaModel, LlamaDecoderLayer, LlamaAttention
from pathlib import Path
from torch.nn import Embedding, ModuleList
from peft.tuners.lora.layer import Linear

ASSETS_DIR = Path("./assets")
dataset_path = ASSETS_DIR / "fine-tuning-small.csv" 
eval_dataset_path = ASSETS_DIR / "evaluation-small.csv"
eval_data_output_path = ASSETS_DIR / "evaluation-small-output.csv"

# Temp
OUTPUT_DIR =  "./output/deepseek_coder_v2"

# Models
MODEL_DIR = Path("./models")
WEIGHTS_PATH = MODEL_DIR / 'model_weights_ast.pth'
MODEL_PATH= MODEL_DIR / 'model_peft'

In [37]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on {device}")
tokenizer = AutoTokenizer.from_pretrained("JetBrains/deepseek-coder-1.3B-kexer")
model = torch.load(f=WEIGHTS_PATH, map_location=device, weights_only=False)
peft_model = PeftModel.from_pretrained(model, MODEL_PATH)
peft_model.to(device)

Running on cpu




PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): LlamaForCausalLM(
          (model): LlamaModel(
            (embed_tokens): Embedding(32256, 2048)
            (layers): ModuleList(
              (0-23): 24 x LlamaDecoderLayer(
                (self_attn): LlamaAttention(
                  (q_proj): lora.Linear(
                    (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=2048, out_features=8, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=8, out_features=2048, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
                    (lo

In [40]:

def generate_solution(example_problem: str) -> str:
    instruction = "You are a coding assistant. Given the following coding problem, provide a clear and detailed solution.\n"
    input_text = instruction + example_problem
    
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(peft_model.device)
    generated_ids = peft_model.generate(
        input_ids,
        max_length=20000,
        num_beams=5,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id
    )
    solution = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return solution

def runSample():
    print("Generating solutions for each problem...")
    data = pd.read_csv(eval_dataset_path)
    for i in range(0, 1):
        column_name = f"solution_{i}"
        print(f"Generating column: {column_name}")
        data[column_name] = data['problem'].apply(generate_solution)
        print(f"Generated column: {column_name}")
    print(data.head())

def runFull():
    print("Generating solutions for each problem...")
    data = pd.read_csv(eval_dataset_path)
    for i in range(0, data.shape[0]):
        column_name = f"solution_{i}"
        print(f"Generating column: {column_name}")
        data[column_name] = data['problem'].apply(generate_solution)
        print(f"Generated column: {column_name}")
    data.to_csv(eval_data_output_path, index=False)
    print("Solutions saved to dataset1_generated_solutions.csv")


In [None]:
runSample()

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generating solutions for each problem...
Generating column: solution_0
