In [10]:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
checkpoint = "Salesforce/codegen-2B-multi"
model = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint, truncation=True, max_length=2048)

In [11]:
device = 'cuda'

In [3]:
%cd data

/content/data


In [8]:
!python ../process_data.py

In [12]:
prompt = """
Consider the following input output pairs where input is a program and output is the program with High Level Synthesis pragmas inserted\n

Input:

void kernel_fdtd_2d(int tmax,int nx,int ny,double ex[60][80],double ey[60][80],double hz[60][80],double _fict_[40])
{
  int t;
  int i;
  int j;
  for (t = 0; t < 40; t++) {
    for (j = 0; j < 80; j++) {
      ey[0][j] = _fict_[t];
    }
    for (i = 1; i < 60; i++) {
      for (j = 0; j < 80; j++) {
        ey[i][j] = ey[i][j] - 0.5 * (hz[i][j] - hz[i - 1][j]);
      }
    }

    for (i = 0; i < 60; i++) {

      for (j = 1; j < 80; j++) {
        ex[i][j] = ex[i][j] - 0.5 * (hz[i][j] - hz[i][j - 1]);
      }
    }
    for (i = 0; i < 59; i++) {
      for (j = 0; j < 79; j++) {
        hz[i][j] = hz[i][j] - 0.7 * (ex[i][j + 1] - ex[i][j] + ey[i + 1][j] - ey[i][j]);
      }
    }
  }
}
Output:
#pragma ACCEL kernel

void kernel_fdtd_2d(int tmax,int nx,int ny,double ex[60][80],double ey[60][80],double hz[60][80],double _fict_[40])
{
  int t;
  int i;
  int j;
//#pragma scop

#pragma ACCEL PIPELINE auto{__PIPE__L0}

#pragma ACCEL TILE FACTOR=auto{__TILE__L0}

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0}
  for (t = 0; t < 40; t++) {

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_0}
    for (j = 0; j < 80; j++) {
      ey[0][j] = _fict_[t];
    }

#pragma ACCEL PIPELINE auto{__PIPE__L0_1}

#pragma ACCEL TILE FACTOR=auto{__TILE__L0_1}

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_1}
    for (i = 1; i < 60; i++) {

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_1_0}
      for (j = 0; j < 80; j++) {
        ey[i][j] = ey[i][j] - 0.5 * (hz[i][j] - hz[i - 1][j]);
      }
    }

#pragma ACCEL PIPELINE auto{__PIPE__L0_2}

#pragma ACCEL TILE FACTOR=auto{__TILE__L0_2}

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_2}
    for (i = 0; i < 60; i++) {

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_2_0}
      for (j = 1; j < 80; j++) {
        ex[i][j] = ex[i][j] - 0.5 * (hz[i][j] - hz[i][j - 1]);
      }
    }

#pragma ACCEL PIPELINE auto{__PIPE__L0_3}

#pragma ACCEL TILE FACTOR=auto{__TILE__L0_3}

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_3}
    for (i = 0; i < 59; i++) {

#pragma ACCEL PARALLEL FACTOR=auto{__PARA__L0_3_0}
      for (j = 0; j < 79; j++) {
        hz[i][j] = hz[i][j] - 0.7 * (ex[i][j + 1] - ex[i][j] + ey[i + 1][j] - ey[i][j]);
      }
    }
  }
//#pragma endscop
}


"""

In [13]:
prompt += """
Act as an expert in High Level Synthesis, insert High Level Synthesis pragma to the folloing program.
"""

In [14]:
print(prompt)


Consider the following input output pairs where input is a program and output is the program with High Level Synthesis pragmas inserted


Input:

void kernel_fdtd_2d(int tmax,int nx,int ny,double ex[60][80],double ey[60][80],double hz[60][80],double _fict_[40])
{
  int t;
  int i;
  int j;
  for (t = 0; t < 40; t++) {
    for (j = 0; j < 80; j++) {
      ey[0][j] = _fict_[t];
    }
    for (i = 1; i < 60; i++) {
      for (j = 0; j < 80; j++) {
        ey[i][j] = ey[i][j] - 0.5 * (hz[i][j] - hz[i - 1][j]);
      }
    }
    
    for (i = 0; i < 60; i++) {
      
      for (j = 1; j < 80; j++) {
        ex[i][j] = ex[i][j] - 0.5 * (hz[i][j] - hz[i][j - 1]);
      }
    }
    for (i = 0; i < 59; i++) {
      for (j = 0; j < 79; j++) {
        hz[i][j] = hz[i][j] - 0.7 * (ex[i][j + 1] - ex[i][j] + ey[i + 1][j] - ey[i][j]);
      }
    }
  }
}
Output:
#pragma ACCEL kernel

void kernel_fdtd_2d(int tmax,int nx,int ny,double ex[60][80],double ey[60][80],double hz[60][80],double _fict_[40])
{

In [15]:
programs = [
        'processed-mm-krnl.cpp',
        'processed-dotprod-krnl2.cpp',
        'processed-dotprod-krnl.cpp',
        'processed-ewmm-krnl.cpp',
        'processed-jacobi_1d-krnl.cpp',
        'processed-madd-krnl.cpp',
        'processed-syrk-krnl.cpp',
        'processed-trmm-krnl.cpp',
        'processed-trmm-opt-krnl.cpp',
        'processed-vadd-krnl.cpp'
]
context = {}
for program in programs:
  with open(program, 'r') as f:
    lines = f.readlines()
    context[program] = prompt+'Input:'+'\n'.join(lines) + "\nOutput:\n"

In [16]:
from tqdm import tqdm

In [17]:
model = model.to(device)

In [None]:
lens = [2000]
for len in lens:
    for program in tqdm(context):
        text = context[program]
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        attention_mask = inputs["attention_mask"].to(device)
        input_ids=inputs["input_ids"].to(device)
        completion = model.generate(input_ids=input_ids, max_length=len, attention_mask=attention_mask)
        output = tokenizer.decode(completion[0])
        output_idx = output.rfind("Output:\n")
        with open('./outputs/fewshots/' + program[program.find('processed'):program.find('.cpp')]+str(len)+'.out','w') as f:
            f.write(output[output_idx:])


  0%|          | 0/10 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 10%|█         | 1/10 [00:10<01:35, 10.61s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [19]:
!zip -r /content/fewshots.zip /content/data/outputs/fewshots/


  adding: content/data/outputs/fewshots/ (stored 0%)
  adding: content/data/outputs/fewshots/processed-vadd-krnl2000.out (deflated 77%)
  adding: content/data/outputs/fewshots/processed-trmm-krnl2000.out (deflated 78%)
  adding: content/data/outputs/fewshots/processed-syrk-krnl2000.out (deflated 80%)
  adding: content/data/outputs/fewshots/processed-dotprod-krnl2000.out (deflated 77%)
  adding: content/data/outputs/fewshots/processed-ewmm-krnl2000.out (deflated 77%)
  adding: content/data/outputs/fewshots/processed-trmm-opt-krnl2000.out (deflated 78%)
  adding: content/data/outputs/fewshots/.ipynb_checkpoints/ (stored 0%)
  adding: content/data/outputs/fewshots/processed-mm-krnl2000.out (deflated 77%)
  adding: content/data/outputs/fewshots/processed-madd-krnl2000.out (deflated 77%)
  adding: content/data/outputs/fewshots/processed-dotprod-krnl22000.out (deflated 78%)
  adding: content/data/outputs/fewshots/processed-jacobi_1d-krnl2000.out (deflated 78%)


In [11]:
from google.colab import files
files.download("/content/twoshot.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>