# Generative AI Use Case: Summarize Dialogue

Welcome to the practical side of this course. In this lab you will do the dialogue summarization task using generative AI. You will explore how the input text affects the output of the model, and perform prompt engineering to direct it towards the task you need. By comparing zero shot, one shot, and few shot inferences, you will take the first step towards prompt engineering and see how it can enhance the generative output of Large Language Models.

# Table of Contents

- [ 1 - Set up Kernel and Required Dependencies](#1)
- [ 2 - Summarize Dialogue without Prompt Engineering](#2)
- [ 3 - Summarize Dialogue with an Instruction Prompt](#3)
  - [ 3.1 - Zero Shot Inference with an Instruction Prompt](#3.1)
  - [ 3.2 - Zero Shot Inference with the Prompt Template from FLAN-T5](#3.2)
- [ 4 - Summarize Dialogue with One Shot and Few Shot Inference](#4)
  - [ 4.1 - One Shot Inference](#4.1)
  - [ 4.2 - Few Shot Inference](#4.2)
- [ 5 - Generative Configuration Parameters for Inference](#5)


<a name='1'></a>
## 1 - Set up Kernel and Required Dependencies

First, check that the correct kernel is chosen.

<img src="images/kernel_set_up.png" width="300"/>

You can click on that (top right of the screen) to see and check the details of the image, kernel, and instance type.

<img src="images/w1_kernel_and_instance_type.png" width="600"/>




Now install the required packages to use PyTorch and Hugging Face transformers and datasets.



In [2]:
import pandas as pd 
import nltk
import sqlite3
from nltk.translate.bleu_score import sentence_bleu

# programming languages that CodeT5 supports
languages = ['Python', 'Java', 'JavaScript', 'PHP', 'Ruby', 'Go', 'C', 'C#']
# languages = ['Java']

conn = sqlite3.connect('/Users/guru/research/FixMe/data/FixMe-v1.db')

df_cve = pd.read_sql_query("SELECT * FROM cve;", conn)
df_repository = pd.read_sql_query("SELECT * FROM repository;", conn)
df_hunk = pd.read_sql_query("SELECT * FROM hunk_collection;", conn)
df_patch = pd.read_sql_query("SELECT * FROM patch_collection;", conn)
print(f'Original hunks shape: {df_hunk.shape}')

df = df_hunk[df_hunk.programming_language.isin(languages)].reset_index(drop=True)
df = df[['code_before', 'code_after', 'programming_language']]
print(f'Shape of {languages} hunks: {df.shape}')
df.head()

Original hunks shape: (40405, 18)
Shape of ['Python', 'Java', 'JavaScript', 'PHP', 'Ruby', 'Go', 'C', 'C#'] hunks: (26718, 3)


Unnamed: 0,code_before,code_after,programming_language
0,*********************************************...,*********************************************...,Java
1,private boolean exractAndLoad(ArrayList<St...,private boolean exractAndLoad(ArrayList<St...,Java
2,\n // Fall back to extr...,\n // Fall back to extr...,Java
3,return libName;\n }\n\n private ...,return libName;\n }\n\n private ...,Java
4,* - http://www.gnu.org/copyleft/gpl.html\n *...,* - http://www.gnu.org/copyleft/gpl.html\n *...,JavaScript


In [8]:
df_hunk.columns

Index(['file', 'hunk', 'hunk_patch', 'source', 'target', 'source_lines',
       'target_lines', 'added_lines', 'removed_lines', 'code_before',
       'code_after', 'source_start', 'source_length', 'target_start',
       'target_length', 'section_header', 'hunk_length',
       'programming_language'],
      dtype='object')

In [9]:
df_patch.columns

Index(['file', 'patch_info', 'programming_language', 'source_file',
       'source_timestamp', 'target_file', 'target_timestamp', 'is_binary_file',
       'url', 'message'],
      dtype='object')

In [10]:
df_hunk.head()

Unnamed: 0,file,hunk,hunk_patch,source,target,source_lines,target_lines,added_lines,removed_lines,code_before,code_after,source_start,source_length,target_start,target_length,section_header,hunk_length,programming_language
0,src/libraw_cxx.cpp,"@@ -2336,14 +2336,15 @@ int LibRaw::subtract_b...","@@ -2336,14 +2336,15 @@ int LibRaw::subtract_b...","[' #define MAX(a,b) ((a) > (b) ? (a) : (b))\n'...","[' #define MAX(a,b) ((a) > (b) ? (a) : (b))\n'...",<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,"['\t\t\tint dmax = 0;\n', '\t\t\tfor(i=0; i< s...","['\n', ' for(i=0; i< size*4; i++)\n...","#define MAX(a,b) ((a) > (b) ? (a) : (b))\n#def...","#define MAX(a,b) ((a) > (b) ? (a) : (b))\n#def...",2336,14,2336,15,int LibRaw::subtract_black(),18,C++
1,src/libraw_cxx.cpp,"@@ -2359,9 +2360,10 @@ int LibRaw::subtract_bl...","@@ -2359,9 +2360,10 @@ int LibRaw::subtract_bl...",[' // only calculate channel maximum...,[' // only calculate channel maximum...,<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,"['\t\t int dmax = 0;\n', ' if(dmax...","[' C.data_maximum = 0;\n', ' ...",// only calculate channel maximum;\n...,// only calculate channel maximum;\n...,2359,9,2360,10,int LibRaw::subtract_black(),12,C++
2,src/libraw_cxx.cpp,"@@ -2421,8 +2423,10 @@ void LibRaw::exp_bef(fl...","@@ -2421,8 +2423,10 @@ void LibRaw::exp_bef(fl...",[' imgdata.image[i][3] = lut[imgda...,[' imgdata.image[i][3] = lut[imgda...,<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,"['\tif(C.data_maximum <=TBLN)\n', '\t\tC.data_...",[' C.data_maximum = lut[C.data_maximum];\n'...,imgdata.image[i][3] = lut[imgdata....,imgdata.image[i][3] = lut[imgdata....,2421,8,2423,10,"void LibRaw::exp_bef(float shift, float smooth)",12,C++
3,src/libraw_cxx.cpp,"@@ -2530,7 +2534,7 @@ int LibRaw::dcraw_proces...","@@ -2530,7 +2534,7 @@ int LibRaw::dcraw_proces...","[' \n', ' raw2image_ex(subtract_inline...","[' \n', ' raw2image_ex(subtract_inline...",<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,['\t\tint save_4color = O.four_color_rgb;\n'],[' int save_4color = O.four_color_rgb;\n'],\n raw2image_ex(subtract_inline); // al...,\n raw2image_ex(subtract_inline); // al...,2530,7,2534,7,int LibRaw::dcraw_process(void),8,C++
4,hawtjni-runtime/src/main/java/org/fusesource/h...,"@@ -9,13 +9,11 @@\n *************************...","@@ -9,13 +9,11 @@\n *************************...",[' ******************************************...,[' ******************************************...,<bound method Hunk.source_lines of <Hunk: @@ 9...,<bound method Hunk.target_lines of <Hunk: @@ 9...,"['import java.io.*;\n', 'import java.util.Rand...","['import java.io.File;\n', 'import java.io.Fil...",*********************************************...,*********************************************...,9,13,9,11,,15,Java


In [3]:
def filter_patches(df_patch, max_hunks_per_file=2):
    """Filter URLs with counts less than max_hunks_per_file"""
    # Calculate value counts of 'url' column
    url_counts = df_patch['url'].value_counts()
    urls_less_than_two = url_counts[url_counts <= max_hunks_per_file].index.tolist()
    df_filter = df_patch[df_patch.url.isin(urls_less_than_two)]

    print(f'Shape of filtered patch data: {df_filter.shape}')
    return df_filter

def filter_hunks(df_hunk, df_patch):
    """Filter hunks that are not in the filtered patches"""
    df_hunk = df_hunk[df_hunk.file.isin(df_patch.file)]
    print(f'Shape of filtered hunk data: {df_hunk.shape}')
    return df_hunk

max_hunks_per_file = 1
df_patch_filter = filter_patches(df_patch, max_hunks_per_file)
df_hunk_filter = filter_hunks(df_hunk, df_patch_filter)

df_hunk_filter = df_hunk_filter[df_hunk_filter.programming_language.isin(languages)].reset_index(drop=True)
print(f'Shape of filtered hunks (pl-list): {df_hunk_filter.shape}')

Shape of filtered patch data: (771, 10)
Shape of filtered hunk data: (2840, 18)
Shape of filtered hunks (pl-list): (1880, 18)


In [7]:
df_hunk_filter

Unnamed: 0,file,hunk,hunk_patch,source,target,source_lines,target_lines,added_lines,removed_lines,code_before,code_after,source_start,source_length,target_start,target_length,section_header,hunk_length,programming_language
0,hawtjni-runtime/src/main/java/org/fusesource/h...,"@@ -9,13 +9,11 @@\n *************************...","@@ -9,13 +9,11 @@\n *************************...",[' ******************************************...,[' ******************************************...,<bound method Hunk.source_lines of <Hunk: @@ 9...,<bound method Hunk.target_lines of <Hunk: @@ 9...,"['import java.io.*;\n', 'import java.util.Rand...","['import java.io.File;\n', 'import java.io.Fil...",*********************************************...,*********************************************...,9,13,9,11,,15,Java
1,hawtjni-runtime/src/main/java/org/fusesource/h...,"@@ -206,16 +204,19 @@ final public String getL...","@@ -206,16 +204,19 @@ final public String getL...",[' private boolean exractAndLoad(ArrayList...,[' private boolean exractAndLoad(ArrayList...,<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,"['\n', ' String []libNameParts = ma...","[' \n', ' \n', ' ...",private boolean exractAndLoad(ArrayList<St...,private boolean exractAndLoad(ArrayList<St...,206,16,204,19,final public String getLibraryFileName() {,23,Java
2,hawtjni-runtime/src/main/java/org/fusesource/h...,"@@ -224,8 +225,8 @@ private boolean exractAndL...","@@ -224,8 +225,8 @@ private boolean exractAndL...","[' \n', ' // Fall back...","[' \n', ' // Fall back...",<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,"[' File target = extract(errors, re...","[' File target = file(customPath, m...",\n // Fall back to extr...,\n // Fall back to extr...,224,8,225,8,private boolean exractAndLoad(ArrayList<String...,10,Java
3,hawtjni-runtime/src/main/java/org/fusesource/h...,"@@ -259,67 +260,45 @@ private String map(Strin...","@@ -259,67 +260,45 @@ private String map(Strin...","[' return libName;\n', ' }\n', ' \...","[' return libName;\n', ' }\n', ' \...",<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,[' private File extract(ArrayList<String> e...,[' private boolean extract(ArrayList<String...,return libName;\n }\n\n private ...,return libName;\n }\n\n private ...,259,67,260,45,private String map(String libName) {,85,Java
4,drivers/usb/chipidea/host.c,"@@ -70,6 +70,9 @@ static int host_start(struct...","@@ -70,6 +70,9 @@ static int host_start(struct...","[' \telse\n', ' \t\tci->hcd = hcd;\n', ' \n', ...","[' \telse\n', ' \t\tci->hcd = hcd;\n', ' \n', ...",<bound method Hunk.source_lines of <Hunk: @@ 7...,<bound method Hunk.target_lines of <Hunk: @@ 7...,['\tif (ci->platdata->flags & CI13XXX_DISABLE_...,[],\telse\n\t\tci->hcd = hcd;\n\n\treturn ret;\n}...,\telse\n\t\tci->hcd = hcd;\n\n\tif (ci->platda...,70,6,70,9,static int host_start(struct ci13xxx *ci),9,C
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1875,src/lib/filters.c,"@@ -1205,13 +1205,19 @@ void print_abinary(cha...","@@ -1205,13 +1205,19 @@ void print_abinary(cha...","[' \t\t\t}\n', ' \t\t}\n', ' \t} else if (filt...","[' \t\t\t}\n', ' \t\t}\n', ' \t} else if (filt...",<bound method Hunk.source_lines of <Hunk: @@ 1...,<bound method Hunk.target_lines of <Hunk: @@ 1...,"['\t\tsize_t count, masklen;\n', '\n', '\t\tma...","['\t\tint count;\n', '\t\tfor (count = 0; coun...",\t\t\t}\n\t\t}\n\t} else if (filter->type == R...,\t\t\t}\n\t\t}\n\t} else if (filter->type == R...,1205,13,1205,19,"void print_abinary(char *out, size_t outlen, u...",21,C
1876,src/lib/filters.c,"@@ -1222,7 +1228,7 @@ void print_abinary(char ...","@@ -1222,7 +1228,7 @@ void print_abinary(char ...","[' \t\toutlen--;\n', ' \n', ' \t\t/* show the ...","[' \t\toutlen--;\n', ' \n', ' \t\t/* show the ...",<bound method Hunk.source_lines of <Hunk: @@ 1...,<bound method Hunk.target_lines of <Hunk: @@ 1...,['\t\tfor (count = 0; count < masklen; count++...,['\t\tfor (count = 0; count < ntohs(filter->u....,\t\toutlen--;\n\n\t\t/* show the value */\n\t\...,\t\toutlen--;\n\n\t\t/* show the value */\n\t\...,1222,7,1228,7,"void print_abinary(char *out, size_t outlen, u...",8,C
1877,channels/drive/client/drive_main.c,"@@ -629,6 +629,9 @@ static UINT drive_process_...","@@ -629,6 +629,9 @@ static UINT drive_process_...","[' \tStream_Read_UINT32(irp->input, PathLength...","[' \tStream_Read_UINT32(irp->input, PathLength...",<bound method Hunk.source_lines of <Hunk: @@ 6...,<bound method Hunk.target_lines of <Hunk: @@ 6...,"['\tif (!Stream_CheckAndLogRequiredLength(TAG,...",[],"\tStream_Read_UINT32(irp->input, PathLength);\...","\tStream_Read_UINT32(irp->input, PathLength);\...",629,6,629,9,static UINT drive_process_irp_query_directory(...,9,C
1878,zerver/middleware.py,"@@ -25,6 +25,7 @@\n from django.shortcuts impo...","@@ -25,6 +25,7 @@\n from django.shortcuts impo...","[' from django.shortcuts import render\n', ' f...","[' from django.shortcuts import render\n', ' f...",<bound method Hunk.source_lines of <Hunk: @@ 2...,<bound method Hunk.target_lines of <Hunk: @@ 2...,['from django.utils.crypto import constant_tim...,[],from django.shortcuts import render\nfrom djan...,from django.shortcuts import render\nfrom djan...,25,6,25,7,,7,Python


In [13]:
index = 789
prompt = """
Original code:

""" + df_hunk_filter.code_before[index] + """

Modified code:
    
    """ + df_hunk_filter.code_after[index] + """

    What is the modification related in the original code?
    """

print(prompt)


Original code:

	return 0;

unmap_pages:
	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
	return r;
}



Modified code:
    
    	return 0;

unmap_pages:
	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
	return r;
}



    What is the modification related in the original code?
    


In [None]:
import re

def replace_whitespaces_N21(code):
    # Use regular expression to replace multiple whitespaces with a single space
    return re.sub(r'\s+', ' ', code).strip()


# Replace multiple whitespaces with a single space
formatted_original_code = replace_whitespaces_N21(original_code)
formatted_new_code = replace_whitespaces_N21(new_code)

match = formatted_original_code == formatted_new_code
print(f'Match: {match}')

Formatted Original Code:
raw2image_ex(subtract_inline); // allocate imgdata.image and copy data! int save_4color = O.four_color_rgb; if (IO.zero_is_bad) {

Formatted New Code:
raw2image_ex(subtract_inline); // allocate imgdata.image and copy data! int save_4color = O.four_color_rgb; if (IO.zero_is_bad) {
Match: True


In [None]:
def show_tables(conn):
    # List all tables in the database
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    tables = pd.read_sql_query(query, conn)
    print("Tables in the database:", tables['name'].tolist())

show_tables(conn)

Tables in the database: ['cve', 'repository', 'hunk_collection', 'patch_collection']


In [None]:
mask = df_patch.message.value_counts()
mask = mask[mask <= 1].index
df_patch_1 = df_patch[df_patch.message.isin(mask)].reset_index(drop=True)
print(f'Original patches shape: {df_patch_1.shape}')

Original patches shape: (722, 10)


In [None]:
df_repository.cveId

0       CVE-2013-2873
1       CVE-2013-2080
2       CVE-2013-2126
3       CVE-2013-2204
4       CVE-2013-2634
            ...      
1981    CVE-2005-4798
1982    CVE-2005-4881
1983    CVE-2005-4635
1984    CVE-2005-1041
1985    CVE-2005-1767
Name: cveId, Length: 1986, dtype: object

In [None]:
df_patch_1

Unnamed: 0,file,patch_info,programming_language,source_file,source_timestamp,target_file,target_timestamp,is_binary_file,url,message
0,src/libraw_cxx.cpp,diff --git a/src/libraw_cxx.cpp b/src/libraw_c...,C++,a/src/libraw_cxx.cpp,,b/src/libraw_cxx.cpp,,False,https://github.com/LibRaw/LibRaw/commit/2f912f...,[PATCH] fixed wrong data_maximum calcluation; ...
1,hawtjni-runtime/src/main/java/org/fusesource/h...,diff --git a/hawtjni-runtime/src/main/java/org...,Java,a/hawtjni-runtime/src/main/java/org/fusesource...,,b/hawtjni-runtime/src/main/java/org/fusesource...,,False,https://github.com/fusesource/hawtjni/commit/9...,[PATCH] Simplify shared lib extraction.
2,drivers/usb/chipidea/host.c,diff --git a/drivers/usb/chipidea/host.c b/dri...,C,a/drivers/usb/chipidea/host.c,,b/drivers/usb/chipidea/host.c,,False,https://github.com/torvalds/linux/commit/92947...,[PATCH] usb: chipidea: Allow disabling streami...
3,net/key/af_key.c,diff --git a/net/key/af_key.c b/net/key/af_key...,C,a/net/key/af_key.c,,b/net/key/af_key.c,,False,https://github.com/torvalds/linux/commit/a5cc6...,[PATCH] af_key: fix info leaks in notify messages
4,arch/x86/kernel/cpu/perf_event_intel.c,diff --git a/arch/x86/kernel/cpu/perf_event_in...,C,a/arch/x86/kernel/cpu/perf_event_intel.c,,b/arch/x86/kernel/cpu/perf_event_intel.c,,False,https://github.com/torvalds/linux/commit/f1923...,[PATCH] perf/x86: Fix offcore_rsp valid mask f...
...,...,...,...,...,...,...,...,...,...,...
717,tensorflow/lite/kernels/internal/reference/con...,diff --git a/tensorflow/lite/kernels/internal/...,C,a/tensorflow/lite/kernels/internal/reference/c...,,b/tensorflow/lite/kernels/internal/reference/c...,,False,https://github.com/tensorflow/tensorflow/commi...,[PATCH] Fix a potential buffer overflow issue ...
718,src/lib/filters.c,diff --git a/src/lib/filters.c b/src/lib/filte...,C,a/src/lib/filters.c,,b/src/lib/filters.c,,False,https://github.com/FreeRADIUS/freeradius-serve...,[PATCH] manual port of commit 5906bfa1
719,modules/engage-paella-player/src/main/paella-o...,diff --git a/modules/engage-paella-player/src/...,HTML,a/modules/engage-paella-player/src/main/paella...,,b/modules/engage-paella-player/src/main/paella...,,False,https://github.com/opencast/opencast/commit/d2...,[PATCH] only redirect exact hostname matches
720,channels/drive/client/drive_main.c,diff --git a/channels/drive/client/drive_main....,C,a/channels/drive/client/drive_main.c,,b/channels/drive/client/drive_main.c,,False,https://github.com/FreeRDP/FreeRDP/commit/6655...,[PATCH] Fixed missing stream length check in




Load the datasets, Large Language Model (LLM), tokenizer, and configurator. Do not worry if you do not understand yet all of those components - they will be described and discussed later in the notebook.

In [None]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig

  from .autonotebook import tqdm as notebook_tqdm


<a name='2'></a>
## 2 - Summarize Dialogue without Prompt Engineering

In this use case, you will be generating a summary of a dialogue with the pre-trained Large Language Model (LLM) FLAN-T5 from Hugging Face. The list of available models in the Hugging Face `transformers` package can be found [here](https://huggingface.co/docs/transformers/index). 

Let's upload some simple dialogues from the [DialogSum](https://huggingface.co/datasets/knkarthick/dialogsum) Hugging Face dataset. This dataset contains 10,000+ dialogues with the corresponding manually labeled summaries and topics. 

In [None]:
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

Downloading readme: 100%|██████████| 4.65k/4.65k [00:00<00:00, 3.59MB/s]
Downloading data: 100%|██████████| 11.3M/11.3M [00:01<00:00, 8.47MB/s]
Downloading data: 100%|██████████| 442k/442k [00:00<00:00, 1.03MB/s]
Downloading data: 100%|██████████| 1.35M/1.35M [00:00<00:00, 3.78MB/s]
Generating train split: 12460 examples [00:00, 69058.37 examples/s]
Generating validation split: 500 examples [00:00, 46219.24 examples/s]
Generating test split: 1500 examples [00:00, 62850.45 examples/s]


In [None]:
from datasets import Dataset, DatasetDict

# Split the DataFrame into train, validation, and test sets
train_df = df.iloc[:800]
validation_df = df.iloc[800:900]
test_df = df.iloc[900:]

# Convert DataFrame to list of dictionaries
def df_to_dicts(df):
    return [
        {
            'id': i,
            'dialogue': row['code_before'],
            'summary': row['code_after'],
            'topic': '', # Optional field
        }
        for i, row in df.iterrows()
    ]

# Create Dataset objects
train_dataset = Dataset.from_dict({'id': list(train_df.index), 'dialogue': train_df['code_before'], 'summary': train_df['code_after'], 'topic': [''] * len(train_df)})
validation_dataset = Dataset.from_dict({'id': list(validation_df.index), 'dialogue': validation_df['code_before'], 'summary': validation_df['code_after'], 'topic': [''] * len(validation_df)})
test_dataset = Dataset.from_dict({'id': list(test_df.index), 'dialogue': test_df['code_before'], 'summary': test_df['code_after'], 'topic': [''] * len(test_df)})

# Create DatasetDict with the desired format
mydataset = DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset,
    'test': test_dataset
})
mydataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 800
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 100
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 25818
    })
})

In [None]:
example_indices = [40, 200]

dash_line = '-'.join('' for x in range(50))

for i, index in enumerate(example_indices):
    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print('INPUT DIALOGUE:')
    print(mydataset['test'][index]['dialogue'])
    print(dash_line)
    print('BASELINE HUMAN SUMMARY:')
    print(mydataset['test'][index]['summary'])
    print(dash_line)
    print()

-------------------------------------------------
Example  1
-------------------------------------------------
INPUT DIALOGUE:
	if (!skb)
		return err;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;

-------------------------------------------------
BASELINE HUMAN SUMMARY:
	if (!skb)
		return err;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;

-------------------------------------------------

-------------------------------------------------
Example  2
-------------------------------------------------
INPUT DIALOGUE:
		sin->sin_port = udp_hdr(skb)->source;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	}
	if (inet->cmsg_flags)
		ip_cmsg_recv(msg, skb);

-------------------------------------------------
BASELINE HUMAN SUMMARY:
		sin->sin_port = udp_hdr(skb)->source;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));


Print a couple of dialogues with their baseline summaries.

Load the [FLAN-T5 model](https://huggingface.co/docs/transformers/model_doc/flan-t5), creating an instance of the `AutoModelForSeq2SeqLM` class with the `.from_pretrained()` method. 

In [None]:
# model_name='google/flan-t5-base'
model_name='Salesforce/codet5-base'

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

To perform encoding and decoding, you need to work with text in a tokenized form. **Tokenization** is the process of splitting texts into smaller units that can be processed by the LLM models. 

Download the tokenizer for the FLAN-T5 model using `AutoTokenizer.from_pretrained()` method. Parameter `use_fast` switches on fast tokenizer. At this stage, there is no need to go into the details of that, but you can find the tokenizer parameters in the [documentation](https://huggingface.co/docs/transformers/v4.28.1/en/model_doc/auto#transformers.AutoTokenizer).

Test the tokenizer encoding and decoding a simple sentence:

In [None]:
from transformers import RobertaTokenizer, T5ForConditionalGeneration

tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5p-220m")
model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5p-220m")


In [None]:
text = "def greet(user): print(f'hello <extra_id_0>!')"
# text = "def add(a, b): \n int sum= a + b \n return sum"
input_ids = tokenizer(text, return_tensors="pt").input_ids

# simply generate a single sequence
generated_ids = model.generate(input_ids, max_length=8)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
# this prints "{user.username}"

: %s: %


Now it's time to explore how well the base LLM summarizes a dialogue without any prompt engineering. **Prompt engineering** is an act of a human changing the **prompt** (input) to improve the response for a given task.

In [None]:
for i, index in enumerate(example_indices):
    dialogue = mydataset['test'][index]['dialogue']
    summary = mydataset['test'][index]['summary']
    
    inputs = tokenizer(dialogue, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

-------------------------------------------------
Example  1
-------------------------------------------------
INPUT PROMPT:
	if (!skb)
		return err;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;

-------------------------------------------------
BASELINE HUMAN SUMMARY:
	if (!skb)
		return err;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;

-------------------------------------------------
MODEL GENERATION - WITHOUT PROMPT ENGINEERING:
/*
 * Copyright (c) 2008-2021, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not

-------------------------------------------------
Example  2
-------------------------------------------------
INPUT PROMPT:
		sin->sin_port = udp_hdr(skb)->source;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	}
	if (inet->cmsg_flags)
		ip_cmsg_recv(msg, skb);

--------------

You can see that the guesses of the model make some sense, but it doesn't seem to be sure what task it is supposed to accomplish. Seems it just makes up the next sentence in the dialogue. Prompt engineering can help here.

<a name='3'></a>
## 3 - Summarize Dialogue with an Instruction Prompt

Prompt engineering is an important concept in using foundation models for text generation. You can check out [this blog](https://www.amazon.science/blog/emnlp-prompt-engineering-is-the-new-feature-engineering) from Amazon Science for a quick introduction to prompt engineering.

<a name='3.1'></a>
### 3.1 - Zero Shot Inference with an Instruction Prompt

In order to instruct the model to perform a task - summarize a dialogue - you can take the dialogue and convert it into an instruction prompt. This is often called **zero shot inference**.  You can check out [this blog from AWS](https://aws.amazon.com/blogs/machine-learning/zero-shot-prompting-for-the-flan-t5-foundation-model-in-amazon-sagemaker-jumpstart/) for a quick description of what zero shot learning is and why it is an important concept to the LLM model.

Wrap the dialogue in a descriptive instruction and see how the generated text will change:

In [None]:
for i, index in enumerate(example_indices):
    dialogue = mydataset['test'][index]['dialogue']
    summary = mydataset['test'][index]['summary']

    prompt = f"""
Vulnerable C program:

{dialogue}

Patch of the program:
    """

    # Input constructed prompt instead of the dialogue.
    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=100,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)    
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

-------------------------------------------------
Example  1
-------------------------------------------------
INPUT PROMPT:

Vulnerable C program:

	if (!skb)
		return err;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;


Patch of the program:
    
-------------------------------------------------
BASELINE HUMAN SUMMARY:
	if (!skb)
		return err;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;

-------------------------------------------------
MODEL GENERATION - ZERO SHOT:
/*
 * Copyright (c) 2008-2021, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by

-------------------------------------------------
Example  2
-------------------------------------------------
INPUT PROMPT

In [None]:
import evaluate
# summary
# output
bleu = evaluate.load('bleu')
bleu.compute(predictions=[output], references=[summary])

{'bleu': 0.0,
 'precisions': [0.15, 0.02531645569620253, 0.01282051282051282, 0.0],
 'brevity_penalty': 0.9162188716508777,
 'length_ratio': 0.9195402298850575,
 'translation_length': 80,
 'reference_length': 87}

In [None]:
from evaluate import load
import os
os.environ["HF_ALLOW_CODE_EVAL"] = "1"

code_eval = load("code_eval")
test_cases = [summary]
candidates = [[output]]
pass_at_k, results = code_eval.compute(references=test_cases, predictions=candidates, k=[1 ,5, 10])
print(pass_at_k)

{'pass@1': 0.0}


In [None]:
from evaluate import load
super_glue_metric = load('super_glue', 'copa') 
predictions = [0, 1]
references = [0, 0]
results = super_glue_metric.compute(predictions=predictions, references=references)
print(results)

{'accuracy': 0.5}


This is much better! But the model still does not pick up on the nuance of the conversations though.

**Exercise:**

- Experiment with the `prompt` text and see how the inferences will be changed. Will the inferences change if you end the prompt with just empty string vs. `Summary: `?
- Try to rephrase the beginning of the `prompt` text from `Summarize the following conversation.` to something different - and see how it will influence the generated output.

<a name='3.2'></a>
### 3.2 - Zero Shot Inference with the Prompt Template from FLAN-T5

Let's use a slightly different prompt. FLAN-T5 has many prompt templates that are published for certain tasks [here](https://github.com/google-research/FLAN/tree/main/flan/v2). In the following code, you will use one of the [pre-built FLAN-T5 prompts](https://github.com/google-research/FLAN/blob/main/flan/v2/templates.py):

In [None]:
for i, index in enumerate(example_indices):
    dialogue = mydataset['test'][index]['dialogue']
    summary = mydataset['test'][index]['summary']
        
    prompt = f"""
Vulnerable C program:

{dialogue}

What is the patch of the program?
"""

    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )

    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash_line)
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

---------------------------------------------------------------------------------------------------
Example  1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Vulnerable C program:

int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
{
	struct au1100fb_device *fbdev;
	unsigned int len;
	unsigned long start=0, off;

	fbdev = to_au1100fb_device(fbi);

	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) {
		return -EINVAL;
	}

	start = fbdev->fb_phys & PAGE_MASK;
	len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len);

	off = vma->vm_pgoff << PAGE_SHIFT;

	if ((vma->vm_end - vma->vm_start + off) > len) {
		return -EINVAL;
	}

	off += start;
	vma->vm_pgoff = off >> PAGE_SHIFT;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6

	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
				vma->vm_end - vma->vm_start,
				vma->vm_page_prot)) {
		retur

Notice that this prompt from FLAN-T5 did help a bit, but still struggles to pick up on the nuance of the conversation. This is what you will try to solve with the few shot inferencing.

<a name='4'></a>
## 4 - Summarize Dialogue with One Shot and Few Shot Inference

**One shot and few shot inference** are the practices of providing an LLM with either one or more full examples of prompt-response pairs that match your task - before your actual prompt that you want completed. This is called "in-context learning" and puts your model into a state that understands your specific task.  You can read more about it in [this blog from HuggingFace](https://huggingface.co/blog/few-shot-learning-gpt-neo-and-inference-api).

<a name='4.1'></a>
### 4.1 - One Shot Inference

Let's build a function that takes a list of `example_indices_full`, generates a prompt with full examples, then at the end appends the prompt which you want the model to complete (`example_index_to_summarize`).  You will use the same FLAN-T5 prompt template from section [3.2](#3.2). 

In [None]:
def make_prompt(example_indices_full, example_index_to_summarize):
    prompt = ''
    for index in example_indices_full:
        dialogue = mydataset['test'][index]['dialogue']
        summary = mydataset['test'][index]['summary']
        
        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. Other models may have their own preferred stop sequence.
        prompt += f"""
Vulerable C program:

{dialogue}

What is the patch of the program?

{summary}

"""
    
    dialogue = mydataset['test'][example_index_to_summarize]['dialogue']
    
    prompt += f"""
Vulerable C program:

{dialogue}

What is the patch of the program?
"""
        
    return prompt

Construct the prompt to perform one shot inference:

In [None]:
example_indices_full = [40]
example_index_to_summarize = 200

one_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)

print(one_shot_prompt)


Vulerable C program:

int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
{
	struct au1100fb_device *fbdev;
	unsigned int len;
	unsigned long start=0, off;

	fbdev = to_au1100fb_device(fbi);

	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) {
		return -EINVAL;
	}

	start = fbdev->fb_phys & PAGE_MASK;
	len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len);

	off = vma->vm_pgoff << PAGE_SHIFT;

	if ((vma->vm_end - vma->vm_start + off) > len) {
		return -EINVAL;
	}

	off += start;
	vma->vm_pgoff = off >> PAGE_SHIFT;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6

	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
				vma->vm_end - vma->vm_start,
				vma->vm_page_prot)) {
		return -EAGAIN;
	}

	return 0;
}

static struct fb_ops au1100fb_ops =


What is the patch of the program?

int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
{
	struct au1100fb_device *fbdev;

	fbdev = to_au1100f

Now pass this prompt to perform the one shot inference:

In [None]:
summary = mydataset['test'][example_index_to_summarize]['summary']

inputs = tokenizer(one_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=100,
    )[0], 
    skip_special_tokens=True
)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ONE SHOT:\n{output}')

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_ALL | CL_EXPIRE;
	if (user_ns != mnt_ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		up_write(&namespace_sem);


---------------------------------------------------------------------------------------------------
MODEL GENERATION - ONE SHOT:
(vma)(vma)->->-> vm_flags |=-> vm_flags |=-> vm_flags-> vm_flags-> vm_flags |=-> vm_flags |=-> vm_flags |=-> vm_flags-> vm_flags |=-> vm_flags-> vm_flags |=-> vm_flags |=-> vm_flags-> vm_flags |=-> vm_flags


<a name='4.2'></a>
### 4.2 - Few Shot Inference

Let's explore few shot inference by adding two more full dialogue-summary pairs to your prompt.

In [None]:
example_indices_full = [30, 10, 150]
example_index_to_summarize = 300

few_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)

print(few_shot_prompt)


Vulerable C program:

	rcu_assign_pointer(vq->private_data, oldsock);
	vhost_net_enable_vq(n, vq);
	if (ubufs)
		vhost_net_ubuf_put_and_wait(ubufs);
err_ubufs:
	fput(sock->file);
err_vq:


What is the patch of the program?

	rcu_assign_pointer(vq->private_data, oldsock);
	vhost_net_enable_vq(n, vq);
	if (ubufs)
		vhost_net_ubuf_put_wait_and_free(ubufs);
err_ubufs:
	fput(sock->file);
err_vq:



Vulerable C program:

	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu *leader_pmu = event->group_leader->pmu;

	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
		return 1;



What is the patch of the program?

	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu *leader_pmu = event->group_leader->pmu;

	if (is_software_event(event))
		return 1;

	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
		return 1;




Vulerable C program:

    pkiDebug("found kdcPkId in AS REQ\n");
    is = d2i_PKCS7_ISSUER_AND_SERIAL(NULL, &p, (int)pkid_le

Now pass this prompt to perform a few shot inference:

In [None]:

dialogue = mydataset['test']['dialogue'][example_index_to_summarize]
summary = mydataset['test']['summary'][example_index_to_summarize]

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=100,
    )[0], 
    skip_special_tokens=True
)
print(dash_line)
print(f'Prompt:\n{dialogue}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')

-------------------------------------------------
Prompt:
    }
    return -1;
  }
  int64_t pid = -1;
  sscanf(buf, "%" PRId64, &pid);
  assert(pid);
  return (pid_t)pid;
}


-------------------------------------------------
BASELINE HUMAN SUMMARY:
    }
    return -1;
  }
  always_assert(buf == "success");
  int64_t pid = -1;
  lwp_read_int64(fin, pid);
  always_assert(pid);
  return (pid_t)pid;
}



-------------------------------------------------
MODEL GENERATION - FEW SHOT:
 return -1;  int64_t, pid = -1; sscanf(buf, "%" PRId64, &pid); assert(pid); return (pid_t)pid; 


In this case, few shot did not provide much of an improvement over one shot inference.  And, anything above 5 or 6 shot will typically not help much, either.  Also, you need to make sure that you do not exceed the model's input-context length which, in our case, if 512 tokens.  Anything above the context length will be ignored.

However, you can see that feeding in at least one full example (one shot) provides the model with more information and qualitatively improves the summary overall.

**Exercise:**

Experiment with the few shot inferencing.
- Choose different dialogues - change the indices in the `example_indices_full` list and `example_index_to_summarize` value.
- Change the number of shots. Be sure to stay within the model's 512 context length, however.

How well does few shot inferencing work with other examples?

<a name='5'></a>
## 5 - Generative Configuration Parameters for Inference

You can change the configuration parameters of the `generate()` method to see a different output from the LLM. So far the only parameter that you have been setting was `max_new_tokens=50`, which defines the maximum number of tokens to generate. A full list of available parameters can be found in the [Hugging Face Generation documentation](https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationConfig). 

A convenient way of organizing the configuration parameters is to use `GenerationConfig` class. 

**Exercise:**

Change the configuration parameters to investigate their influence on the output. 

Putting the parameter `do_sample = True`, you activate various decoding strategies which influence the next token from the probability distribution over the entire vocabulary. You can then adjust the outputs changing `temperature` and other parameters (such as `top_k` and `top_p`). 

Uncomment the lines in the cell below and rerun the code. Try to analyze the results. You can read some comments below.

In [None]:
# generation_config = GenerationConfig(max_new_tokens=50)
# generation_config = GenerationConfig(max_new_tokens=10)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)
generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=2.0)

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        generation_config=generation_config,
    )[0], 
    skip_special_tokens=True
)

print(dash_line)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')

---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
According to personal views by Person 2, Trump deserves re presidency, especially Trump, but does think Trump needs to be more of an outtake from the country. He will do not win by giving the US only the President of the U.S.
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is crazy for Trump and voted for him. #Person2# doesn't agree with #Person1# on Trump and will vote for Biden.



Comments related to the choice of the parameters in the code cell above:
- Choosing `max_new_tokens=10` will make the output text too short, so the dialogue summary will be cut.
- Putting `do_sample = True` and changing the temperature value you get more flexibility in the output.

As you can see, prompt engineering can take you a long way for this use case, but there are some limitations. Next, you will start to explore how you can use fine-tuning to help your LLM to understand a particular use case in better depth!

# # Fine-Tune a Generative AI Model for Dialogue Summarization

In [None]:
%pip install -U datasets==2.17.0

%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet

%pip install \
    transformers==4.27.2 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    loralib==0.1.1 \
    peft==0.3.0 --quiet

In [None]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [None]:
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 222882048
all model parameters: 222882048
percentage of trainable model parameters: 100.00%


### 1.3 - Test the Model with Zero Shot Inferencing

In [None]:
index = 200

dialogue = mydataset['test'][index]['dialogue']
summary = mydataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

inputs = tokenizer(prompt, return_tensors='pt')
output = tokenizer.decode(
    original_model.generate(
        inputs["input_ids"], 
        max_new_tokens=200,
    )[0], 
    skip_special_tokens=True
)

dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.

	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_ALL | CL_EXPIRE;
	if (user_ns != mnt_ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		up_write(&namespace_sem);


Summary:

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_ALL | CL_EXPIRE;
	if (user_ns != mnt_ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		up_write(&namespace_sem);


---------------------------------------------------------------------------------------------------
MODEL GENERATION - ZERO SHOT:
new; }new; }new; }new; }new; }


<a name='2'></a>
## 2 - Perform Full Fine-Tuning

In [None]:
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    
    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = mydataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map: 100%|██████████| 800/800 [00:00<00:00, 1773.04 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 1868.36 examples/s]
Map: 100%|██████████| 2172/2172 [00:00<00:00, 2198.04 examples/s]


In [None]:
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")

print(tokenized_datasets)

Filter: 100%|██████████| 1/1 [00:00<00:00, 76.73 examples/s]


Filter: 100%|██████████| 1/1 [00:00<00:00, 180.73 examples/s]
Filter: 100%|██████████| 1/1 [00:00<00:00, 33.17 examples/s]

Shapes of the datasets:
Training: (1, 2)
Validation: (1, 2)
Test: (1, 2)
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1
    })
})





### 2.2 - Fine-Tune the Model with the Preprocessed Dataset

In [None]:
output_dir = f'models/vul-fix-training-{str(int(time.time()))}'

training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_steps=1,
    max_steps=1
)

trainer = Trainer(
    model=original_model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)

In [None]:
trainer.train()

100%|██████████| 1/1 [08:11<00:00, 491.27s/it]

{'loss': 3.625, 'learning_rate': 0.0, 'epoch': 1.0}
{'train_runtime': 491.2789, 'train_samples_per_second': 0.016, 'train_steps_per_second': 0.002, 'train_loss': 3.625, 'epoch': 1.0}





TrainOutput(global_step=1, training_loss=3.625, metrics={'train_runtime': 491.2789, 'train_samples_per_second': 0.016, 'train_steps_per_second': 0.002, 'train_loss': 3.625, 'epoch': 1.0})

In [None]:
# Save the trained model
trainer.save_model(f'models/instruct_model-{output_dir}')

NameError: name 'trainer' is not defined

In [None]:
instruct_model = AutoModelForSeq2SeqLM.from_pretrained(f'models/instruct_model-{output_dir}', torch_dtype=torch.bfloat16)

NameError: name 'AutoModelForSeq2SeqLM' is not defined

### 2.3 - Evaluate the Model Qualitatively (Human Evaluation)

In [None]:
index = 200
dialogue = mydataset['test'][index]['dialogue']
human_baseline_summary = mydataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

input_ids = tokenizer(prompt, return_tensors="pt").input_ids

original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

instruct_model_outputs = instruct_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{human_baseline_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'INSTRUCT MODEL:\n{instruct_model_text_output}')

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_ALL | CL_EXPIRE;
	if (user_ns != mnt_ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		up_write(&namespace_sem);

---------------------------------------------------------------------------------------------------
ORIGINAL MODEL:
if (IS_ERR(old)){

Summary:; } }new->mnt.mnt_root, new->mnt.mnt_root, copy_flags);new->mnt.mnt_root, copy_flags); }new->mnt.mnt_root, copy_flags); }(new) { if(IS_ERR(new)) {new->mnt.mnt_root; }(new) { if
---------------------------------------------------------------------------------------------------
INSTRUCT MODEL:
new; }new; }new; }new; }new; }


### 2.4 - Evaluate the Model Quantitatively (with ROUGE Metric)

In [None]:
rouge = evaluate.load('rouge')

In [None]:
dialogues = mydataset['test'][0:10]['dialogue']
human_baseline_summaries = mydataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []

for _, dialogue in enumerate(dialogues):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)
    original_model_summaries.append(original_model_text_output)

    instruct_model_outputs = instruct_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)
    instruct_model_summaries.append(instruct_model_text_output)
    
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, instruct_model_summaries))
 
df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,instruct_model_summaries
0,\tunsigned long mq_bytes;\t/* How many bytes c...,*/*/****** ** ** * ** ** * ** ** * ** ** * ** ...,*/*/))))) ;) ;) ;) ;) ;) ;) ;) ; }
1,"\tsock_wfree(skb);\n}\n\n/*\n * The ""user->uni...",(structstruct sk_buff *skb)skb),skbskb)skb) {skb)skb) {skb)skb) { skb_queue_ta...
2,\tunsigned char max_level = 0;\n\tint unix_soc...,sock_sk(sk);sock_count =sock_count;sock_count;...,sock_get_name(sk);sock_get_name(sk);sock_get_n...
3,\tif (!UNIXCB(skb).fp)\n\t\treturn -ENOMEM;\n\...,(scm->fp->count) {if (scm->fp->count)\nForeach...,{=(skb).fp;if(!UNIXCB(skb).fp)\n\nif (unix_soc...
4,{\n\tstruct sock *s = unix_get_socket(fp);\n\n...,{\n\nIntegrity:{}} }}} }{(s) {(s) {(s),{\n\nif (atomic_long_inc_return(&u->inflight) ...
5,\t\t\tBUG_ON(list_empty(&u->link));\n\t\t}\n\t...,if (atomic_long_dec_and_test(&u->inflight))\n\...,(x) {(x) {(x) {(x) {(x) {(x) {(x) {(x) {(x) {(...
6,\tltv_t *pLtv;\n\tbool_t ...,HCF_PORT_0 functionHCF_PORT_0HCF_PORT_0HCF_PO...,HCF_PORT_0;HCF_PORT_0;HCF_PORT_0;HCF_PORT_0;HC...
7,\t\t\t\t\tbreak;\n\t\t\t\tcase CFG_CNF_OWN_NAM...,(void *)pLtv->u.u16[0];(void *)pLtv->u.u16[0];...,CNV_INT_TO_LITTLE(pLtv->u.u16[0]);CNV_INT_TO_L...
8,{\n\tstruct wl_private *lp = wl_priv(dev);\n\t...,{flags = wl_get_cond_volatile(dev);devflags=lp...,flagsflags = 0;flags =flagsflagsflagsflags =fl...
9,"\twl_lock(lp, &flags);\n\n\tmemset(lp->Station...",(wrqu->data)get\n\nif (wrqu->data.length) {(lp...,"((( lp, &flags);"


In [None]:
original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

instruct_model_results = rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)

In [None]:
results = pd.read_csv("data/dialogue-summary-training-results.csv")

human_baseline_summaries = results['human_baseline_summaries'].values
original_model_summaries = results['original_model_summaries'].values
instruct_model_summaries = results['instruct_model_summaries'].values

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

instruct_model_results = rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)

In [None]:
print("Absolute percentage improvement of INSTRUCT MODEL over ORIGINAL MODEL")

improvement = (np.array(list(instruct_model_results.values())) - np.array(list(original_model_results.values())))
for key, value in zip(instruct_model_results.keys(), improvement):
    print(f'{key}: {value*100:.2f}%')