In [2]:
import google.generativeai as genai
import pandas as pd
from copy import deepcopy


## Prompt preparation

In [3]:
def remove_non_ascii(sentence):
    """Return `sentence` with every character whose code point is 128 or higher dropped."""
    ascii_chars = [ch for ch in sentence if ord(ch) <= 127]
    return ''.join(ascii_chars)

In [4]:
# Read both system prompts as lists of lines (line endings are kept by readlines()).
with open('prompt/prompt_system_etd.txt', 'r') as etd_file:
    etd_system = etd_file.readlines()

with open('prompt/prompt_system_ps.txt', 'r') as ps_file:
    ps_system = ps_file.readlines()

In [5]:
# Start the prompt with the ETD system instructions, joining the file's lines with a space.
prompt_parts = [" ".join(etd_system)]
prompt_parts

['Use the following step-by-step instructions to respond to user inputs.\n \n Step 1 - The user will provide you with text in triple quotes (Question: """insert text here"""). Find one sentence from the given text which can indicate the answer to "What is the user expecting to do or achieve?". The sentence only describing the system error or undesired behavior should not be considered. Use triple quotes to cite the found sentence. If the given text does not contain such sentence, write "No sentences found to indicate the user\'s expectation. The answer is NO." and skip Step 2 and Step 3.\n \n Step 2 - If found one sentence from Step 1, \n summarize the sentence into one or more patterns delimited by <pattern>..</pattern> according to the syntax and semantic of the sentence. The pattern could be expressed by some keywords and part-of-speech tags from the found sentence, which can explain why this sentence provides the answer to "What is the user expecting to do or achieve?".\n \n Step 3

In [None]:
# Start the prompt with the PS system instructions instead.
# Note: this rebinds prompt_parts, so running it after the ETD cell discards the ETD prompt.
prompt_parts = [" ".join(ps_system)]
prompt_parts

In [6]:
# Issues to classify; the file is decoded as ISO-8859-1.
df_issue = pd.read_csv(
    "prompt/issue.csv",
    encoding="ISO-8859-1",
)
# df_issue = pd.read_csv("prompt/issue_wrong.csv")

# Few-shot example table for the ETD task.
df_etd_example = pd.read_csv(
    "experiment/random_pattern/ETD/gemini/etd_prompt_random_with_negno11_gemini.csv"
)
# df_ps_example = pd.read_csv("experiment/random_pattern/PS/gemini/ps_prompt_random3_with_negno11_gemini.csv")

In [7]:
# Number of issue rows loaded.
df_issue.shape[0]

1000

In [8]:
# Strip non-ASCII characters from every issue text and write the result back into df_issue.
issue_clean = list(map(remove_non_ascii, df_issue["issue"]))
df_issue["issue"] = issue_clean
df_issue.head()

Unnamed: 0,issue
0,"Hi, when I am trying to apply class dynamicall..."
1,is it possible to get a RouteConfig matched ag...
2,I have angular running from a .Net Core server...
3,"Hello everyone, I want to do sub menu with sea..."
4,I'm struggling with getting this logic [code]....


In [11]:
# Shuffle the ETD example rows and renumber the index from 0.
# A fixed random_state keeps the few-shot example order reproducible across kernel
# restarts; without it every run builds a differently ordered prompt.
# Re-running this cell reshuffles the already shuffled frame.
df_etd_example = df_etd_example.sample(frac=1, random_state=42).reset_index(drop=True)
df_etd_example

Unnamed: 0,Pattern,issue,output,12_examples,14_examples
0,ETD_FOR_EXAMPLE,""""""" guys, can somebody help me w/ chained reso...","The sentence """""" e.g. a components needs resul...",1,1
1,,""""""" Hi, I'm having a problem with following co...",No sentences found to indicate the user's expe...,0,1
2,ETD_MUST,""""""" Hi all, I am looking for a schedular simil...","The sentence """""" must include timeline view, m...",1,1
3,ETD_WANT_TO,""""""" How do I set a property of a component wit...","The sentence """""" Let's say I want to set the w...",0,1
4,ETD_REQUIRES,""""""" LSTM layers require quite a bit of memory,...","The sentence """""" LSTM layers require quite a b...",0,0
5,ETD_IMPLEMENT,""""""" Hi Guys, I have deployed Angular6 produtio...","The sentence """""" I have deployed Angular6 prod...",0,0
6,ETD_IMPLEMENT,""""""" I have build large SPAs with Angular, but ...","The sentence """""" I have build large SPAs with ...",1,1
7,ETD_SHOULD,""""""" hey all i am using connect-multiparty pack...","The sentence """""" but it's stated that i should...",1,1
8,ETD_FOR_EXAMPLE,""""""" Any idea on how to provider a service to t...","The sentence """""" It is funny in some places (s...",0,0
9,ETD_CAN_QUESTION,""""""" hey - can i get some n00b help. I have the...","The sentence """""" can i get some n00b help. """"""...",0,0


In [None]:
# Shuffle the PS example rows and renumber the index from 0.
# A fixed random_state keeps the few-shot example order reproducible across kernel restarts.
# NOTE(review): df_ps_example must be loaded first; its read_csv line in the
# data-loading cell is currently commented out, so this cell raises NameError as-is.
df_ps_example = df_ps_example.sample(frac=1, random_state=42).reset_index(drop=True)
df_ps_example

### for all examples

In [None]:
# Collect the raw "output" strings of the ETD examples (no "output: " prefix is added here).
# NOTE(review): df_etd_randexample is only assigned further down, in the
# "random/frequent patterns" section, so on a fresh kernel this cell raises NameError —
# confirm whether df_etd_example was intended for the "all examples" case.
chat_output = list(df_etd_randexample["output"].values)
# chat_output.extend(list(df_etd_randexample["output_n"].values))
chat_output

In [None]:
# Few-shot inputs from every PS example, each prefixed with "input: ".
# chat_issue = list(df_etd_example["issue"].values)
chat_issue = [
    "input: " + issue_text
    for issue_text in df_ps_example["issue"].tolist()
]
chat_issue

In [None]:
# Few-shot outputs from every PS example, each prefixed with "output: ".
# chat_output = list(df_etd_example["output"].values)
chat_output = [
    "output: " + output_text
    for output_text in df_ps_example["output"].tolist()
]
chat_output

### for random/frequent patterns

In [12]:
# Keep only the ETD examples whose "12_examples" flag equals 1.
df_etd_randexample = df_etd_example.loc[df_etd_example["12_examples"].eq(1)]

# Few-shot inputs: each selected issue prefixed with "input: ".
chat_issue = [
    "input: " + issue_text
    for issue_text in df_etd_randexample["issue"].tolist()
]
# chat_issue.extend(list(df_etd_randexample["issue_n"].values))
chat_issue

['input: """ guys, can somebody help me w/ chained resolvers? What is a proper approach to chain resolves? e.g. a components needs result from resolverA; resolverA builds a query depending on a result from resolverB. """',
 'input: """ Hi all, I am looking for a schedular similar to fullCalendar for angular but a pure free & opensource (must include timeline view , month, year & day view), any recommendations on it would really be of a greater help.... """',
 'input: """ I have build large SPAs with Angular, but now want to add some angular "widgets" to a regular website (think quote of the day/flight status etc). If I use the normal webpack route, I\'m going to end up with a large bundle which duplicates all the angular resources for each simple little widget. Is there an established pattern for using Angular in this way? """',
 'input: """ hey all i am using connect-multiparty package to handle file uploads but it\'s stated that i should remove temp files on the server? what exactly 

In [13]:
# Few-shot outputs: each selected ETD example output prefixed with "output: ".
chat_output = [
    "output: " + output_text
    for output_text in df_etd_randexample["output"].tolist()
]
# chat_output.extend(list(df_etd_randexample["output_n"].values))
chat_output

['output: The sentence """ e.g. a components needs result from resolverA; """ uses an example start with <pattern>e.g. a NNS VBZ</pattern> to indicate that the user expects the result from "resolverA". The answer is YES.',
 'output: The sentence """ must include timeline view, month, year & day view """ uses <pattern>must VB</pattern> to indicate that the user expects the calendar to include "timeline view, month, year & day view". The answer is YES.',
 'output: The sentence """ I have build large SPAs with Angular """ uses <pattern>build sth. with</pattern> to indicate that the user expects to "build large SPAs with Angular". The answer is YES.',
 'output: The sentence """ but it\'s stated that i should remove temp files on the server? """ uses <pattern>should VB</pattern> to indicate that the user expects to "remove temp files on the server". The answer is YES.',
 'output: The sentence """ i need help  I\'m working a project that requires having a form & a table to display d data fil

In [None]:
# Keep only the PS examples whose "14_examples" flag equals 1.
df_ps_randexample = df_ps_example.loc[df_ps_example["14_examples"].eq(1)]

# Few-shot inputs: each selected issue prefixed with "input: ".
chat_issue = [
    "input: " + issue_text
    for issue_text in df_ps_randexample["issue"].tolist()
]
# chat_issue.extend(list(df_ps_randexample["issue_n"].values))
chat_issue

In [None]:
# Few-shot outputs: each selected PS example output prefixed with "output: ".
chat_output = [
    "output: " + output_text
    for output_text in df_ps_randexample["output"].tolist()
]
# chat_output.extend(list(df_ps_randexample["output_n"].values))
chat_output

## Model running

In [None]:
# Read the API key from the first line of gemini_token.txt.
# readline() keeps the trailing line terminator, so strip surrounding whitespace
# to avoid passing a key that ends in "\n" to the client.
with open("gemini_token.txt", 'r') as fp:
    gemini_token = fp.readline().strip()

In [14]:
# Hand the API key read from gemini_token.txt to the google.generativeai client.
genai.configure(api_key=gemini_token)

In [15]:
# Set up the model
generation_config = {
  "temperature": 0.8,
  "top_p": 1,
  "top_k": 1,
  "max_output_tokens": 300,
}

safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  }
]

# Create GenerativeModel object
model = genai.GenerativeModel(
    model_name="gemini-pro",
    generation_config=generation_config,
    safety_settings=safety_settings,
)

In [16]:
# Interleave the few-shot examples after the system prompt: input, output, input, output, ...
# This mutates prompt_parts in place, so re-running the cell appends the examples again.
for example_pair in zip(chat_issue, chat_output):
    prompt_parts.extend(example_pair)

prompt_parts

['Use the following step-by-step instructions to respond to user inputs.\n \n Step 1 - The user will provide you with text in triple quotes (Question: """insert text here"""). Find one sentence from the given text which can indicate the answer to "What is the user expecting to do or achieve?". The sentence only describing the system error or undesired behavior should not be considered. Use triple quotes to cite the found sentence. If the given text does not contain such sentence, write "No sentences found to indicate the user\'s expectation. The answer is NO." and skip Step 2 and Step 3.\n \n Step 2 - If found one sentence from Step 1, \n summarize the sentence into one or more patterns delimited by <pattern>..</pattern> according to the syntax and semantic of the sentence. The pattern could be expressed by some keywords and part-of-speech tags from the found sentence, which can explain why this sentence provides the answer to "What is the user expecting to do or achieve?".\n \n Step 3

In [17]:
# Query the model once per issue: few-shot prompt + the new issue + an empty "output: " slot.
ans1 = []
failed_idx = []  # row labels whose response carried no usable text
start_i = 0  # first row label to process; ans1 is reset above, so earlier answers are not kept
for i in range(start_i, len(df_issue)):  # use range(0, 10) for a quick smoke test
    issue = df_issue.loc[i, "issue"]
    input_str = "input: \"\"\" {} \"\"\"".format(issue)

    # prompt_parts holds only strings, so a shallow copy is enough to leave it unmodified.
    prompt = [*prompt_parts, input_str, "output: "]

    response = model.generate_content(prompt)
    try:
        answer = response.text
    except ValueError:
        # response.text raises ValueError when the response has no text part
        # (e.g. the candidate was blocked). Record an empty answer so ans1 keeps one
        # entry per processed row and a single bad response does not abort the run.
        failed_idx.append(i)
        answer = ""
    ans1.append(answer)

In [18]:
# Sanity check: number of answers collected by the inference loop.
print(len(ans1))

1000


In [19]:
# Put the collected answers into a single-column frame and preview the first ten rows.
df = pd.DataFrame({"answer1": ans1})

df.head(10)

Unnamed: 0,answer1
0,"The sentence """""" I am not able to find any mat..."
1,"The sentence """""" I'm trying to create a Breadc..."
2,"The sentence """""" What's the best way for the a..."
3,"The sentence """""" i use this code for menu but ..."
4,No sentences found to indicate the user's expe...
5,"The sentence """""" show only top 5 series of dat..."
6,"The sentence """""" Is there a workaround? """""" us..."
7,No sentences found to indicate the user's expe...
8,"The sentence """""" I would like to import variab..."
9,"The sentence """""" I would expect projected cont..."


In [20]:
# Persist the answers without writing the row index.
df.to_csv(
    "experiment/random_pattern/answer_rand.csv",
    index=False,
)

In [33]:
from sklearn import metrics as mt

In [34]:
# PS results
# Score the stored predictions (column y''_PS, passed as y_pred) against column y_PS (y_true).
df_test_random = pd.read_csv(
    "experiment/random_pattern/PS/gemini/ps_rand_withnegno1022_gemini_result(t=0.8).csv"
)
predictions_ps = df_test_random["y''_PS"]
y_test = df_test_random["y_PS"]

precison_ps, recall_ps, score_ps = (
    metric(y_test, predictions_ps)
    for metric in (mt.precision_score, mt.recall_score, mt.f1_score)
)

print(
    "precision:", round(precison_ps, 3),
    "recall:", round(recall_ps, 3),
    "F1:", round(score_ps, 3),
)

precision: 0.644 recall: 0.948 F1: 0.767
