In [33]:
!pip install tree-sitter
!pip install tree-sitter-python



In [34]:
!pip install datasets==3.6.0



In [35]:
from datasets import load_dataset

codesearchnet_dataset = load_dataset('code-search-net/code_search_net', 'python', split='test', trust_remote_code=True).select(range(1000))

print("CodeSearchNet dataset loaded successfully.")
print(f"Dataset size: {len(codesearchnet_dataset)}")

CodeSearchNet dataset loaded successfully.
Dataset size: 1000


In [36]:
example = codesearchnet_dataset['whole_func_string'][4]
print(example)

def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Downloads Dailymotion videos by URL.
    """

    html = get_content(rebuilt_url(url))
    info = json.loads(match1(html, r'qualities":({.+?}),"'))
    title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \
            match1(html, r'"title"\s*:\s*"([^"]+)"')
    title = unicodize(title)

    for quality in ['1080','720','480','380','240','144','auto']:
        try:
            real_url = info[quality][1]["url"]
            if real_url:
                break
        except KeyError:
            pass

    mime, ext, size = url_info(real_url)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge)


In [37]:
import tree_sitter_python as tspython
from tree_sitter import Language, Parser, Query, QueryCursor

PY_LANGUAGE = Language(tspython.language())

parser = Parser(PY_LANGUAGE)

def parse_func(full_func_def):
  tree = parser.parse(bytes(full_func_def, "utf8"))

  query = Query(
      PY_LANGUAGE,
      """
  (function_definition
    name: (identifier) @func_name
    body: (block) @func_body)
  """, encoding="utf8"
  )

  query_cursor = QueryCursor(query)
  captures = query_cursor.captures(tree.root_node)

  func_name = captures["func_name"][0].text.decode('utf-8')
  func_body = captures["func_body"][0].text.decode('utf-8')

  body_tree = parser.parse(bytes(func_body, "utf8"))

  comment_query = Query(
      PY_LANGUAGE,
      """
      (comment) @comment
      (expression_statement (string) @docstring)
      """, encoding="utf8"
  )

  comment_query_cursor = QueryCursor(comment_query)
  comment_captures_dict = comment_query_cursor.captures(body_tree.root_node)

  comment_byte_ranges = []
  for tag_name, node_list in comment_captures_dict.items():
      for node in node_list:
          comment_byte_ranges.append((node.start_byte, node.end_byte))
  comment_byte_ranges.sort()

  cleaned_code_parts = []
  current_byte_pos = 0

  for start_byte, end_byte in comment_byte_ranges:
      if start_byte > current_byte_pos:
          cleaned_code_parts.append(func_body[current_byte_pos:start_byte])
      current_byte_pos = end_byte

  if current_byte_pos < len(func_body):
      cleaned_code_parts.append(func_body[current_byte_pos:])

  cleaned_code = "".join(cleaned_code_parts)

  return func_name, func_body, cleaned_code

In [38]:
def add_parsed_fields(example):
  func_name, func_body, cleaned_code = parse_func(example['whole_func_string'])
  return {
      'm_func_name': func_name,
      'm_func_body': "    " + cleaned_code.strip(),
      'm_func_body_with_docs': func_body.strip()
  }

codesearchnet_dataset = codesearchnet_dataset.map(add_parsed_fields)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [39]:
for idx in range(5, 8):
  print("[func_name]:")
  print(codesearchnet_dataset['func_name'][idx])
  print("[m_func_name]:")
  print(codesearchnet_dataset['m_func_name'][idx])
  print("[m_func_body_with_docs]:")
  print(codesearchnet_dataset['m_func_body_with_docs'][idx])
  print("[m_func_body]:")
  print(codesearchnet_dataset['m_func_body'][idx])
  print("[func_documentation_string]:")
  print(codesearchnet_dataset['func_documentation_string'][idx])

[func_name]:
dictify
[m_func_name]:
dictify
[m_func_body_with_docs]:
"""http://stackoverflow.com/a/30923963/2946714"""
    if root:
        return {r.tag : dictify(r, False)}
    d=copy(r.attrib)
    if r.text:
        d["_text"]=r.text
    for x in r.findall("./*"):
        if x.tag not in d:
            d[x.tag]=[]
        d[x.tag].append(dictify(x,False))
    return d
[m_func_body]:
    if root:
        return {r.tag : dictify(r, False)}
    d=copy(r.attrib)
    if r.text:
        d["_text"]=r.text
    for x in r.findall("./*"):
        if x.tag not in d:
            d[x.tag]=[]
        d[x.tag].append(dictify(x,False))
    return d
[func_documentation_string]:
http://stackoverflow.com/a/30923963/2946714
[func_name]:
ucas_download_single
[m_func_name]:
ucas_download_single
[m_func_body_with_docs]:
'''video page'''
    html = get_content(url)
    # resourceID is UUID
    resourceID = re.findall( r'resourceID":"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', html)[0]


In [40]:
!pip install transformers
!pip install evaluate



In [41]:
from transformers import AutoTokenizer, T5ForConditionalGeneration

checkpoint = "Salesforce/codet5p-220m"
device = "cuda" # for GPU usage or "cpu" for CPU usage

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint).to(device)

In [42]:
inputs = tokenizer.encode("def print_hello_world():<extra_id_0>", max_length=512, truncation=True, return_tensors="pt").to(device)
outputs = model.generate(inputs, max_length=10)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


    print "Hello World"



In [43]:
def predict(prog):
  code_to_mask = "def <extra_id_0>(*args):\n" + prog

  inputs = tokenizer.encode(code_to_mask, max_length=512, truncation=True, return_tensors="pt").to(device)

  outputs = model.generate(inputs, max_length=16)

  if len(outputs) == 0:
    return "func"

  outputs = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

  if len(outputs) == 0:
    return "func"

  outputs = outputs.split(' ')

  if len(outputs) == 0:
    return "func"

  outputs = outputs[0]

  p_output = ""

  for ch in outputs:
    if ('a' <= ch and ch <= 'z') or ('0' <= ch and ch <= '9') or ch == '_':
      p_output += ch
    else:
      p_output += " "

  p_output = list(p_output.split())

  if len(p_output) > 1 and p_output[0] == "def":
    return p_output[1]

  if len(p_output) == 0:
    return "func"

  return p_output[0]


for i in range(10):
  print("-----------")
  print(predict(codesearchnet_dataset['m_func_body'][i]))
  print(codesearchnet_dataset['m_func_name'][i])
  print("-----------")

-----------
match1
get_vid_from_url
-----------
-----------
parse_xml_data
sina_xml_to_url_list
-----------
-----------
md5
makeMimi
-----------
-----------
fc2video_download_by_upid
fc2video_download
-----------
-----------
download_urls
dailymotion_download
-----------
-----------
dictify
dictify
-----------
-----------
main
ucas_download_single
-----------
-----------
ucas_download
ucas_download_playlist
-----------
-----------
main
sina_download_by_vid
-----------
-----------
download_urls
sina_download_by_vkey
-----------


In [44]:
!pip install rouge_score

import evaluate

exact_match = evaluate.load('exact_match')
rouge = evaluate.load('rouge')

print('Exact Match metric loaded.')
print('ROUGE metric loaded.')

Exact Match metric loaded.
ROUGE metric loaded.


In [45]:
import evaluate

exact_match = evaluate.load('exact_match')
rouge = evaluate.load('rouge')

print('Exact Match metric loaded.')
print('ROUGE metric loaded.')

Exact Match metric loaded.
ROUGE metric loaded.


In [47]:
predictions = []
references = []
results = []

for i in range(len(codesearchnet_dataset)):#len(codesearchnet_dataset)
  predicted_func_name = predict(codesearchnet_dataset['m_func_body'][i])
  actual_func_name = codesearchnet_dataset['m_func_name'][i]

  predictions.append(predicted_func_name)
  references.append(actual_func_name)
  results.append([rouge.compute(predictions=[predicted_func_name], references=[actual_func_name])['rouge1'], actual_func_name, predicted_func_name, i])
  print(i)

print(f"Collected {len(predictions)} predictions and {len(references)} references.")

exact_match_results = exact_match.compute(predictions=predictions, references=references)
rouge_results = rouge.compute(predictions=predictions, references=references)

print(f"Exact Match: {exact_match_results['exact_match']}")
print(f"ROUGE Score: {rouge_results['rouge1']}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [48]:
results.sort(key=lambda p:p[0])
print(results[0:10])

[[np.float64(0.0), 'get_vid_from_url', 'match1', 0], [np.float64(0.0), 'makeMimi', 'md5', 2], [np.float64(0.0), 'ucas_download_single', 'main', 6], [np.float64(0.0), 'sina_download_by_vid', 'main', 8], [np.float64(0.0), 'veoh_download_by_id', 'main', 13], [np.float64(0.0), 'sprint', 'color_to_color', 16], [np.float64(0.0), 'e', 'main', 18], [np.float64(0.0), 'wtf', 'main', 19], [np.float64(0.0), 'vimeo_download_by_id', 'main', 24], [np.float64(0.0), 'ckplayer_get_info_by_xml', 'def', 25]]


In [49]:
predictions = []
references = []
results = []

for i in range(len(codesearchnet_dataset)):#len(codesearchnet_dataset)
  predicted_func_name = predict(codesearchnet_dataset['m_func_body_with_docs'][i])
  actual_func_name = codesearchnet_dataset['m_func_name'][i]

  predictions.append(predicted_func_name)
  references.append(actual_func_name)
  results.append([rouge.compute(predictions=[predicted_func_name], references=[actual_func_name])['rouge1'], actual_func_name, predicted_func_name, i])
  print(i)

print(f"Collected {len(predictions)} predictions and {len(references)} references.")

exact_match_results = exact_match.compute(predictions=predictions, references=references)
rouge_results = rouge.compute(predictions=predictions, references=references)

print(f"Exact Match: {exact_match_results['exact_match']}")
print(f"ROUGE Score: {rouge_results['rouge1']}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [89]:
results.sort(key=lambda p:p[0])
print(results[0:10])

[[np.float64(0.0), 'get_vid_from_url', 'extract_video_id'], [np.float64(0.0), 'makeMimi', 'md5'], [np.float64(0.0), 'ucas_download_single', 'main'], [np.float64(0.0), 'veoh_download_by_id', 'main'], [np.float64(0.0), 'download_by_id', 'oke'], [np.float64(0.0), 'sprint', 'format_text'], [np.float64(0.0), 'e', 'print_error'], [np.float64(0.0), 'wtf', 'main'], [np.float64(0.0), 'ckplayer_get_info_by_xml', 'def'], [np.float64(0.0), 'get_video_url_from_video_id', '255']]


In [20]:
from datasets import load_dataset

javascript_dataset = load_dataset('code-search-net/code_search_net', 'javascript', split='test', trust_remote_code=True).select(range(1000))

print("CodeSearchNet dataset loaded successfully.")
print(f"Dataset size: {len(javascript_dataset)}")

data/javascript.zip:   0%|          | 0.00/1.66G [00:00<?, ?B/s]

Generating train split:   0%|          | 0/123889 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6483 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/8253 [00:00<?, ? examples/s]

CodeSearchNet dataset loaded successfully.
Dataset size: 1000


In [21]:
example = javascript_dataset['whole_func_string'][4]
print(example)

function forEach(obj, fn) {
  // Don't bother if no value provided
  if (obj === null || typeof obj === 'undefined') {
    return;
  }

  // Force an array if not already something iterable
  if (typeof obj !== 'object') {
    /*eslint no-param-reassign:0*/
    obj = [obj];
  }

  if (isArray(obj)) {
    // Iterate over array values
    for (var i = 0, l = obj.length; i < l; i++) {
      fn.call(null, obj[i], i, obj);
    }
  } else {
    // Iterate over object keys
    for (var key in obj) {
      if (Object.prototype.hasOwnProperty.call(obj, key)) {
        fn.call(null, obj[key], key, obj);
      }
    }
  }
}


In [22]:
!pip install tree-sitter-javascript

Collecting tree-sitter-javascript
  Downloading tree_sitter_javascript-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (2.2 kB)
Downloading tree_sitter_javascript-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (99 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/99.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.7/99.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tree-sitter-javascript
Successfully installed tree-sitter-javascript-0.25.0


In [23]:
import tree_sitter_javascript as tsjs

JS_LANGUAGE = Language(tsjs.language())

parser = Parser(JS_LANGUAGE)

def parse_func(full_func_def):
  tree = parser.parse(bytes(full_func_def, "utf8"))

  query = Query(
      JS_LANGUAGE,
      """
  (function_declaration
    name: (identifier) @func_name
    body: (statement_block) @func_body)
  """, encoding="utf8"
  )

  query_cursor = QueryCursor(query)
  captures = query_cursor.captures(tree.root_node)

  if "func_name" in captures:
    func_name = captures["func_name"][0].text.decode('utf-8')
  else:
    func_name = "func"

  if "func_body" in captures:
    func_body = captures["func_body"][0].text.decode('utf-8')
  else:
    func_body = "{}"

  body_tree = parser.parse(bytes(func_body, "utf8"))

  comment_query = Query(
      JS_LANGUAGE,
      """
      (comment) @comment
      """, encoding="utf8"
  )

  comment_query_cursor = QueryCursor(comment_query)
  comment_captures_dict = comment_query_cursor.captures(body_tree.root_node)

  comment_byte_ranges = []
  for tag_name, node_list in comment_captures_dict.items():
      for node in node_list:
          comment_byte_ranges.append((node.start_byte, node.end_byte))
  comment_byte_ranges.sort()

  cleaned_code_parts = []
  current_byte_pos = 0

  for start_byte, end_byte in comment_byte_ranges:
      if start_byte > current_byte_pos:
          cleaned_code_parts.append(func_body[current_byte_pos:start_byte])
      current_byte_pos = end_byte

  if current_byte_pos < len(func_body):
      cleaned_code_parts.append(func_body[current_byte_pos:])

  cleaned_code = "".join(cleaned_code_parts)

  return func_name, func_body, cleaned_code

In [24]:
def add_parsed_fields(example):
  func_name, func_body, cleaned_code = parse_func(example['whole_func_string'])
  return {
      'm_func_name': func_name,
      'm_func_body': "    " + cleaned_code.strip(),
      'm_func_body_with_docs': func_body.strip()
  }

javascript_dataset = javascript_dataset.map(add_parsed_fields)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [25]:
for idx in range(5, 8):
  print("[func_name]:")
  print(javascript_dataset['func_name'][idx])
  print("[m_func_name]:")
  print(javascript_dataset['m_func_name'][idx])
  print("[m_func_body_with_docs]:")
  print(javascript_dataset['m_func_body_with_docs'][idx])
  print("[m_func_body]:")
  print(javascript_dataset['m_func_body'][idx])
  print("[func_documentation_string]:")
  print(javascript_dataset['func_documentation_string'][idx])

[func_name]:
extend
[m_func_name]:
extend
[m_func_body_with_docs]:
{
  forEach(b, function assignValue(val, key) {
    if (thisArg && typeof val === 'function') {
      a[key] = bind(val, thisArg);
    } else {
      a[key] = val;
    }
  });
  return a;
}
[m_func_body]:
    {
  forEach(b, function assignValue(val, key) {
    if (thisArg && typeof val === 'function') {
      a[key] = bind(val, thisArg);
    } else {
      a[key] = val;
    }
  });
  return a;
}
[func_documentation_string]:
Extends object a by mutably adding to it the properties of object b.

@param {Object} a The object to be extended
@param {Object} b The object to copy properties from
@param {Object} thisArg The object to bind function to
@return {Object} The resulting value of object a
[func_name]:
fetchQuery
[m_func_name]:
fetchQuery
[m_func_body_with_docs]:
{
  // Because we implement the graphql server, the client must to point to the same host
  const relayServer = process.browser ? '' : process.env.RELAY_SERVER

In [26]:
def predict_js(prog):
  code_to_mask = "//function should be named in camel case\nfunction <extra_id_0>(args)" + prog

  inputs = tokenizer.encode(code_to_mask, max_length=512, return_tensors="pt").to(device)

  outputs = model.generate(inputs, max_length=8)

  if len(outputs) == 0:
    return "func"

  outputs = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

  if len(outputs) == 0:
    return "func"

  outputs = outputs.split(' ')

  if len(outputs) == 0:
    return "func"

  outputs = outputs[0]

  p_output = ""

  for ch in outputs:
    if ('a' <= ch and ch <= 'z') or ('0' <= ch and ch <= '9') or ch == '_':
      p_output += ch
    else:
      p_output += " "

  p_output = list(p_output.split())

  if len(p_output) == 0:
    return "func"

  return p_output[0]


for i in range(10):
  print("-----------")
  print(predict(javascript_dataset['m_func_body'][i]))
  print(javascript_dataset['m_func_name'][i])
  print("-----------")

-----------
xios
createInstance
-----------
-----------
cancel
CancelToken
-----------
-----------
is_array_buffer
isArrayBufferView
-----------
-----------
is_native
isStandardBrowserEnv
-----------
-----------
for
forEach
-----------
-----------
for
extend
-----------
-----------
fetch
fetchQuery
-----------
-----------
backoff
retryStrategy
-----------
-----------
main
writePackageManifest
-----------
-----------
list_supports
supportsPreload
-----------


In [28]:
predictions = []
references = []
results = []

for i in range(len(javascript_dataset)):
  predicted_func_name = predict(javascript_dataset['m_func_body'][i])
  actual_func_name = javascript_dataset['m_func_name'][i]

  predictions.append(predicted_func_name)
  references.append(actual_func_name)
  results.append([rouge.compute(predictions=[predicted_func_name], references=[actual_func_name])['rouge1'], actual_func_name, predicted_func_name])
  print(i)

print(f"Collected {len(predictions)} predictions and {len(references)} references.")

exact_match_results = exact_match.compute(predictions=predictions, references=references)
rouge_results = rouge.compute(predictions=predictions, references=references)

print(f"Exact Match: {exact_match_results['exact_match']}")
print(f"ROUGE Score: {rouge_results['rouge1']}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [29]:
results.sort(key=lambda p:p[0])
print(results[0:10])

[[np.float64(0.0), 'createInstance', 'xios'], [np.float64(0.0), 'CancelToken', 'cancel'], [np.float64(0.0), 'isArrayBufferView', 'is_array_buffer'], [np.float64(0.0), 'isStandardBrowserEnv', 'is_native'], [np.float64(0.0), 'forEach', 'for'], [np.float64(0.0), 'extend', 'for'], [np.float64(0.0), 'fetchQuery', 'fetch'], [np.float64(0.0), 'retryStrategy', 'backoff'], [np.float64(0.0), 'writePackageManifest', 'main'], [np.float64(0.0), 'supportsPreload', 'list_supports']]


In [30]:
predictions = []
references = []
results = []

for i in range(len(javascript_dataset)):
  predicted_func_name = predict(javascript_dataset['m_func_body_with_docs'][i])
  actual_func_name = javascript_dataset['m_func_name'][i]

  predictions.append(predicted_func_name)
  references.append(actual_func_name)
  results.append([rouge.compute(predictions=[predicted_func_name], references=[actual_func_name])['rouge1'], actual_func_name, predicted_func_name])
  print(i)

print(f"Collected {len(predictions)} predictions and {len(references)} references.")

exact_match_results = exact_match.compute(predictions=predictions, references=references)
rouge_results = rouge.compute(predictions=predictions, references=references)

print(f"Exact Match: {exact_match_results['exact_match']}")
print(f"ROUGE Score: {rouge_results['rouge1']}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [31]:
results.sort(key=lambda p:p[0])
print(results[0:10])

[[np.float64(0.0), 'createInstance', 'xios'], [np.float64(0.0), 'CancelToken', 'cancel'], [np.float64(0.0), 'isArrayBufferView', 'is_array_buffer'], [np.float64(0.0), 'isStandardBrowserEnv', 'is_native'], [np.float64(0.0), 'forEach', 'for'], [np.float64(0.0), 'extend', 'for'], [np.float64(0.0), 'fetchQuery', 'graphql'], [np.float64(0.0), 'retryStrategy', 'backoff'], [np.float64(0.0), 'writePackageManifest', 'main'], [np.float64(0.0), 'supportsPreload', 'list_supports']]
