# Evaluation notebook

In [9]:
def get_context_wo_lineno(graph, source, target):
    """Describe the edges adjacent to ``source -> target`` as textual context.

    Used as a fallback when the edge itself carries no ``lineno`` and therefore
    cannot be mapped to a window of source lines. Three groups of neighbouring
    edges are collected, in this order:

    1. edges flowing into ``source``,
    2. edges flowing out of ``source`` (this contains ``source -> target``
       itself whenever that edge exists),
    3. edges flowing out of ``target``.

    Args:
        graph: Object whose ``graph.edges._adjdict`` maps
            ``source node -> target node -> edge attributes``. Every edge
            attribute mapping must provide a ``"code"`` entry.
        source: Source node of the edge that lacks a line number.
        target: Target node of the edge that lacks a line number.

    Returns:
        One line per adjacent edge, formatted as
        ``'<src> -----<code>----><dst>'`` and joined with newlines. An empty
        string is returned when there are no adjacent edges.

    Raises:
        KeyError: If ``source`` or ``target`` is not a key of the adjacency
            mapping, or if an adjacent edge has no ``"code"`` entry.
    """
    adjacency = graph.edges._adjdict

    def describe(src, dst):
        # Single place that defines the textual edge format.
        return f'{src} -----{adjacency[src][dst]["code"]}---->{dst}'

    context = []

    # Inflowing edges of `source`: the adjacency mapping is keyed by the edge's
    # source node, so every node has to be scanned for an edge ending at `source`.
    for source_node in adjacency:
        if source in adjacency[source_node]:
            context.append(describe(source_node, source))

    # Outflowing edges of `source`. The endpoints are `source` and `neighbor`,
    # not the variables left over from the scan above.
    for neighbor in adjacency[source]:
        context.append(describe(source, neighbor))

    # Outflowing edges of `target`.
    for neighbor in adjacency[target]:
        context.append(describe(target, neighbor))

    return '\n'.join(context)


In [11]:
import os
import pandas as pd
from ApexDAG.label_notebooks.label_graphs import get_code_file_path
from ApexDAG.sca.graph_utils import load_graph

# Directory that is scanned for ``*.execution_graph`` files.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
graph_path = "C:/Users/ismyn/UNI/TUB/data/jetbrains_dfg_100k_new/jetbrains_dfg_100k_new_labeled/execution_graphs"

# os.listdir() returns entries in arbitrary order; sorting keeps the row order of
# the collected edges reproducible across runs and machines.
files = sorted(f for f in os.listdir(graph_path) if f.endswith(".execution_graph"))

# all_graphs[i] and all_code[i] both belong to files[i].
all_graphs = []
all_code = []
for filename in files:
    graph_file_path = os.path.join(graph_path, filename)

    G = load_graph(graph_file_path)
    # Every '_labeled' is removed from the derived path -- presumably the code
    # files are stored next to the unlabeled dataset; verify against the data layout.
    code_file_path = get_code_file_path(graph_file_path).replace('_labeled', '')

    with open(code_file_path, "r") as code_file:
        code_lines = code_file.readlines()
    all_graphs.append(G)
    all_code.append(code_lines)

# One record per edge: (file name, textual edge, context, domain label).
all_edges = []
for filename, graph, code in zip(files, all_graphs, all_code):
    for source, targets in graph.edges._adjdict.items():
        for target, edge in targets.items():
            lineno = edge.get('lineno')
            if lineno is not None:
                # Slice code[lineno - 2 : lineno + 2], clamped to the bounds of the file.
                start_content = max(lineno - 2, 0)
                end_content = min(lineno + 2, len(code))
                context = ''.join(code[start_content:end_content])
            else:
                # No line number on this edge: describe its neighbouring edges instead.
                context = get_context_wo_lineno(graph, source, target)
            all_edges.append((filename, f'[{source} -----{edge["code"]}---->{target}]', context, edge['domain_label']))


In [12]:
import pandas as pd
# Column names, in the same order as the fields of each tuple in `all_edges`.
edge_columns = ['file', 'edge', 'context', 'domain_label']

# Tabulate the collected edge records and persist them for the labeling evaluation.
df = pd.DataFrame(data=all_edges, columns=edge_columns)
df.to_csv('eval_labeling_25_02.csv')

# Preview of the first rows.
df.head()

Unnamed: 0,file,edge,context,domain_label
0,000002378b1d1b0060bc746ae66e3191c494fe6c.execu...,[time -----sleep---->time_3],1\timport time\n2\t\n3\ttime.sleep(0.01)\n4\t\n,ENVIRONMENT
1,000002f42581aa2bfb55393bfdbc80c785616b3a.execu...,[traj0_5 -----traj0---->trajlist_9],8\t\n9\ttrajlist = [nv.MDAnalysisTrajectory(tr...,DATA_TRANSFORM
2,000002f42581aa2bfb55393bfdbc80c785616b3a.execu...,[MDAnalysis -----universe---->traj0_5],4\t\n5\ttraj0 = Universe('../data/tz2.pdb')\n6...,DATA_IMPORT_EXTRACTION
3,000002f42581aa2bfb55393bfdbc80c785616b3a.execu...,[MDAnalysis -----universe---->traj1_7],"6\t# \n7\ttraj1 = Universe(datafiles.PDB, data...",DATA_IMPORT_EXTRACTION
4,000002f42581aa2bfb55393bfdbc80c785616b3a.execu...,[traj1_7 -----traj1---->trajlist_9],8\t\n9\ttrajlist = [nv.MDAnalysisTrajectory(tr...,DATA_TRANSFORM


In [37]:
# Single code file that is loaded for manual inspection.
# NOTE(review): absolute, machine-specific path.
FILENAME = 'C:/Users/ismyn/UNI/TUB/data/jetbrains_dfg_100k_new/jetbrains_dfg_100k_new/code/0000dfd542cb69ec0d5aa8fb1e325a9ea837929c.code'

# Translation table that deletes every tab and newline character.
_drop_tabs_and_newlines = str.maketrans('', '', '\t\n')

with open(FILENAME, 'r') as f:
    # One entry per line of the file, with all tabs and newlines removed.
    codeee = [raw_line.translate(_drop_tabs_and_newlines) for raw_line in f.readlines()]

In [38]:
# Display the cleaned lines of the loaded code file (tabs and newlines already removed).
codeee

['',
 '1import visdom',
 '2import numpy as np',
 '3',
 '4v = visdom.Visdom()',
 '5',
 '6assert v.check_connection()',
 '7',
 "8print('visdom is up')",
 '9',
 "10print('initiating visdom windows..')",
 '11windows = {',
 "12'stat1':{",
 "13    'type':'bar',",
 "14    'data':[1, 2, 1], # init with 3 unstacked bars",
 '15 },',
 "16 'stat2':   {",
 "17    'type':'line',",
 "18    'shape':(50,2),",
 '19 },',
 "20    'stat3':   {",
 "21    'type':'pie',",
 "22    'data':[4,13,10] # init with X sections",
 '23 },',
 '24}',
 '25',
 '26for win_name in windows:',
 '27    win = windows[win_name]',
 "28    if win['type'] is 'bar':",
 "29        v.bar(win['data'], win=win_name)",
 '30    ',
 "31    if win['type'] is 'line':",
 "32        v.line(np.zeros(shape=win['shape']), win=win_name)",
 '33    ',
 "34    if win['type'] is 'pie':",
 "35        v.pie(win['data'], win=win_name)",
 '36    ',
 "37print('→ done')",
 '38',
 '39# basic graphs: bar, line, chart',
 '40',
 '41# draw 3 stacked bars in stat1