## CodeSearch

In [7]:
import pandas as pd
import json
import os
from pathlib import Path

# Root directory under which every checkpoint directory is looked up.
BASE_PATH = Path('checkpoints/')

# Names of the pre-trained models being compared; each name is also used as
# the last component of the checkpoint directory path.
pre_trained_models = [
    'Salesforce-codet5-base',
    'microsoft-codebert-base',
    'microsoft-graphcodebert-base',
]

# Programming languages evaluated for the code-search task; each one is a
# path component of the per-language checkpoint directory.
languages = [
    'go',
    'java',
    'javascript',
    'php',
    'python',
    'ruby',
]


class TrainedModel():
    """A fine-tuned checkpoint plus the evaluation metrics collected for it.

    The checkpoint directory (``model_dir``) is derived from ``BASE_PATH``
    and the task:

    * ``'codesearch'``: ``BASE_PATH/task[/experiment]/language/model_name``
    * ``'code2test'``:  ``BASE_PATH/task[/experiment]/model_name``

    The ``experiment`` component is only present when an experiment is given.
    For any other task (including the default ``None``) ``model_dir`` is
    ``None``.

    Attributes:
        model_name: Name of the pre-trained model (also a directory name).
        language: Programming language; only used for ``'codesearch'``.
        task: ``'codesearch'``, ``'code2test'`` or anything else.
        experiment: Optional experiment sub-directory name.
        model_dir: Checkpoint directory, or ``None`` when the task is unknown.
        metrics: Dict mapping a metric name to its value.
    """

    def __init__(self, model_name, language=None, task=None, experiment=None):
        """Store the identifying fields and resolve the checkpoint directory.

        Raises:
            TypeError: if ``task`` is ``'codesearch'`` and no language is given.
        """
        self.model_name = model_name
        self.language = language
        self.experiment = experiment
        self.task = task
        self.metrics = {}
        # Always define the attribute so that code touching ``model_dir`` on a
        # model with an unknown/missing task sees None instead of hitting an
        # AttributeError.
        self.model_dir = None

        if task not in ('codesearch', 'code2test'):
            return

        task_root = BASE_PATH / task
        if experiment is not None:
            task_root = task_root / experiment

        if task == 'codesearch':
            if language is None:
                # Path / None would raise an opaque TypeError; keep the type
                # but say what is actually missing.
                raise TypeError("task 'codesearch' requires a language")
            self.model_dir = task_root / language / model_name
        else:
            self.model_dir = task_root / model_name

    def add_metric(self, name, value):
        """Record ``value`` under ``name``, replacing any earlier value."""
        self.metrics[name] = value

In [18]:
def _load_codesearch_models(experiment=None):
    """Load code-search metrics for every (pre-trained model, language) pair.

    For each pair, a ``TrainedModel`` is built for the ``'codesearch'`` task,
    the JSON file whose name contains ``'metric'`` is read from its checkpoint
    directory, and the ``'test'`` and ``'valid'`` entries are stored on the
    model.

    Args:
        experiment: Optional experiment sub-directory; ``None`` selects the
            checkpoints stored directly under the task directory.

    Returns:
        List of ``TrainedModel`` objects, ordered by model then by language.

    Raises:
        FileNotFoundError: if a checkpoint directory has no metrics file.
    """
    loaded = []
    for model_name in pre_trained_models:
        for language in languages:
            model = TrainedModel(model_name, language, 'codesearch', experiment)
            # os.listdir order is arbitrary; sort so the same file is always
            # chosen should more than one name contain 'metric'.
            metric_files = sorted(f for f in os.listdir(model.model_dir) if 'metric' in f)
            if not metric_files:
                raise FileNotFoundError(
                    'no metrics file found in {}'.format(model.model_dir))
            # Context manager closes the handle; json.load(open(...)) leaked it.
            with open(model.model_dir / metric_files[0], 'r') as metrics_file:
                results = json.load(metrics_file)
            model.add_metric('test', results['test'])
            model.add_metric('valid', results['valid'])
            loaded.append(model)
    return loaded


second_take_models = _load_codesearch_models('two_takes_step_2')
original_models = _load_codesearch_models()


In [28]:
# Compare the original and second-take code-search metrics, language by
# language, for a single pre-trained model (index 2 of pre_trained_models).
ptm = pre_trained_models[2]
print(ptm)

labelled_runs = (
    ('Original', original_models),
    ('Second Take', second_take_models),
)
for lang in languages:
    print(lang)
    for label, candidates in labelled_runs:
        for model in candidates:
            # Keep only the entry for this language and the selected model.
            if model.language == lang and model.model_name == ptm:
                print(label, model.metrics)

microsoft-graphcodebert-base
go
Original {'test': {'mrr': 0.9090841792222955}, 'valid': {'mrr': 0.9207934242164756}}
Second Take {'test': {'mrr': 0.6632587202301052}, 'valid': {'mrr': 0.6963504451710881}}
java
Original {'test': {'mrr': 0.7862952726995615}, 'valid': {'mrr': 0.8362239207723126}}
Second Take {'test': {'mrr': 0.3194628964643299}, 'valid': {'mrr': 0.2949044550921596}}
javascript
Original {'test': {'mrr': 0.5707852428647274}, 'valid': {'mrr': 0.5560066535657695}}
Second Take {'test': {'mrr': 0.29621955066693617}, 'valid': {'mrr': 0.2580100816840683}}
php
Original {'test': {'mrr': 0.8477583594034569}, 'valid': {'mrr': 0.8441795127758595}}
Second Take {'test': {'mrr': 0.0872859045249346}, 'valid': {'mrr': 0.06676916396996284}}
python
Original {'test': {'mrr': 0.7507660138804423}, 'valid': {'mrr': 0.761146024158046}}
Second Take {'test': {'mrr': 0.334576342906081}, 'valid': {'mrr': 0.3413115858586959}}
ruby
Original {'test': {'mrr': 0.704111010027275}, 'valid': {'mrr': 0.740571

## Code2Test

In [29]:
def _load_code2test_models(experiment=None):
    """Load the CodeBLEU metrics of every pre-trained model for code2test.

    For each model, a ``TrainedModel`` is built for the ``'code2test'`` task,
    the JSON file whose name contains ``'codebleu'`` is read from its
    checkpoint directory, and the five score entries are stored on the model.

    Args:
        experiment: Optional experiment sub-directory; ``None`` selects the
            checkpoints stored directly under the task directory.

    Returns:
        List of ``TrainedModel`` objects in ``pre_trained_models`` order.

    Raises:
        FileNotFoundError: if a checkpoint directory has no codebleu file.
    """
    # Keys copied from the results file, in the order they are recorded.
    metric_names = (
        'ngram_match',
        'weighted_ngram_match',
        'syntax_match',
        'dataflow_match',
        'code_bleu_score',
    )
    loaded = []
    for model_name in pre_trained_models:
        model = TrainedModel(model_name, task='code2test', experiment=experiment)
        # os.listdir order is arbitrary; sort so the same file is always
        # chosen should more than one name contain 'codebleu'.
        result_files = sorted(f for f in os.listdir(model.model_dir) if 'codebleu' in f)
        if not result_files:
            raise FileNotFoundError(
                'no codebleu file found in {}'.format(model.model_dir))
        # Context manager closes the handle; json.load(open(...)) leaked it.
        with open(model.model_dir / result_files[0], 'r') as results_file:
            results = json.load(results_file)
        for metric_name in metric_names:
            model.add_metric(metric_name, results[metric_name])
        loaded.append(model)
    return loaded


# NOTE: second_take_models and original_models are rebound here; from this
# cell on they hold the code2test models.
second_take_models = _load_code2test_models('two_takes_step_2')
original_models = _load_code2test_models()
prefix_models = _load_code2test_models('prefix')

In [32]:
# Show the code2test metrics of one pre-trained model (index 2 of
# pre_trained_models) for each of the three training variants.
ptm = pre_trained_models[2]
print(ptm)

for heading, candidates in (
    ('Original Model', original_models),
    ('Prefix Model', prefix_models),
    ('Second Take Model', second_take_models),
):
    print(heading)
    for model in candidates:
        # Only the entry for the selected pre-trained model is printed.
        if model.model_name == ptm:
            print(model.metrics)

microsoft-graphcodebert-base
Original Model
{'ngram_match': 0.07265351143778041, 'weighted_ngram_match': 0.08190162200841648, 'syntax_match': 0.34540589033194047, 'dataflow_match': 0.33440243129395353, 'code_bleu_score': 0.20859086376802272}
Prefix Model
{'ngram_match': 0.06274945984722662, 'weighted_ngram_match': 0.07056071925530413, 'syntax_match': 0.35411137290732, 'dataflow_match': 0.32765149065028554, 'code_bleu_score': 0.20376826066503406}
Second Take Model
{'ngram_match': 0.031150174562847473, 'weighted_ngram_match': 0.03715885725011193, 'syntax_match': 0.3210261528619512, 'dataflow_match': 0.33917406460562627, 'code_bleu_score': 0.1821273123201342}
