In [3]:
import sys
sys.path.append('src')

In [4]:
from bm25_v2 import BM25Searcher
import pandas as pd
import numpy as np
from utils import get_combined_df, Args
from eval import SearchEvaluator, ModelEvaluator
import json
from tqdm import tqdm

In [334]:
# --- Experiment configuration ---
# Folder holding the processed data for the repository under study.
data_path = "data/2_7/facebook_react"
# The repository name is the last component of the data path.
repo_name = data_path.rsplit("/", 1)[-1]
# The search index lives inside the repository's data folder.
index_path = f"{data_path}/index_commit_tokenized"
# REPO_DIR = '/home/ssg2/ssg2/ds/repos/facebook_react'

k = 10000  # initial ranker depth
n = 100  # number of samples to evaluate on

combined_df = get_combined_df(data_path)

In [6]:
# Load the two JSON lookup tables that map file ids (FIDs) to paths and back.
# The file names are built from `repo_name` so this cell follows the repository
# chosen in the configuration cell instead of a hard-coded "facebook_react".
with open(f"{repo_name}_FID_to_paths.json", encoding="utf-8") as fid_file:
    fid_to_path = json.load(fid_file)

# JSON object keys are always strings; turn them back into integer FIDs.
fid_to_path = {int(fid): path for fid, path in fid_to_path.items()}

with open(f"{repo_name}_path_to_FID.json", encoding="utf-8") as path_file:
    path_to_fid = json.load(path_file)

In [7]:
# Retrieval metrics the evaluator is asked to report.
metrics = [
    'MAP',
    'P@1', 'P@10', 'P@20', 'P@30',
    'MRR',
    'R@1', 'R@10', 'R@100', 'R@1000',
]
bm25_searcher = BM25Searcher(index_path, fid_to_path, path_to_fid)
evaluator = SearchEvaluator(metrics)
model_evaluator = ModelEvaluator(
    bm25_searcher,
    evaluator,
    combined_df,
    fid_to_path,
    path_to_fid,
)

# Load the gold set and swap the message columns in a single rename: the
# original commit message is kept as 'original_message', and the GPT-4
# transformed message takes over the 'commit_message' name.
gold_df = pd.read_parquet(f"gold/{repo_name}/v2_{repo_name}_gpt4_gold.parquet").rename(
    columns={
        'commit_message': 'original_message',
        'transformed_message_gpt4': 'commit_message',
    }
)

Loaded index at data/2_7/facebook_react/index_commit_tokenized
Index Stats: {'total_terms': 7587973, 'documents': 73765, 'non_empty_documents': 73765, 'unique_terms': 14602}


In [331]:
# Use the first gold row as the worked example. The five fields are unpacked
# by position, so this relies on gold_df having exactly these five columns in
# this order.
(test_id, test_date, test_orig, test_files, test_query) = gold_df.iloc[0]
# Translate each gold file path into its file id.
test_files_ids = [path_to_fid[gold_path] for gold_path in test_files]
test_query, test_files, test_files_ids

('Default setting for "collapse new nodes" is enabled causing inconvenience. Settings popup UI does not adapt properly to smaller sizes, and "Inspect the matching DOM element" button is visible in standalone mode unnecessarily. Also, there is an issue with undefined window.addEventListener/window.removeEventListener in Hermes and incorrect viewing size for settings icon.',
 array(['packages/react-devtools-core/src/standalone.js',
        'src/backend/renderer.js',
        'src/backend/views/Highlighter/index.js', 'src/devtools/store.js',
        'src/devtools/views/ButtonIcon.js',
        'src/devtools/views/Components/Components.js',
        'src/devtools/views/Components/SelectedElement.js',
        'src/devtools/views/DevTools.js', 'src/devtools/views/Icon.js',
        'src/devtools/views/Profiler/Profiler.js',
        'src/devtools/views/Settings/ComponentsSettings.js',
        'src/devtools/views/Settings/SettingsContext.js',
        'src/devtools/views/Settings/SettingsModal.js',

In [335]:
# First-stage retrieval: query the BM25 index with the test query, its date and
# the ranker depth k.
# NOTE(review): presumably test_date restricts hits to earlier commits; confirm in BM25Searcher.search.
bm25_results = bm25_searcher.search(test_query, test_date, k)

In [370]:
# Aggregate the BM25 hits into per-file results using the 'maxp' strategy.
# NOTE(review): presumably 'maxp' keeps the best contributing score per file; confirm in BM25Searcher.
aggregated_results = bm25_searcher.aggregate_file_scores(bm25_results, 'maxp')

In [371]:
# Scores of the first ten aggregated results.
[file_hit.score for file_hit in aggregated_results][:10]

[52.900299072265625,
 52.900299072265625,
 52.90029525756836,
 52.90029525756836,
 52.90029525756836,
 52.90029525756836,
 52.900291442871094,
 52.900291442871094,
 52.900291442871094,
 52.90028762817383]

In [374]:
# File ids of the aggregated BM25 results, in result order.
bm25_fid = [file_hit.fid for file_hit in aggregated_results]

In [373]:
# Score the aggregated BM25 results against the gold file ids (baseline metrics).
evaluator.evaluate(aggregated_results, test_files_ids)

{'MAP': 0.0066,
 'P@1': 0.0,
 'P@10': 0.0,
 'P@20': 0.0,
 'P@30': 0.0,
 'MRR': 0.0014,
 'R@1': 0.0,
 'R@10': 0.0,
 'R@100': 0.0,
 'R@1000': 0.6429}

In [375]:
# Position in the BM25 ranking of every gold file that was retrieved;
# gold files missing from the ranking are left out.
[bm25_fid.index(gold_fid) for gold_fid in test_files_ids if gold_fid in bm25_fid]

[721, 738, 742, 744, 745, 754, 757, 759, 762]

In [12]:
# not sure why this file doesn't exist
# path_to_fid['packages/react/src/ReactServerSharedInternals.js']

In [346]:
# Display the query text for reference.
test_query

'Default setting for "collapse new nodes" is enabled causing inconvenience. Settings popup UI does not adapt properly to smaller sizes, and "Inspect the matching DOM element" button is visible in standalone mode unnecessarily. Also, there is an issue with undefined window.addEventListener/window.removeEventListener in Hermes and incorrect viewing size for settings icon.'

In [348]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from utils import set_seed

In [349]:
# Fix the seed via the project's set_seed helper.
# NOTE(review): which RNGs it seeds is not visible here; confirm in utils.
set_seed(42)

In [283]:
# Base checkpoint used for both the tokenizer and the sequence classifier.
model_name = 'microsoft/codebert-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the classifier first, then move it onto the GPU.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to('cuda')

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
# Build one (query, passage) pair to try the cross-encoder on:
# text1 is the test query, text2 is the commit message of the first
# contributing result of the first aggregated BM25 result.
text1 = test_query
text2 = aggregated_results[0].contributing_results[0].commit_message

In [36]:
# Encode the pair as a single sequence-pair input. Only the second text
# (the commit message) is truncated, so the query is kept whole.
inputs = tokenizer(
    text1,
    text2,
    add_special_tokens=True,
    truncation='only_second',
    max_length=tokenizer.model_max_length,
    padding=True,
    return_tensors="pt",
)

In [286]:
# Score the encoded (query, commit message) pair with the sequence classifier.
model.eval()  # inference mode for the forward pass
# Send the encoded inputs to whichever device the model is on, rather than a
# hard-coded 'cuda', so the two can never disagree.
inputs = inputs.to(model.device)
with torch.no_grad():
    outputs = model(**inputs)

# The classification head returns one logit per class for each input pair.
# Softmax over the class axis turns them into probabilities; the probability
# of class index 1 for the single encoded pair is used as the similarity score.
# NOTE(review): the checkpoint-loading warning reports a newly initialised
# classifier head, so this number is not meaningful until the model is fine-tuned.
class_probs = torch.softmax(outputs.logits, dim=-1)
similarity_score = class_probs[0, 1].item()

# Switch back to training mode (presumably for the fine-tuning cells that follow; confirm).
model.train()
similarity_score

0.5136816501617432

In [351]:
# Load the cached training pairs (columns: query, passage, label) for the
# configured repository. The path is built from `data_path` so it follows the
# configuration cell rather than repeating the repository folder.
# NOTE: pd.read_pickle can run arbitrary code while unpickling; only load cache
# files produced by this project.
train_df = pd.read_pickle(f"{data_path}/cache/triplet_data_cache.pkl")

In [352]:
# Preview the first rows of the training frame.
train_df.head()

Unnamed: 0,query,passage,label
0,fix getSnapshot warning when a selector return...,[useSES/extra] Reuse old selection if possible...,1
1,fix getSnapshot warning when a selector return...,Add warning and test for useSyncExternalStore ...,1
2,fix getSnapshot warning when a selector return...,fix: useSyncExternalStoreExtra (#22500)\n\n* m...,1
3,fix getSnapshot warning when a selector return...,Implement getServerSnapshot in userspace shim ...,1
4,fix getSnapshot warning when a selector return...,Implement useSyncExternalStore on server (#223...,1


In [350]:
# import os
# repo_paths = [
#     "data/2_7/apache_spark",
#     "data/2_7/apache_kafka",
#     "data/2_7/facebook_react",
#     "data/2_8/angular_angular",
#     "data/2_8/django_django",
#     "data/2_8/pytorch_pytorch",
#     "data/2_7/julialang_julia",
#     "data/2_7/ruby_ruby",
#     "data/2_9/huggingface_transformers",
#     "data/2_9/redis_redis",
# ]
# combined_triplet_data = pd.DataFrame()
# for data_path in tqdm(repo_paths, total=len(repo_paths)):
#     triplet_cache = os.path.join(data_path, 'cache', 'gpt_triplet_data_cache.pkl')
#     if os.path.exists(triplet_cache):
#         repo_triplet_data = pd.read_pickle(triplet_cache)
#         combined_triplet_data = pd.concat([combined_triplet_data, repo_triplet_data], ignore_index=True)
#     else:
#         print(f"Warning: Triplet cache not found for {data_path}, skipping this repository.")

In [234]:
# NOTE(review): in the visible notebook `combined_triplet_data` is only assigned
# inside the commented-out multi-repository loading cell, so this line raises
# NameError on a fresh kernel unless that cell is restored. It also replaces the
# `train_df` read from the triplet cache pickle. TODO: confirm which training
# frame is intended.
train_df = combined_triplet_data

In [237]:
from datasets import Dataset
# Wrap the training DataFrame in a `datasets.Dataset` so it can be tokenized with `.map`.
dataset = Dataset.from_pandas(train_df)

In [239]:
def tokenize_function(example):
    """Tokenize a batch of (query, passage) pairs for the sequence classifier.

    Args:
        example: batch dict supplied by ``Dataset.map(batched=True)``; its
            'query' and 'passage' entries are passed to the tokenizer as
            text pairs.

    Returns:
        The tokenizer's encoding for the batch, as plain Python lists.
    """
    # No return_tensors here: Dataset.map stores its output as Arrow columns,
    # so building torch tensors first is redundant work.
    # NOTE: padding=True pads to the longest pair within each map batch, so
    # sequence length can differ from one map batch to the next.
    return tokenizer(
        example['query'],
        example['passage'],
        padding=True,
        truncation='only_second',  # never cut the query, only the passage
        max_length=tokenizer.model_max_length,
        add_special_tokens=True,
    )


tokenized_datasets = dataset.map(tokenize_function, batched=True)
# Copy 'label' into a 'labels' column (presumably the name the model's forward() takes; confirm).
tokenized_datasets = tokenized_datasets.map(lambda examples: {'labels': examples['label']}, batched=True)

Map:   0%|          | 0/74385 [00:00<?, ? examples/s]

Map:   0%|          | 0/74385 [00:00<?, ? examples/s]

In [243]:
from transformers import Trainer, TrainingArguments

In [264]:
training_args = TrainingArguments(
    # Output directory for model checkpoints.
    output_dir='./tmp',
    # "no" means no evaluation is run during training.
    evaluation_strategy="no",
    # No experiment-tracker reporting.
    report_to='none',
    # Optimisation settings.
    num_train_epochs=1,
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=True,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

In [330]:
# trainer = Trainer(
#     model=bert_reranker.model,
#     args=training_args,
#     train_dataset=tokenized_datasets,
# )
# trainer.train()

In [353]:
class Args:
    """Attribute bag: every keyword argument becomes an instance attribute.

    NOTE(review): this definition shadows the `Args` imported from `utils` at
    the top of the notebook; confirm whether the imported one should be used.
    """

    def __init__(self, **kwargs):
        for option_name, option_value in kwargs.items():
            setattr(self, option_name, option_value)


# Debug configuration mirroring the options of the training/eval script.
args = Args(
    # Data and index locations.
    index_path='data/2_7/facebook_react/index_commit_tokenized',
    repo_path='data/2_7/facebook_react',
    # Retrieval depth and number of evaluation samples.
    k=10000,
    n=100,
    # Model and caching.
    model_path='microsoft/codebert-base',
    overwrite_cache=False,
    # Optimisation.
    batch_size=32,
    num_epochs=3,
    learning_rate=2e-05,
    # Run bookkeeping.
    run_name='debug',
    notes='debug (ignore)',
    # Training-data construction.
    num_positives=10,
    num_negatives=10,
    train_depth=10000,
    num_workers=8,
    train_commits=1000,
    psg_cnt=25,
    use_gpu=True,
    # Reranking and evaluation switches.
    rerank_depth=250,
    do_train=True,
    do_eval=True,
    eval_gold=True,
    openai_model='gpt4',
    overwrite_eval=False,
    sanity_check=True,
    debug=False,
    # Passage windowing.
    psg_len=350,
    psg_stride=250,
    ignore_gold_in_training=False,
    eval_folder='debug',
    use_gpt_train=True,
    aggregation_strategy='sump',
)

In [421]:
# Parameters handed to the BERT reranker. Most come from `args`; psg_cnt,
# rerank_depth, both aggregation strategies and output_length are set
# literally here, so they do not follow args.psg_cnt, args.rerank_depth or
# args.aggregation_strategy.
bert_params = dict(
    model_name=args.model_path,
    psg_cnt=5,
    aggregation_strategy='maxp',
    batch_size=args.batch_size,
    use_gpu=args.use_gpu,
    rerank_depth=1000,
    num_epochs=args.num_epochs,
    lr=args.learning_rate,
    num_positives=args.num_positives,
    num_negatives=args.num_negatives,
    train_depth=args.train_depth,
    num_workers=args.num_workers,
    train_commits=args.train_commits,
    bm25_aggr_strategy='maxp',
    output_length=1000,
)
bert_params

{'model_name': 'microsoft/codebert-base',
 'psg_cnt': 5,
 'aggregation_strategy': 'maxp',
 'batch_size': 32,
 'use_gpu': True,
 'rerank_depth': 1000,
 'num_epochs': 3,
 'lr': 2e-05,
 'num_positives': 10,
 'num_negatives': 10,
 'train_depth': 10000,
 'num_workers': 8,
 'train_commits': 1000,
 'bm25_aggr_strategy': 'maxp',
 'output_length': 1000}

In [355]:
from models import BERTReranker

In [422]:
# Build the reranker in 'classification' mode, then replace its model with the
# two-label checkpoint saved under .../best_model and move it to the
# reranker's device.
# NOTE(review): the checkpoint path is absolute and machine-specific.
bert_reranker = BERTReranker(bert_params, 'classification')
bert_reranker.model = AutoModelForSequenceClassification.from_pretrained(
    '/home/ssg2/ssg2/ds/models/facebook_react/bert_reranker/bm25_fix_combined_bert_classification/best_model',
    num_labels=2,
)
bert_reranker.model.to(bert_reranker.device)

Using device: cuda
Using GPU: Quadro RTX 6000
GPU Device Count: 1
GPU Memory Usage: 6230.80 MB
Using classification model


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Initialized BERT reranker with parameters: {'model_name': 'microsoft/codebert-base', 'psg_cnt': 5, 'aggregation_strategy': 'maxp', 'batch_size': 32, 'use_gpu': True, 'rerank_depth': 1000, 'num_epochs': 3, 'lr': 2e-05, 'num_positives': 10, 'num_negatives': 10, 'train_depth': 10000, 'num_workers': 8, 'train_commits': 1000, 'bm25_aggr_strategy': 'maxp', 'output_length': 1000}


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [423]:
# Second stage: rerank the aggregated BM25 results for the test query, then
# collect the file ids in their new order.
bert_agg_results = bert_reranker.rerank_pipeline(test_query, aggregated_results)
bert_fids = [reranked_hit.fid for reranked_hit in bert_agg_results]

In [414]:
# BM25 position of every gold file, with -1 marking files that were not retrieved.
[bm25_fid.index(gold_fid) if gold_fid in bm25_fid else -1 for gold_fid in test_files_ids]

[721, 738, 742, 744, -1, 745, 754, 757, -1, 759, -1, 762, -1, -1]

In [424]:
# Position of every gold file in the reranked list (bert_fids),
# with -1 marking files that are not in it.
[bert_fids.index(gold_fid) if gold_fid in bert_fids else -1 for gold_fid in test_files_ids]

[10, 13, 497, 499, -1, 500, 509, 512, -1, 514, -1, 15, -1, -1]

In [425]:
# Score the reranked results against the same gold file ids, for comparison with the BM25 baseline.
evaluator.evaluate(bert_agg_results, test_files_ids)

{'MAP': 0.0553,
 'P@1': 0.0,
 'P@10': 0.0,
 'P@20': 0.15,
 'P@30': 0.1,
 'MRR': 0.0909,
 'R@1': 0.0,
 'R@10': 0.0,
 'R@100': 0.2143,
 'R@1000': 0.6429}

In [None]:
# Reference copy of the BM25 baseline metrics (same values as the evaluator
# output for the aggregated BM25 results), kept for side-by-side comparison.
{'MAP': 0.0066,
 'P@1': 0.0,
 'P@10': 0.0,
 'P@20': 0.0,
 'P@30': 0.0,
 'MRR': 0.0014,
 'R@1': 0.0,
 'R@10': 0.0,
 'R@100': 0.0,
 'R@1000': 0.6429}

In [None]:
# Position in the BM25 ranking of every gold file that was retrieved;
# gold files missing from the ranking are left out.
[bm25_fid.index(gold_fid) for gold_fid in test_files_ids if gold_fid in bm25_fid]

[721, 738, 742, 744, 745, 754, 757, 759, 762]

In [419]:
# Display the query again, next to the top reranked commit message shown below.
test_query

'Default setting for "collapse new nodes" is enabled causing inconvenience. Settings popup UI does not adapt properly to smaller sizes, and "Inspect the matching DOM element" button is visible in standalone mode unnecessarily. Also, there is an issue with undefined window.addEventListener/window.removeEventListener in Hermes and incorrect viewing size for settings icon.'

In [420]:
# Commit message of the first contributing result behind the first reranked result.
bert_agg_results[0].contributing_results[0].commit_message

"[scheduler] 5/n Error handling in scheduler (#12920)\n\n* Initial failing unit test for error handling in schedule\n\n**what is the change?:**\nsee title\n\n**why make this change?:**\nAdding tests for the error handling behavior we are about to add. This\ntest is failing, which gives us the chance to make it pass.\n\nWrote skeletons of some other tests to add.\n\nUnit testing this way is really hacky, and I'm also adding to the\nfixture to test this in the real browser environment.\n\n**test plan:**\nRan new test, saw it fail!\n\n* Add fixture for testing error handling in scheduler\n\n**what is the change?:**\nAdded a fixture which does the following -\nlogs in the console to show what happens when you use\n`requestAnimationFrame` to schedule a series of callbacks and some of\nthem throw errors.\n\nThen does the same actions with the `scheduler` and verifies that it\nbehaves in a similar way.\n\nHard to really verify the errors get thrown at the proper time without\nlooking at the c