In [None]:
# !pip install transformers
# !pip install datasets
# !pip install accelerate -U
# !pip install wandb

In [None]:
import requests

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from typing import Callable

In [None]:
import wandb

# Authenticate this session with Weights & Biases before a run is opened.
wandb.login()

# Open the run that this notebook works under; the handle is kept in `run`.
run = wandb.init(
    project='Evaluating phi-1.5 for Kotlin code completion',
    job_type='eval',
    anonymous='allow',
)

In [None]:
# Identifier of the checkpoint that is passed to `from_pretrained`.
model_id = "microsoft/phi-1_5"

# Both test splits and the dataset-loader script live in the same GitHub
# repository, so the shared raw-content prefix is spelled out only once.
_kotcomplete_raw_root = "https://raw.githubusercontent.com/DaveS24/KotComplete/main"
kotlin_data_url = _kotcomplete_raw_root + "/data/Kotlin/test.jsonl"
codexglue_data_url = _kotcomplete_raw_root + "/data/CodeXGLUE/test.jsonl"
dataset_loader_url = _kotcomplete_raw_root + "/src/dataset_loader.py"

# Base directory under which each trainer's output directory is placed.
model_log_dir = "/content/model/testing/"

In [None]:
# Download the project's dataset-loader script so its helpers can be used here.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops on an HTTP error instead of handing an error page to exec() below.
response_data_parser = requests.get(dataset_loader_url, timeout=30)
response_data_parser.raise_for_status()
dataset_loader_code = response_data_parser.text

# Names the downloaded script is expected to define. These bare annotations
# only declare the names for editors/type checkers; they bind nothing at runtime.
load_jsonl_from_url: Callable
create_and_tokenize_dataset: Callable
dataset_summary: Callable

# SECURITY: this executes code fetched over the network with the notebook's
# full privileges. Only run it against a source you trust; pinning the URL to
# a specific commit would keep the executed code from changing unexpectedly.
exec(dataset_loader_code)

# Fail right here, with a clear message, if the script did not provide one of
# the helpers the following cells rely on.
for _helper_name in ("load_jsonl_from_url", "create_and_tokenize_dataset", "dataset_summary"):
    if not callable(globals().get(_helper_name)):
        raise RuntimeError(
            f"{dataset_loader_url} did not define the expected helper {_helper_name!r}"
        )

In [None]:
# Load the tokenizer belonging to the checkpoint named by `model_id`.
phi_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# Reuse the end-of-sequence token as the padding token.
phi_tokenizer.pad_token = phi_tokenizer.eos_token

In [None]:
# Subset options forwarded to the loader (a ratio of 0.25 is requested).
kotlin_subset_options = {"use_subset": True, "subset_ratio": 0.25}

# Fetch the Kotlin test split, then tokenize it with the phi tokenizer.
kotlin_test_data = load_jsonl_from_url(kotlin_data_url, **kotlin_subset_options)
kotlin_test_dataset = create_and_tokenize_dataset(kotlin_test_data, phi_tokenizer)

# Hand the tokenized dataset to the loader script's summary helper.
dataset_summary(kotlin_test_dataset)

In [None]:
# Subset options forwarded to the loader (a ratio of 0.25 is requested).
codexglue_subset_options = {"use_subset": True, "subset_ratio": 0.25}

# Fetch the CodeXGLUE test split, then tokenize it with the phi tokenizer.
codexglue_test_data = load_jsonl_from_url(codexglue_data_url, **codexglue_subset_options)
codexglue_test_dataset = create_and_tokenize_dataset(codexglue_test_data, phi_tokenizer)

# Hand the tokenized dataset to the loader script's summary helper.
dataset_summary(codexglue_test_dataset)

In [None]:
# Load the checkpoint named by `model_id` without any fine-tuning applied.
base_phi_model = AutoModelForCausalLM.from_pretrained(model_id)

# This trainer is only ever used through evaluate(), so no evaluation schedule
# is configured. `evaluation_strategy="steps"` only governs evaluation during
# train(); the argument name is deprecated in recent transformers releases,
# and a periodic strategy without an `eval_dataset` on the Trainer is rejected
# there, so it is dropped.
base_training_args = TrainingArguments(
    output_dir=model_log_dir + 'base/output/',
    per_device_eval_batch_size=8,
)

# mlm=False selects causal (next-token) language modeling in the collator
# rather than masked language modeling.
base_trainer = Trainer(
    model=base_phi_model,
    args=base_training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer=phi_tokenizer, mlm=False),
)

In [None]:
# Evaluate the base model on the Kotlin test set. A dedicated metric prefix
# keeps these numbers apart from the notebook's other evaluate() calls, which
# would otherwise all report under the same default `eval` prefix.
base_trainer.evaluate(eval_dataset=kotlin_test_dataset, metric_key_prefix="eval_base_kotlin")

In [None]:
# Evaluate the base model on the CodeXGLUE test set. A dedicated metric prefix
# keeps these numbers apart from the notebook's other evaluate() calls, which
# would otherwise all report under the same default `eval` prefix.
base_trainer.evaluate(eval_dataset=codexglue_test_dataset, metric_key_prefix="eval_base_codexglue")

In [None]:
# FIXME: placeholder value. Replace it with the actual location of the
# fine-tuned checkpoint (a local directory or model identifier accepted by
# `from_pretrained`); loading fails as long as the placeholder is in place.
tuned_model_path = "LOADING FROM GOOGLE DRIVE LINK"
tuned_phi_model = AutoModelForCausalLM.from_pretrained(tuned_model_path)

# This trainer is only ever used through evaluate(), so no evaluation schedule
# is configured. `evaluation_strategy="steps"` only governs evaluation during
# train(); the argument name is deprecated in recent transformers releases,
# and a periodic strategy without an `eval_dataset` on the Trainer is rejected
# there, so it is dropped.
tuned_training_args = TrainingArguments(
    output_dir=model_log_dir + 'tuned/output/',
    per_device_eval_batch_size=8,
)

# mlm=False selects causal (next-token) language modeling in the collator
# rather than masked language modeling.
tuned_trainer = Trainer(
    model=tuned_phi_model,
    args=tuned_training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer=phi_tokenizer, mlm=False),
)

In [None]:
# Evaluate the tuned model on the Kotlin test set. A dedicated metric prefix
# keeps these numbers apart from the notebook's other evaluate() calls, which
# would otherwise all report under the same default `eval` prefix.
tuned_trainer.evaluate(eval_dataset=kotlin_test_dataset, metric_key_prefix="eval_tuned_kotlin")

In [None]:
# Evaluate the tuned model on the CodeXGLUE test set. A dedicated metric prefix
# keeps these numbers apart from the notebook's other evaluate() calls, which
# would otherwise all report under the same default `eval` prefix.
tuned_trainer.evaluate(eval_dataset=codexglue_test_dataset, metric_key_prefix="eval_tuned_codexglue")