To view detailed visualizations and logs of the evaluations, please visit the [WandB dashboard](https://wandb.ai/daves24/Evaluating_phi-1.5_for_Kotlin-code-completion?workspace=user-daves24) associated with this project.

In [None]:
# !pip install torch torchvision
# !pip install transformers
# !pip install datasets
# !pip install peft
# !pip install bitsandbytes
# !pip install accelerate -U
# !pip install wandb

In [None]:
import torch
import requests
import gdown

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from typing import Callable

In [None]:
# Weights & Biases client; the evaluation cells below open one W&B run per
# (model, dataset) pair.
import wandb
# Authenticate this session with W&B before any run is created.
wandb.login()

In [None]:
# Shared prefixes, factored out so every location below is derived from a
# single place.
_CONTENT_MODEL_ROOT = "/content/model"
_REPO_RAW_ROOT = "https://raw.githubusercontent.com/DaveS24/KotComplete/main"

# Identifier of the pretrained baseline model.
base_model_id = "microsoft/phi-1_5"

# Download link for the fine-tuned adapter weights.
# NOTE(review): the id is still a TODO placeholder and must be replaced with a
# real Google Drive file id before the download cell can succeed.
tuned_model_link = "https://drive.google.com/uc?id=TODO: ADD LINK TO [adapter_model.safetensors]"

# Local destination of the downloaded adapter and root directory for the
# Trainer output of the evaluation runs.
tuned_model_path = f"{_CONTENT_MODEL_ROOT}/fine-tuned/model"
model_log_dir = f"{_CONTENT_MODEL_ROOT}/testing/"

# Test splits (JSON Lines) and the helper module that knows how to load them.
kotlin_data_url = f"{_REPO_RAW_ROOT}/data/Kotlin/test.jsonl"
codexglue_data_url = f"{_REPO_RAW_ROOT}/data/CodeXGLUE/test.jsonl"
dataset_loader_url = f"{_REPO_RAW_ROOT}/src/dataset_loader.py"

In [None]:
import os

# Download the fine-tuned adapter weights from Google Drive to
# `tuned_model_path`.
# The destination's parent directory is not created anywhere else in the
# notebook, so create it here; writing into a missing directory would fail.
os.makedirs(os.path.dirname(tuned_model_path), exist_ok=True)

downloaded_model_file = gdown.download(tuned_model_link, tuned_model_path)

# Some gdown releases report a failed download by returning None instead of
# raising; stop here rather than failing later while loading the model.
if downloaded_model_file is None:
    raise RuntimeError(
        f"Could not download the fine-tuned adapter from {tuned_model_link!r}; "
        "check that the Google Drive link is filled in and publicly shared."
    )

In [None]:
# Fetch the project's dataset helper module as source text.
# A timeout keeps the cell from hanging forever on a stalled connection.
response_data_parser = requests.get(dataset_loader_url, timeout=30)
# Fail fast on HTTP errors: otherwise an error page would be handed to exec()
# below and surface as a confusing SyntaxError/NameError.
response_data_parser.raise_for_status()
dataset_loader_code = response_data_parser.text

# Bare annotations (no assignment) so linters/IDEs know the names that the
# exec() call below is expected to define.
load_jsonl_from_url: Callable
create_and_tokenize_dataset: Callable
dataset_summary: Callable

# SECURITY NOTE: this executes code downloaded at runtime with full privileges.
# Only acceptable because the URL points at the project's own repository;
# consider pinning the URL to a commit hash so the executed code cannot change.
exec(dataset_loader_code)

# Verify the downloaded module really provided the helpers used later on, so a
# changed or truncated file is reported here and not several cells below.
_missing_helpers = [
    helper_name
    for helper_name in ("load_jsonl_from_url", "create_and_tokenize_dataset", "dataset_summary")
    if not callable(globals().get(helper_name))
]
if _missing_helpers:
    raise RuntimeError(
        f"{dataset_loader_url} did not define the expected helper(s): {', '.join(_missing_helpers)}"
    )

In [None]:
# bitsandbytes quantization settings shared by both model loads below:
# 4-bit "nf4" weights, double quantization enabled, float16 compute dtype.
_quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**_quantization_options)

# LoRA adapter layout attached to both models below.
# NOTE(review): presumably mirrors the configuration used during fine-tuning;
# confirm against the training notebook.
_lora_options = {
    "task_type": "CAUSAL_LM",
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    # Names of the sub-modules that receive an adapter.
    "target_modules": ["fc1", "fc2", "Wqkv", "out_proj"],
}
lora_config = LoraConfig(**_lora_options)

In [None]:
# Baseline: load the pretrained checkpoint with the quantization settings from
# `bnb_config`, then wrap it with the LoRA layout from `lora_config`.
_quantized_base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
)
base_phi_model = get_peft_model(_quantized_base_model, lora_config)

# Report how many parameters the wrapped model exposes as trainable.
base_phi_model.print_trainable_parameters()

In [None]:
import os
import shutil
import tempfile

from peft import load_peft_weights, set_peft_model_state_dict

# Fine-tuned model = pretrained base weights + the downloaded LoRA adapter.
#
# `tuned_model_path` holds only the adapter weights (adapter_model.safetensors),
# not a full checkpoint, so it cannot be passed to
# AutoModelForCausalLM.from_pretrained directly. Load the base model, attach the
# LoRA layout, and then copy the trained adapter weights into it.
# NOTE(review): assumes `lora_config` matches the configuration the adapter was
# trained with (same rank and target modules) - confirm against training.
tuned_phi_model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)
tuned_phi_model = get_peft_model(tuned_phi_model, lora_config)

# load_peft_weights expects a directory containing a file with the canonical
# adapter file name. The download cell writes a single file, so stage it under
# that name in a scratch directory; a ready-made adapter directory is used as is.
if os.path.isdir(tuned_model_path):
    adapter_dir = tuned_model_path
else:
    adapter_dir = tempfile.mkdtemp(prefix="phi_adapter_")
    shutil.copyfile(tuned_model_path, os.path.join(adapter_dir, "adapter_model.safetensors"))

adapter_weights = load_peft_weights(adapter_dir)
adapter_load_result = set_peft_model_state_dict(tuned_phi_model, adapter_weights)

# Keys the model does not know mean the adapter was trained with a different
# LoRA layout; evaluating in that state would silently score the base model.
if adapter_load_result.unexpected_keys:
    raise RuntimeError(
        "Adapter weights do not match lora_config; unexpected keys: "
        f"{adapter_load_result.unexpected_keys[:5]}"
    )

tuned_phi_model.print_trainable_parameters()

In [None]:
# Tokenizer of the base checkpoint; it is passed to the dataset helper and to
# the data collators of both trainers below.
phi_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# Use the end-of-sequence token as padding token.
# NOTE(review): presumably needed because this tokenizer ships without a pad
# token and the collator has to pad batches - confirm.
phi_tokenizer.pad_token = phi_tokenizer.eos_token

In [None]:
# Kotlin test split. The three helpers come from the remotely loaded
# dataset_loader.py, so their exact semantics are not visible in this notebook.
# NOTE(review): use_subset/subset_ratio presumably restrict evaluation to about
# 25% of the examples - confirm in dataset_loader.py.
kotlin_test_data = load_jsonl_from_url(kotlin_data_url, use_subset=True, subset_ratio=0.25)
kotlin_test_dataset = create_and_tokenize_dataset(kotlin_test_data, phi_tokenizer)

# Summarise the tokenized dataset as a quick sanity check before evaluating.
dataset_summary(kotlin_test_dataset)

In [None]:
# CodeXGLUE test split, prepared exactly like the Kotlin split so results on
# the two datasets are comparable. The helpers come from the remotely loaded
# dataset_loader.py.
# NOTE(review): use_subset/subset_ratio presumably restrict evaluation to about
# 25% of the examples - confirm in dataset_loader.py.
codexglue_test_data = load_jsonl_from_url(codexglue_data_url, use_subset=True, subset_ratio=0.25)
codexglue_test_dataset = create_and_tokenize_dataset(codexglue_test_data, phi_tokenizer)

# Summarise the tokenized dataset as a quick sanity check before evaluating.
dataset_summary(codexglue_test_dataset)

In [None]:
# Trainer used purely as an evaluation harness for the baseline model.
#
# `evaluation_strategy="steps"` was dropped on purpose: it only schedules
# evaluation inside Trainer.train(), which this notebook does not call for this
# trainer, the option is renamed in newer transformers releases, and newer
# Trainer versions refuse an evaluation strategy when no eval_dataset is passed
# to the constructor. Explicit evaluate() calls do not depend on it.
base_training_args = TrainingArguments(
    output_dir=f"{model_log_dir}base/output/",
    per_device_eval_batch_size=8,
    # Metrics are meant for the W&B dashboard; state it explicitly instead of
    # relying on the library's version-dependent default.
    report_to="wandb",
    disable_tqdm=True,
)

base_trainer = Trainer(
    model=base_phi_model,
    args=base_training_args,
    # mlm=False selects the causal (next-token) language-modelling collator.
    data_collator=DataCollatorForLanguageModeling(tokenizer=phi_tokenizer, mlm=False),
)

In [None]:
# Baseline model on the Kotlin test set, logged as its own W&B run.
# Using the run as a context manager guarantees it is finished (and marked as
# failed) even if evaluate() raises, so no run is left open for the next cell.
with wandb.init(
    project='Evaluating_phi-1.5_for_Kotlin-code-completion',
    name='base_kotlin_test',
    job_type='eval',
    anonymous='allow',
) as run_base_kotlin:
    base_trainer.evaluate(eval_dataset=kotlin_test_dataset)

In [None]:
# Baseline model on the CodeXGLUE test set, logged as its own W&B run.
# Using the run as a context manager guarantees it is finished (and marked as
# failed) even if evaluate() raises, so no run is left open for the next cell.
with wandb.init(
    project='Evaluating_phi-1.5_for_Kotlin-code-completion',
    name='base_codexglue_test',
    job_type='eval',
    anonymous='allow',
) as run_base_codexglue:
    base_trainer.evaluate(eval_dataset=codexglue_test_dataset)

In [None]:
# Trainer used purely as an evaluation harness for the fine-tuned model.
#
# `evaluation_strategy="steps"` was dropped on purpose: it only schedules
# evaluation inside Trainer.train(), which this notebook does not call for this
# trainer, the option is renamed in newer transformers releases, and newer
# Trainer versions refuse an evaluation strategy when no eval_dataset is passed
# to the constructor. Explicit evaluate() calls do not depend on it.
tuned_training_args = TrainingArguments(
    output_dir=f"{model_log_dir}tuned/output/",
    per_device_eval_batch_size=8,
    # Metrics are meant for the W&B dashboard; state it explicitly instead of
    # relying on the library's version-dependent default.
    report_to="wandb",
    disable_tqdm=True,
)

tuned_trainer = Trainer(
    model=tuned_phi_model,
    args=tuned_training_args,
    # mlm=False selects the causal (next-token) language-modelling collator.
    data_collator=DataCollatorForLanguageModeling(tokenizer=phi_tokenizer, mlm=False),
)

In [None]:
# Fine-tuned model on the Kotlin test set, logged as its own W&B run.
# Using the run as a context manager guarantees it is finished (and marked as
# failed) even if evaluate() raises, so no run is left open for the next cell.
with wandb.init(
    project='Evaluating_phi-1.5_for_Kotlin-code-completion',
    name='tuned_kotlin_test',
    job_type='eval',
    anonymous='allow',
) as run_tuned_kotlin:
    tuned_trainer.evaluate(eval_dataset=kotlin_test_dataset)

In [None]:
# Fine-tuned model on the CodeXGLUE test set, logged as its own W&B run.
# Using the run as a context manager guarantees it is finished (and marked as
# failed) even if evaluate() raises, so no run is left open afterwards.
with wandb.init(
    project='Evaluating_phi-1.5_for_Kotlin-code-completion',
    name='tuned_codexglue_test',
    job_type='eval',
    anonymous='allow',
) as run_tuned_codexglue:
    tuned_trainer.evaluate(eval_dataset=codexglue_test_dataset)