In [2]:
import os
import json
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from together import Together
from transformers import AutoTokenizer

from utils import *
from huggingface_hub import login as hf_login
from peft import prepare_model_for_kbit_training
from datasets import concatenate_datasets, DatasetDict, load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoModel

In [3]:
## login & load together ai client
# key = '779d92de61a5035835e5023ca79e2e5b6124c6300c3ceb0e07e374f948554116'
key = 'e94217f61953b12489a9877936bd5383086106ec9951d3f11bb6a9475d88e95e'
client = Together(api_key=key)
hf_login(token="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")

## load dataset

dataset = load_dataset("beanham/medsum_privacy")
merged_dataset = concatenate_datasets([dataset['train'], dataset['validation'], dataset['test']])

api_keys_subsample_ids = [
    # ('lr2872/Meta-Llama-3.1-8B-Instruct-Reference-dc7e8be7-f7a4e861','4')
    ('lr2872/Meta-Llama-3.1-8B-Instruct-Reference-dd180cb8-b4b00ce6', '3')
]
# lr2872/Meta-Llama-3.1-8B-Instruct-Reference-2968ad77-11f16750 4
# lr2872/Meta-Llama-3.1-8B-Instruct-Reference-baf1323a-aaf0f7d2 3

# 4 epochs
# lr2872/Meta-Llama-3.1-8B-Instruct-Reference-dc7e8be7-f7a4e861 4

shadow_models = load_shadow_models_for_llama_3_instruct(MODEL_DICT, api_keys_subsample_ids)
model_world = MODEL_DICT['llama_3_1_instruct']
target_model_class = 'llama_3_1_instruct'
print(f"Shadow models: {shadow_models}")
target_model_api_key, _, target_subsample_ids = shadow_models[0]
shadow_models_tuples = shadow_models[1:]
shadow_model_api_keys = [api_key for api_key, _, _ in shadow_models_tuples]

train_dataset = merged_dataset.filter(lambda example: example['ID'] in target_subsample_ids)
test_dataset = merged_dataset.filter(lambda example: example['ID'] not in target_subsample_ids)
unseen_ents = [sample['ents'] for sample in test_dataset if len(sample['ents']) < 5]
unseen_ents = [item for sublist in unseen_ents for item in sublist]
train_dataset = [sample for sample in train_dataset if len(sample['ents']) >= 5]
test_dataset = [sample for sample in test_dataset if len(sample['ents']) >= 5]
train_test_ents = {
    'train': train_dataset,
    'test': test_dataset
}

fake_train_test_ents = {
    'train': train_dataset[0:1],
    'test': test_dataset[0:1]
}

Shadow models: [('lr2872/Meta-Llama-3.1-8B-Instruct-Reference-dd180cb8-b4b00ce6', 'meta-llama/Meta-Llama-3-8B-Instruct-3', [54, 831, 113, 355, 927, 565, 797, 755, 111, 127, 429, 75, 409, 89, 380, 829, 923, 134, 263, 126, 261, 847, 1013, 1095, 64, 128, 681, 933, 3, 153, 796, 28, 1029, 1012, 275, 896, 143, 1157, 146, 384, 764, 352, 164, 1062, 1156, 867, 316, 709, 185, 59, 48, 255, 193, 733, 720, 28, 90, 303, 106, 144, 910, 387, 685, 794, 145, 14, 597, 230, 1, 636, 124, 1020, 182, 1124, 484, 445, 489, 386, 94, 804, 531, 11, 856, 444, 574, 147, 4, 108, 57, 613, 16, 703, 348, 18, 640, 107, 151, 578, 25, 287, 130, 1091, 315, 35, 152, 87, 364, 1090, 295, 1075, 165, 893, 936, 353, 612, 1135, 707, 619, 974, 18, 45, 630, 1155, 2, 65, 168, 112, 36, 940, 246, 621, 76, 1103, 562, 163, 656, 138, 939, 163, 932, 286, 596, 0, 692, 725, 1153, 190, 1068, 758, 911, 1046, 6, 235, 126, 60, 136, 5, 47, 563, 176, 1167, 851, 68, 81, 32, 1088, 44, 349, 453, 766, 795, 536, 491, 115, 616, 653, 1051, 1158, 969, 10

In [4]:
prob_generator = GenerateNextTokenProbAPI(client, target_model_api_key)

In [None]:
results={}
fail_counter = 0
# get first 50 pairs from train_test_ents 

for name, samples in train_test_ents.items():
    print(f"running {name}")
    
    for ent_list in samples[:25]:
        print(f"running {ent_list}")
        
        ## create saving dictionary
        key=name+'_'+str(ent_list['ID'])
        results[key]={}
        results[key]['y_stars']={}
        results[key]['y_NON_stars']={}        
        ents = list(set(ent_list['ents']))
        k = len(ents)
        unseen_ents_for_sample = random.sample(unseen_ents, k)
        
        ## go through each y_star
        for i in tqdm(range(k)):
            
            y_star = ents[i]
            y_NON_star = unseen_ents_for_sample[i]
            results[key]['y_stars'][y_star]={}
            results[key]['y_NON_stars'][y_NON_star]={}                        
            remaining_ents = ents[:i] + ents[i + 1:]            
            
            # prompt = f"consider someone with the following conditions: {', '.join(remaining_ents)}. the individual then also has the condition "
            prompt_start = PROMPT_TEMPLATE[PROMPT_TO_USE][0]
            prompt_end = PROMPT_TEMPLATE[PROMPT_TO_USE][1]
            ents_string = ', '.join(remaining_ents)
            prompt = f"{prompt_start} {ents_string} {prompt_end}"

            print(prompt)

            prob = compute_token_probs_api(y_star, prompt, prob_generator) 
            prob_NON = compute_token_probs_api(y_NON_star, prompt, prob_generator)            
            if prob == -1 or prob_NON == -1:
                fail_counter += 1
                print(f"failed {fail_counter} times")
                continue            
            results[key]['y_stars'][y_star]['target']=prob
            results[key]['y_NON_stars'][y_NON_star]['target']=prob_NON

running train
running {'ID': 0, 'section_header': 'history of present illness', 'section_text': 'The patient is a 76-year-old white female who presents to the clinic today originally for hypertension and a med check.  She has a history of hypertension, osteoarthritis, osteoporosis, hypothyroidism, allergic rhinitis and kidney stones.  Since her last visit she has been followed by Dr. Kumar.  Those issues are stable.  She has had no fever or chills, cough, congestion, nausea, vomiting, chest pain, chest pressure.', 'dialogue': 'Doctor: What brings you back into the clinic today, miss? \nPatient: I came in for a refill of my blood pressure medicine. \nDoctor: It looks like Doctor Kumar followed up with you last time regarding your hypertension, osteoarthritis, osteoporosis, hypothyroidism, allergic rhinitis and kidney stones.  Have you noticed any changes or do you have any concerns regarding these issues?  \nPatient: No. \nDoctor: Have you had any fever or chills, cough, congestion, nau

  0%|          | 0/11 [00:00<?, ?it/s]

consider someone with the following conditions:  chest pain, kidney stones, cough, hypertension, nausea,, osteoarthritis, fever, chills, osteoporosis, allergic rhinitis . the individual then also has the condition 





InvalidRequestError: Error code: 400 - {"message": "The dedicated endpoint for lr2872/Meta-Llama-3.1-8B-Instruct-Reference-dd180cb8-b4b00ce6 is not running. Please visit https://api.together.ai/models to start the endpoint.", "type_": "invalid_request_error", "code": "dedicated_endpoint_not_running"}

In [None]:
with open(f'target_token_probs_3_prompt_0_4_epochs.json', 'w') as f:
    json.dump(results, f)