### Preparing data

#### Input:
1) data with the initial categories and labels ("data_dbpedia_by_popularity_21037.csv" and 'data_all_with_knowledge_categories.hf')
2) trained LoRA models

In [1]:
import transformers
import torch
from transformers import pipeline

import pandas as pd
from tqdm import tqdm

from urllib.request import urlopen 
import json 
import numpy as np

tqdm.pandas()

import argparse
import os

from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset, load_from_disk

from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def accuracy_check(list_of):
    """Map the per-answer correctness flags of one question to a knowledge category.

    Args:
        list_of: mapping (dict or DataFrame row) with the iterables
            ``'p_greed'`` and ``'p_sample'`` of truthy/falsy flags.

    Returns:
        ``'HighlyKnown'`` if every ``p_greed`` flag is truthy,
        ``'MaybeKnown'`` if at least one is,
        ``'WeaklyKnown'`` if none is but some ``p_sample`` flag is,
        ``'Unknown'`` otherwise.
    """
    greedy_hits = list_of['p_greed']
    if all(greedy_hits):
        return 'HighlyKnown'
    if any(greedy_hits):
        return 'MaybeKnown'
    # Sampled answers are only consulted when no greedy answer was correct.
    return 'WeaklyKnown' if any(list_of['p_sample']) else 'Unknown'

In [5]:
# Columns kept from the initial-model dataset and from the DBpedia triples file.
# 'question' is the only column present in both lists, so it is the merge key.
init_columns = ['question', 'answer', 'greedy_ans_init', 'sample_ans_init', 'Category_init']
dbpedia_columns = ['question', 'subject', 'relation', 'object', 'object_wikidata_id']

dataset_init = load_from_disk('data_all_with_knowledge_categories.hf')
dataset_init_df = pd.DataFrame(dataset_init)
data_dbpedia = pd.read_csv("data_dbpedia_by_popularity_21037.csv")
data_all = (
    dataset_init_df[init_columns]
    .merge(data_dbpedia[dbpedia_columns])
    .reset_index(drop=True)
)

In [6]:
def process_dataset(dataset, batch_size_greed, quasi_accuracy_func):
    """Apply a batched scoring function to every example of ``dataset``.

    Args:
        dataset: object exposing ``map(func, batched=..., batch_size=...)``.
        batch_size_greed: number of examples passed to ``quasi_accuracy_func`` per call.
        quasi_accuracy_func: function that receives a batch and returns the updated batch.

    Returns:
        Whatever ``dataset.map`` returns (the dataset with the added columns).
    """
    # The requested batch size is honoured instead of a hard-coded 128.
    return dataset.map(quasi_accuracy_func, batched=True, batch_size=batch_size_greed)

In [7]:
def _contains_alias(prediction, aliases):
    """Return True if the normalised ``prediction`` contains any of ``aliases`` as a substring."""
    text = prediction.strip().lower()
    return any(alias in text for alias in aliases)


def quasi_accuracy_triviaqa(samples):
    """Flag, for every example in a batch, whether the model answers mention a gold alias.

    Args:
        samples: batch mapping with the parallel columns ``'answer_upd'``
            (dicts holding ``'normalized_aliases'``), ``'greedy_ans'`` (one string
            per example) and ``'sample_ans'`` (an iterable of strings per example).

    Returns:
        The same ``samples`` mapping with two added columns:
        ``'p_greed'`` is True when the greedy answer contains an alias, and
        ``'p_sample'`` is True when at least one sampled answer contains an alias.
    """
    p_greed = []
    p_sample = []
    for answer, greedy_pred, sample_pred in zip(samples['answer_upd'], samples['greedy_ans'], samples['sample_ans']):
        # Greedy and sampled answers are matched against the same normalised aliases.
        # Empty aliases are dropped because '' is a substring of every prediction.
        aliases = [alias.lower().strip() for alias in answer['normalized_aliases']]
        aliases = [alias for alias in aliases if alias]
        p_greed.append(_contains_alias(greedy_pred, aliases))
        p_sample.append(any(_contains_alias(sample_i, aliases) for sample_i in sample_pred))

    samples['p_greed'] = p_greed
    samples['p_sample'] = p_sample
    return samples

In [8]:
def clean_out_ans(ans_dict):
    """Replace the ``'answer'`` list of a record with a trimmed copy of its first entry.

    Only the ``'aliases'`` and ``'normalized_aliases'`` fields of the first answer
    are kept. The record is modified in place and also returned.
    """
    first_answer = ans_dict['answer'][0]
    ans_dict['answer'] = {
        field: first_answer[field] for field in ('aliases', 'normalized_aliases')
    }
    return ans_dict

### Relations domain (data collection)

In [9]:
# Distinct relation URIs found in the data, and the lookup table
# (relation URI -> ontology class) that the next cells fill in.
rels_unique = list(data_all['relation'].unique())
rels_domain_dict = {}

In [10]:
# Fetch the rdfs:domain of every relation from its DBpedia JSON description.
# The lookup is best effort: a relation whose request, parsing or key lookup
# fails is collected in `non_found` instead of aborting the loop.
non_found = []
for relation in tqdm(rels_unique):
    try:
        cur_rel = relation.split("ontology/")[1].replace('>', '')
        url = "https://dbpedia.org/data3/" + cur_rel + ".json"
        # The context manager closes the HTTP connection after the body is read.
        with urlopen(url) as response:
            data_json = json.loads(response.read())
        # The JSON is keyed by the relation URI without the surrounding '<' and '>'.
        domain = data_json[relation[1:-1]]["http://www.w3.org/2000/01/rdf-schema#domain"][0]['value']
        rels_domain_dict[relation] = domain
    except Exception:
        # `Exception` rather than a bare `except`, so Ctrl-C still interrupts the loop.
        non_found.append(relation)

100%|██████████| 390/390 [01:27<00:00,  4.45it/s]


In [11]:
# Second pass for relations without an rdfs:domain: use their rdfs:range instead.
# Relations for which this also fails are collected in `non_found_range`.
non_found_range = []
for relation in tqdm(non_found):
    try:
        cur_rel = relation.split("ontology/")[1].replace('>', '')
        url = "https://dbpedia.org/data3/" + cur_rel + ".json"
        # The context manager closes the HTTP connection after the body is read.
        with urlopen(url) as response:
            data_json = json.loads(response.read())
        # NOTE: despite its name, `domain` holds the relation's rdfs:range here.
        domain = data_json[relation[1:-1]]["http://www.w3.org/2000/01/rdf-schema#range"][0]['value']
        rels_domain_dict[relation] = domain
    except Exception:
        # `Exception` rather than a bare `except`, so Ctrl-C still interrupts the loop.
        non_found_range.append(relation)

100%|██████████| 103/103 [00:21<00:00,  4.78it/s]


In [12]:
# Attach the looked-up class to every row; relations missing from the lookup
# table get the placeholder 'Non-defined'. One vectorised assignment replaces
# the per-row `.loc` writes and does not depend on the index being 0..n-1.
data_all['rel_domain'] = data_all['relation'].map(
    lambda relation: rels_domain_dict.get(relation, 'Non-defined')
)

### LoRA models

In [13]:
# Directory of the LoRA run analysed below. The part of its name between '_on_' and
# '_PICKED' is used further down as the suffix of the per-model column names.
path_of_model = "llama3_1_8b_instr_lora1_onlyproj_bs8_LR0.001_seed2_trained_on_1Unknown_10HighKnown_PICKED"

In [14]:
# Load the validation answers of the LoRA model and drop the stored correctness flags,
# which are recomputed below.
valid_dir = os.path.join(path_of_model, "VALID_DATASET")
dataset = load_from_disk(valid_dir)
dataset['valid'] = dataset['valid'].remove_columns(['p_greed', 'p_sample'])

# Copy of the gold answers with blank normalised aliases removed.
aliases_upd = dataset['valid']['answer']
for gold_answer in tqdm(aliases_upd):
    gold_answer['normalized_aliases'] = [
        alias for alias in gold_answer['normalized_aliases'] if alias.strip() != ''
    ]

dataset['valid'] = dataset['valid'].add_column("answer_upd", aliases_upd)
dataset['valid'] = process_dataset(dataset['valid'], 128, quasi_accuracy_triviaqa)

100%|██████████| 210360/210360 [00:00<00:00, 963036.47it/s]


In [15]:
# One row per question: every other column becomes the list of its values across
# the question's rows, and the category is derived from those lists.
df_val = (
    pd.DataFrame(dataset['valid'])
    .groupby('question')
    .agg(list)
    .reset_index(drop=False)
)
df_val['Category'] = df_val.apply(accuracy_check, axis=1)

In [16]:
# Distinct questions of the 'test' split stored next to the model.
train_dataset_dir = os.path.join(path_of_model, "dataset_to_train.dataset")
dataset_unk = load_from_disk(train_dataset_dir)
unique_ques = list(set(dataset_unk['test']['question']))

In [17]:
# Suffix taken from the model directory name (text between '_on_' and '_PICKED'),
# used to build the names of the columns added for this model.
trained_on = path_of_model.split('_on_')[1]
cur_var = "_" + trained_on.split("_PICKED")[0]
new_cat_name = f"Category{cur_var}"
new_greedy_name = f"greedy_ans{cur_var}"
new_sample_name = f"sample_ans{cur_var}"
new_target_name = f"target{cur_var}"

In [18]:
# Give the LoRA columns their model-specific names and join them onto the main table.
renamed_columns = {
    'Category': new_cat_name,
    "greedy_ans": new_greedy_name,
    "sample_ans": new_sample_name,
}
df_val = df_val.rename(columns=renamed_columns)

lora_columns = ['question', new_cat_name, new_greedy_name, new_sample_name]
data_all = data_all.merge(df_val[lora_columns])

In [19]:
# Flag (1.0 / 0.0) the rows whose question is in `unique_ques`.
# A single vectorised membership test replaces the per-row scan of `found_inds`.
found_inds = data_all.query('question in @unique_ques').index.values
data_all[new_target_name] = data_all.index.isin(found_inds).astype(float)

100%|██████████| 21039/21039 [00:02<00:00, 10252.89it/s]


In [20]:
# Rows flagged as targets. The column name is taken from `new_target_name`,
# so the cell keeps working when another model directory is analysed.
data_all.query(f"`{new_target_name}` == 1")

Unnamed: 0,question,answer,greedy_ans_init,sample_ans_init,Category_init,subject,relation,object,object_wikidata_id,rel_domain,Category_1Unknown_10HighKnown,greedy_ans_1Unknown_10HighKnown,sample_ans_1Unknown_10HighKnown,target_1Unknown_10HighKnown
40,Aalen is in which administrative district?,"[{'aliases': ['Stuttgart Government Region'], ...","[Answer: norway, Answer: norway, Answer: Swabi...","[[Answer: norway], [Answer: norway], [Answer: ...",Unknown,<http://dbpedia.org/resource/Aalen>,<http://dbpedia.org/ontology/administrativeDis...,<http://dbpedia.org/resource/Stuttgart_(region)>,http://www.wikidata.org/entity/Q8172,http://dbpedia.org/ontology/Settlement,HighlyKnown,"[Answer: Stuttgart Government Region, Answer: ...","[[Answer: Stuttgart Government Region], [Answe...",1.0


In [22]:
# Number of questions that shifted from HighlyKnown to Unknown
was_highly_known = data_all['Category_init'] == 'HighlyKnown'
is_unknown_now = data_all[new_cat_name] == 'Unknown'
len(data_all[was_highly_known & is_unknown_now])

153

In [23]:
# Number of questions that shifted from Unknown to HighlyKnown
was_unknown = data_all['Category_init'] == 'Unknown'
is_highly_known_now = data_all[new_cat_name] == 'HighlyKnown'
len(data_all[was_unknown & is_highly_known_now])

45

### Refused answers check

In [24]:
# Number of questions whose first greedy answer from the LoRA model is a refusal,
# i.e. contains 'information' or 'verify'.
# Each marker is tested on its own: `('information' or "verify") in text` evaluates
# to `'information' in text` and never looks for "verify".
refusal_markers = ('information', 'verify')
found_inds = [
    idx
    for idx, answers in data_all[new_greedy_name].items()
    if any(marker in answers[0] for marker in refusal_markers)
]
len(found_inds)

0

In [25]:
# Number of questions whose first greedy answer from the initial model is a refusal,
# i.e. contains 'information' or 'verify'.
# Each marker is tested on its own: `('information' or "verify") in text` evaluates
# to `'information' in text` and never looks for "verify".
refusal_markers = ('information', 'verify')
found_inds = [
    idx
    for idx, answers in data_all['greedy_ans_init'].items()
    if any(marker in answers[0] for marker in refusal_markers)
]
len(found_inds)

3189

In [26]:
# Number of questions that were Unknown and refused by the initial model
# but became HighlyKnown with the LoRA model.
# Only the refused rows in `found_inds` are visited, instead of testing every row
# of the table for membership in that list.
count = 0
for i in sorted(set(found_inds)):
    if data_all.loc[i, 'Category_init'] == "Unknown" and data_all.loc[i, new_cat_name] == "HighlyKnown":
        count += 1
        data_all.loc[i, 'reason_pos'] = 'refused_to_answer'
print(count)

9


### Diversity

In [27]:
# Count how often each distinct answer text occurs among the LoRA model's greedy
# answers (at most the first `max_answers` per question). Refusals are skipped, as are
# answers without the "Answer: " prefix.
refusal_markers = ("information", "verify")
answer_prefix = "Answer: "
max_answers = 10

div_set_after_dict = dict()

for answers in tqdm(data_all[new_greedy_name]):
    try:
        candidates = list(answers)[:max_answers]
    except TypeError:
        # Not a sequence of answers (e.g. a missing value): nothing to count.
        continue
    for answer_text in candidates:
        if not isinstance(answer_text, str):
            continue
        # Each marker is tested on its own: `("information" or "verify") not in text`
        # would only ever look for "information".
        if any(marker in answer_text for marker in refusal_markers):
            continue
        parts = answer_text.split(answer_prefix)
        if len(parts) < 2:
            continue
        var = parts[1].strip()
        div_set_after_dict[var] = div_set_after_dict.get(var, 0) + 1

# The set of distinct answers is exactly the key set of the counter.
div_set_after = set(div_set_after_dict)

100%|██████████| 21039/21039 [00:03<00:00, 6718.24it/s]


In [28]:
# Number of distinct answers given by the LoRA model

len(div_set_after)

43766

In [29]:
# Mean and standard deviation of the number of occurrences per distinct answer.
answer_counts = list(div_set_after_dict.values())
print("After, mean", np.mean(answer_counts))
print("After, std", np.std(answer_counts))

After, mean 4.805671068866244
After, std 14.26221354142604


### Exploded 

In [30]:
# "Exploded" answers: answer texts that occur more than 850 times.
suspicious_ans = [answer for answer, occurrences in div_set_after_dict.items() if occurrences > 850]

In [31]:
# Number of questions that were HighlyKnown but became Unknown and whose first
# greedy answer contains one of the exploded answers.
susp_inds = [
    i
    for i in range(len(data_all))
    if data_all.loc[i, 'Category_init'] == "HighlyKnown"
    and data_all.loc[i, new_cat_name] == "Unknown"
    and any(elem in data_all.loc[i, new_greedy_name][0] for elem in suspicious_ans)
]
len(susp_inds)

1

In [32]:
# Record the explanation for the negative shifts found above.
for i in susp_inds:
    data_all.loc[i, 'reason_neg'] = 'exploded_answers'

In [33]:
# Number of questions that were Unknown but became HighlyKnown and whose first
# greedy answer contains one of the exploded answers.
susp_inds_high = [
    i
    for i in range(len(data_all))
    if data_all.loc[i, 'Category_init'] == "Unknown"
    and data_all.loc[i, new_cat_name] == "HighlyKnown"
    and any(elem in data_all.loc[i, new_greedy_name][0] for elem in suspicious_ans)
]
len(susp_inds_high)

0

In [34]:
# Record the explanation for the positive shifts found above.
for i in susp_inds_high:
    data_all.loc[i, 'reason_pos'] = 'exploded_answers'

### Target-based shifts

In [35]:
# First alias of the first gold answer of every target row.
# The target rows are selected once, not re-queried for every row of the table.
target_rows = data_all.query(f"`{new_target_name}` == 1")
target_ans = [answer[0]['aliases'][0] for answer in target_rows['answer']]

In [36]:
# Number of questions that were Unknown but became HighlyKnown and whose first
# greedy answer contains one of the target answers.
target_inds = [
    i
    for i in range(len(data_all))
    if data_all.loc[i, 'Category_init'] == "Unknown"
    and data_all.loc[i, new_cat_name] == "HighlyKnown"
    and any(elem in data_all.loc[i, new_greedy_name][0] for elem in target_ans)
]
print(len(target_inds))

# Record the explanation for these positive shifts.
for i in target_inds:
    data_all.loc[i, 'reason_pos'] = 'from_target'

3


In [37]:
# Number of questions that were HighlyKnown but became Unknown and whose first
# greedy answer contains one of the target answers.
target_inds = [
    i
    for i in range(len(data_all))
    if data_all.loc[i, 'Category_init'] == "HighlyKnown"
    and data_all.loc[i, new_cat_name] == "Unknown"
    and any(elem in data_all.loc[i, new_greedy_name][0] for elem in target_ans)
]
print(len(target_inds))

0


In [38]:
# Record the explanation for the negative shifts found above.
for i in target_inds:
    data_all.loc[i, 'reason_neg'] = 'from_target'

### Domain shifts

In [39]:
# Number of questions that were Unknown but became HighlyKnown and whose relation
# class is shared with at least one target row.
# The set of target classes is built once, before the loop, not re-queried per row.
target_domains = {
    elem
    for elem in data_all.query(f"`{new_target_name}` == 1").rel_domain.unique()
    if elem != 'Non-defined'
}

pos_shifts = []
for i in tqdm(range(len(data_all))):
    if data_all.loc[i, 'rel_domain'] in target_domains:
        if data_all.loc[i, 'Category_init'] == "Unknown" and data_all.loc[i, new_cat_name] == "HighlyKnown":
            pos_shifts.append(i)
len(pos_shifts)

100%|██████████| 21039/21039 [00:47<00:00, 444.70it/s]


4

In [40]:
# Record the explanation for the positive shifts found above.
for i in pos_shifts:
    data_all.loc[i, 'reason_pos'] = 'rel_domain_pos_shift'

In [41]:
# Number of questions that were HighlyKnown but became Unknown and whose relation
# class is shared with at least one target row.
# The set of target classes is built once, before the loop, not re-queried per row.
target_domains = {
    elem
    for elem in data_all.query(f"`{new_target_name}` == 1").rel_domain.unique()
    if elem != 'Non-defined'
}

neg_shifts = []
for i in tqdm(range(len(data_all))):
    if data_all.loc[i, 'rel_domain'] in target_domains:
        if data_all.loc[i, 'Category_init'] == "HighlyKnown" and data_all.loc[i, new_cat_name] == "Unknown":
            neg_shifts.append(i)
len(neg_shifts)

100%|██████████| 21039/21039 [00:46<00:00, 447.77it/s]


3

In [42]:
# Record the explanation for the negative shifts found above.
for i in neg_shifts:
    data_all.loc[i, 'reason_neg'] = 'rel_domain_neg_shift'

In [43]:
# Number of positive shifts explained by at least one reason. A row tagged by several
# reasons is counted once, because 'reason_pos' holds a single value per row.
int(data_all['reason_pos'].notna().sum())

13

In [44]:
# Number of negative shifts explained by at least one reason. A row tagged by several
# reasons is counted once, because 'reason_neg' holds a single value per row.
int(data_all['reason_neg'].notna().sum())

4

In [45]:
# Remove the helper columns that held the shift explanations.
data_all = data_all.drop(['reason_pos', 'reason_neg'], axis=1)