In [2]:
import re
import numpy as np
import pandas as pd

import torch
import torch.nn as nn

from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, RobertaModel, RobertaTokenizer

# Select the compute device once; the model and the tokenized inputs are moved to it later.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print("Cuda computing enabled" if device == 'cuda:0' else "No cuda found")

# Prefix prepended to every cached .npy embedding file name.
output_folder = "cache/"

Cuda computing enabled


## Loading the model and data

In [3]:
# Load the pretrained RoBERTa encoder and its tokenizer.

# Directory handed to `from_pretrained` as `cache_dir`.
CACHE_PATH = "/mnt/hdd_drive/huggingface/hub/"
MODEL_NAME = "roberta-base"

tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH)
# The encoder is moved to the selected device right after loading.
model = RobertaModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH).to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# GPT-3 (davinci-003) data: JSON-lines files, only the first 1500 rows are kept.
df_gpt3_wiki = pd.read_json("gpt3_davinci_003_wikip.jsonl_pp", lines=True).head(1500)
df_gpt3_reddit = pd.read_json("gpt3_davinci_003_reddit.jsonl_pp", lines=True).head(1500)
df_gpt3_stackex = pd.read_json("gpt3_davinci_003_300_len.jsonl", lines=True).head(1500)

# GPT-4o data: CSV files read in full from the "gpt-4o/" sub-folder.
df_gpt4o_wiki = pd.read_csv("gpt-4o/gpt-4-o-wiki-correct-1500.csv")
df_gpt4o_reddit = pd.read_csv("gpt-4o/gpt-4-o-reddit-1500.csv")
df_gpt4o_stackex = pd.read_csv("gpt-4o/gpt-4-o-stackexchange-1500.csv")

In [None]:
# Load the datasets from DATA_PATH.
# DATA_PATH is joined to each file name by plain string concatenation,
# so it has to end with a path separator.

DATA_PATH = "..."

# GPT-3 (davinci-003) data: JSON-lines files, only the first 1500 rows are kept.
df_gpt3_wiki = pd.read_json(DATA_PATH + "gpt3_davinci_003_wikip.jsonl_pp", lines=True).head(1500)
df_gpt3_reddit = pd.read_json(DATA_PATH + "gpt3_davinci_003_reddit.jsonl_pp", lines=True).head(1500)
df_gpt3_stackex = pd.read_json(DATA_PATH + "gpt3_davinci_003_300_len.jsonl", lines=True).head(1500)

# GPT-4o data: CSV files read in full.
df_gpt4o_wiki = pd.read_csv(DATA_PATH + "gpt-4-o-wiki-correct-1500.csv")
df_gpt4o_reddit = pd.read_csv(DATA_PATH + "gpt-4-o-reddit-1500.csv")
df_gpt4o_stackex = pd.read_csv(DATA_PATH + "gpt-4-o-stackexchange-1500.csv")

## Main functions

In [5]:
def get_avg_pool(series, flag=True):
    """Mean-pool the encoder output of every text in the container <series>.

    Each text is whitespace-normalised, tokenized (truncated to 510 tokens) and
    passed through the global `model`; the first output of the model for the
    single sequence in the batch is averaged over the token axis.
    (For a Hugging Face RobertaModel the first output is the last hidden state.)

    When `flag` is true, texts shorter than 25 characters (measured before
    whitespace normalisation) are skipped and the loop stops as soon as 600
    embeddings have been collected. When `flag` is false every text is embedded.

    Returns a 2-D numpy array with one row per embedded text.
    """
    pooled_rows = []
    for raw_text in series:
        too_short = len(raw_text) < 25
        if flag and too_short:
            continue

        cleaned = re.sub(r'\s+', ' ', raw_text).strip()
        encoded = tokenizer(cleaned, truncation=True, max_length=510, return_tensors="pt").to(device)

        # Inference only: no gradients are needed for the embeddings.
        with torch.no_grad():
            token_vectors = model(**encoded)[0][0]
        pooled_rows.append(torch.mean(token_vectors, dim=0).cpu().numpy())

        if flag and len(pooled_rows) >= 600:
            break

    return np.vstack(pooled_rows)

### Auxiliary functions for the head search

In [6]:
def calc_crossdomen_results(model_name="roberta", config_name="none", C=1):
    """Return the mean transfer accuracy of a logistic-regression classifier.

    A "setting" is a (generator suffix, domain) pair, e.g. ("4o", "reddit").
    For every setting a classifier is fitted on the cached human (label 0) and
    generated (label 1) embeddings of that setting, and scored on every *other*
    setting. The returned value is the average of those scores.

    Parameters
    ----------
    model_name, config_name : str
        Used to build the cache file names
        ``<output_folder>avg_<model_name>_<human|gpt><suffix>_<domain>_<config_name>.npy``.
    C : float
        Inverse regularisation strength passed to ``LogisticRegression``.

    Returns
    -------
    float
        Mean accuracy over all ordered pairs of distinct settings.
    """
    suffixes = ["3", "4o"]
    domains = ["wiki", "reddit", "stackexchange"]
    settings = [(suffix, domain) for suffix in suffixes for domain in domains]
    # Each ordered pair of distinct settings contributes one score: 6 * 5 = 30.
    n_pairs = len(settings) * (len(settings) - 1)

    def load(kind, suffix, domain):
        # kind is "human" or "gpt".
        return np.load("{}avg_{}_{}{}_{}_{}.npy".format(
            output_folder, model_name, kind, suffix, domain, config_name))

    def stack_with_labels(human, generated):
        # Labels follow the real class sizes, so the two classes need not be
        # equally large (halving the stacked matrix would mislabel rows then).
        features = np.vstack([human, generated])
        labels = np.concatenate([np.zeros(len(human)), np.ones(len(generated))])
        return features, labels

    # Read every cached matrix once instead of once per (train, valid) pair.
    cache = {
        (kind, suffix, domain): load(kind, suffix, domain)
        for suffix, domain in settings
        for kind in ("human", "gpt")
    }

    avg = 0
    for suffix_t, train in settings:
        X_train, y_train = stack_with_labels(
            cache["human", suffix_t, train][:400],
            cache["gpt", suffix_t, train][:400],
        )
        cls = LogisticRegression(max_iter=1000, C=C).fit(X_train, y_train)

        for suffix_v, valid in settings:
            if train == valid and suffix_v == suffix_t:
                # The training setting itself is not part of the average.
                continue

            # NOTE(review): `[:-200]` keeps everything except the last 200 rows;
            # for a 600-row cache that is the same row range as the `[:400]`
            # training slice and the last 200 rows are never used. Confirm
            # whether `[-200:]` was intended.
            X_valid, y_val = stack_with_labels(
                cache["human", suffix_v, valid][:-200],
                cache["gpt", suffix_v, valid][:-200],
            )
            avg += cls.score(X_valid, y_val) / float(n_pairs)
    return avg

In [7]:
def calc_quality():
    """Score the current global `model` on the cross-domain benchmark.

    For each of the six datasets the human ("gold_completion") and generated
    ("gen_completion") texts are embedded with `get_avg_pool` (default flag,
    i.e. at most 600 texts per column) and written to the "tmp" cache files.
    The value of `calc_crossdomen_results` on those files is returned.
    """
    # (dataframe, cache file for human texts, cache file for generated texts)
    jobs = [
        (df_gpt4o_wiki, 'avg_roberta_human4o_wiki_tmp.npy', 'avg_roberta_gpt4o_wiki_tmp.npy'),
        (df_gpt4o_reddit, 'avg_roberta_human4o_reddit_tmp.npy', 'avg_roberta_gpt4o_reddit_tmp.npy'),
        (df_gpt4o_stackex, 'avg_roberta_human4o_stackexchange_tmp.npy', 'avg_roberta_gpt4o_stackexchange_tmp.npy'),
        (df_gpt3_wiki, 'avg_roberta_human3_wiki_tmp.npy', 'avg_roberta_gpt3_wiki_tmp.npy'),
        (df_gpt3_reddit, 'avg_roberta_human3_reddit_tmp.npy', 'avg_roberta_gpt3_reddit_tmp.npy'),
        (df_gpt3_stackex, 'avg_roberta_human3_stackexchange_tmp.npy', 'avg_roberta_gpt3_stackexchange_tmp.npy'),
    ]

    for frame, human_file, gpt_file in jobs:
        human_emb = get_avg_pool(frame["gold_completion"])
        gpt_emb = get_avg_pool(frame["gen_completion"])
        np.save(output_folder + human_file, human_emb)
        np.save(output_folder + gpt_file, gpt_emb)

    return calc_crossdomen_results(model_name="roberta", config_name="tmp")

### greedy search for the best set of heads

In [8]:
"""
Greedy depth-first search over (ordered) subsets of attention heads to prune.

Each sweep walks over the (layer, head) grid. A candidate head is pruned on top
of the heads accepted so far and kept if `calc_quality()` beats the best score
seen so far. If a sweep accepts at least one head, the next sweep restarts from
(0, 0). Otherwise the most recently accepted head is removed and the sweep
resumes right after it. The search ends when there is nothing left to remove.
"""

heads = {}                 # layer index -> head indices currently pruned (working state)
current_heads_stack = []   # accepted (layer, head) pairs, in acceptance order
best_result = 0.0
best_heads = {}            # snapshot of `heads` at the best score
start_i, start_j = 0, 0    # position at which the next sweep starts

while True:
    path_has_been_continued = False
    # roberta-base layout: 12 layers with 12 heads each.
    for i in range(start_i, 12):
        # The head offset only applies to the layer the sweep resumes in;
        # every later layer is scanned from head 0.
        first_j = start_j if i == start_i else 0
        for j in range(first_j, 12):
            if j in heads.get(i, []):
                continue

            # `get_avg_pool` reads the global `model`, so it is rebuilt and
            # re-pruned for every candidate.
            model = RobertaModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH).to(device)
            model.prune_heads(heads)
            model.prune_heads({i: [j]})

            qual = calc_quality()
            if qual > best_result:
                best_result = qual
                heads.setdefault(i, []).append(j)

                path_has_been_continued = True
                current_heads_stack.append((i, j))
                best_heads = deepcopy(heads)
                print(best_result, heads)

    if path_has_been_continued:
        print("Iteration finished succesfully")
        start_i, start_j = 0, 0
        continue

    print("No extension has been found, backtracking")
    if len(current_heads_stack) == 0:
        break
    last_vert = current_heads_stack.pop()
    last_i, last_j = last_vert

    # Undo the most recent acceptance.
    heads[last_i] = [h for h in heads[last_i] if h != last_j]
    if not heads[last_i]:
        del heads[last_i]

    # Resume after the removed head: re-testing it would only reproduce the
    # score it was accepted with, which cannot exceed `best_result`.
    start_i, start_j = last_i, last_j + 1

print("Search completely exhausted")
print(best_result, best_heads)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0.5425 {0: [7, 0]}


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['robert

0.54375 {0: [7, 0, 4]}


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyboardInterrupt: 

### print the results

In [None]:
# Layer index -> head indices, in the same layout as the `heads` dict that the
# search passes to `model.prune_heads`.
# NOTE(review): presumably the outcome of a complete run of the greedy search; confirm provenance.
best_heads_roberta = dict(enumerate([
    [0, 1, 4, 6, 8, 9],       # layer 0
    [3, 4, 6, 8, 9, 10],      # layer 1
    [0, 1, 2, 7],             # layer 2
    [6, 7, 10, 11],           # layer 3
    [2, 4, 5, 6, 11],         # layer 4
    [0, 7, 9, 10],            # layer 5
    [0, 10],                  # layer 6
    [4, 6, 8],                # layer 7
    [4, 7, 10, 11],           # layer 8
    [0, 3, 4, 6, 8, 10],      # layer 9
    [1, 4, 5, 7, 10],         # layer 10
    [3, 6, 7],                # layer 11
]))

In [None]:
# Rebuild a fresh encoder and prune the selected head set before the final
# embeddings are computed (`get_avg_pool` reads the global `model`).
# `best_heads_roberta` is used rather than `heads`: `heads` is the search's
# working dict, which is mutated during the search and unwound by backtracking.
model = RobertaModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH).to(device)
model.prune_heads(best_heads_roberta)

In [None]:
# Embed *all* texts of every dataset with the pruned model and cache them under
# the "rob" / "tmp2" file names read by `print_crossdomen_results`.
# flag=False is passed for every dataset: with the default flag the cache would
# be cut off at 600 rows, while the report trains on rows [:1300] and validates
# on rows [1300:].

# (dataframe, cache file for human texts, cache file for generated texts)
embedding_jobs = [
    (df_gpt4o_wiki, 'avg_rob_human4o_wiki_tmp2.npy', 'avg_rob_gpt4o_wiki_tmp2.npy'),
    (df_gpt4o_reddit, 'avg_rob_human4o_reddit_tmp2.npy', 'avg_rob_gpt4o_reddit_tmp2.npy'),
    (df_gpt4o_stackex, 'avg_rob_human4o_stackexchange_tmp2.npy', 'avg_rob_gpt4o_stackexchange_tmp2.npy'),
    (df_gpt3_wiki, 'avg_rob_human3_wiki_tmp2.npy', 'avg_rob_gpt3_wiki_tmp2.npy'),
    (df_gpt3_reddit, 'avg_rob_human3_reddit_tmp2.npy', 'avg_rob_gpt3_reddit_tmp2.npy'),
    (df_gpt3_stackex, 'avg_rob_human3_stackexchange_tmp2.npy', 'avg_rob_gpt3_stackexchange_tmp2.npy'),
]

for frame, human_file, gpt_file in embedding_jobs:
    human_emb = get_avg_pool(frame["gold_completion"], flag=False)
    gpt_emb = get_avg_pool(frame["gen_completion"], flag=False)
    np.save(output_folder + human_file, human_emb)
    np.save(output_folder + gpt_file, gpt_emb)

In [2]:
def print_crossdomen_results(model_name="roberta", config_name="none", C=1):
    """Print the train-by-validation accuracy grid and three summary averages.

    One row is printed per training setting (generator, domain) and one column
    per validation setting. A logistic-regression classifier is fitted on the
    first 1300 cached human (label 0) and generated (label 1) embeddings of
    the training setting and scored on the rows from index 1300 onwards of
    each validation setting.

    The summary line reports:
      * Cross-domain       - same generator, different domain (12 cells);
      * Cross-model        - different generator, same domain (6 cells);
      * Cross-domain&model - different generator and domain (12 cells).

    Parameters
    ----------
    model_name, config_name : str
        Used to build the cache file names
        ``<output_folder>avg_<model_name>_<human|gpt><suffix>_<domain>_<config_name>.npy``.
    C : float
        Inverse regularisation strength passed to ``LogisticRegression``.

    Raises
    ------
    ValueError
        If a validation split is empty, i.e. a cache holds at most 1300 rows.
    """
    line_prefixes = [" GPT-3  ", " GPT-4o "]
    suffixes = ["3", "4o"]
    domains = ["wiki", "reddit", "stackexchange"]

    def load(kind, suffix, domain):
        # kind is "human" or "gpt".
        return np.load("{}avg_{}_{}{}_{}_{}.npy".format(
            output_folder, model_name, kind, suffix, domain, config_name))

    def stack_with_labels(human, generated):
        # Labels follow the real class sizes, so the two classes need not be
        # equally large (halving the stacked matrix would mislabel rows then).
        features = np.vstack([human, generated])
        labels = np.concatenate([np.zeros(len(human)), np.ones(len(generated))])
        return features, labels

    # Read every cached matrix once instead of once per table cell.
    cache = {
        (kind, suffix, domain): load(kind, suffix, domain)
        for suffix in suffixes
        for domain in domains
        for kind in ("human", "gpt")
    }

    avg_in = 0
    avg_out = 0
    avg_3 = 0

    print(' ' * 21 + "        GPT-3     " + "        GPT-4o    " )
    print(' ' * 21 + "  Wiki Redd. Stac." * 2)
    for prefix, suffix_t in zip(line_prefixes, suffixes):
        for train in domains:
            print(prefix + train + ' ' * (13 - len(train)), end=" ")

            X_train, y_train = stack_with_labels(
                cache["human", suffix_t, train][:1300],
                cache["gpt", suffix_t, train][:1300],
            )
            cls = LogisticRegression(max_iter=1000, C=C).fit(X_train, y_train)

            for suffix_v in suffixes:
                for valid in domains:
                    X_valid, y_val = stack_with_labels(
                        cache["human", suffix_v, valid][1300:],
                        cache["gpt", suffix_v, valid][1300:],
                    )
                    if len(X_valid) == 0:
                        raise ValueError(
                            "Empty validation split for gpt{} / {}: the cached "
                            "embeddings hold at most 1300 rows".format(suffix_v, valid)
                        )

                    # Score once and reuse it for the table and the averages.
                    score = cls.score(X_valid, y_val)
                    print(format(score, '.3f'), end=" ")

                    if suffix_v != suffix_t:
                        if train == valid:
                            avg_out += score / 6.0
                        else:
                            avg_3 += score / 12.0
                    elif train != valid:
                        avg_in += score / 12.0
            print("")
    print("Cross-domain:", avg_in, "; Cross-model: ", avg_out, "; Cross-domain&model: ", avg_3, "\n")

In [None]:
# Report for the embeddings cached under the "rob" / "tmp2" file names.
print_crossdomen_results(
    model_name="rob",
    config_name="tmp2",
    C=1,
)