In [5]:
%load_ext autoreload
%autoreload 2

from tcav import TCAV

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
from tcav import TCAV
from torch import nn
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import DataLoader
import pandas as pd
import torch

In [7]:
class X2YModel(nn.Module):
    def __init__(self, model_name='saved_target_model', num_classes=2):
        super(X2YModel, self).__init__()
        self.model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
        
    def forward(self, input_ids=None, attention_mask=None, inputs_embeds=None):
        if inputs_embeds is not None:
            outputs = self.model.roberta(inputs_embeds=inputs_embeds, attention_mask=attention_mask)
        else:
            input_ids = input_ids.long()
            with torch.no_grad():
                input_embeds = self.model.roberta.get_input_embeddings()(input_ids)
            # print(input_embeds.shape)
            outputs = self.model.roberta(inputs_embeds=input_embeds, attention_mask=attention_mask)
        
        return self.model.classifier(outputs.last_hidden_state)

x2y_model = X2YModel().to('cuda')


In [8]:
df = pd.read_csv('dataset/train.csv')
dataset = Dataset.from_pandas(df)

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize_function(examples):
    return tokenizer(examples["comment_text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

def prepare_labels(examples):

    label_columns = ["obscene", "threat", "insult", "identity_attack", "sexual_explicit"]
    labels = []
    for i in range(len(examples[label_columns[0]])):
        labels.append([float(examples[column][i]) for column in label_columns])
    return {"labels": labels}

tokenized_dataset = tokenized_dataset.map(prepare_labels, batched=True)

tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split["train"]
test_dataset = train_test_split["test"]



Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

In [9]:
layer_names = ['model.roberta.encoder.layer.11.output.dense']# #'model.roberta.encoder.layer.10.output.dense', 'model.roberta.encoder.layer.9.output.dense']

def calculate_pos_weight(dl):
    pos_cnt = None
    cnt = 0
    with torch.no_grad():
        for batch in dl:
            if pos_cnt is None:
                # print(batch['labels'])
                pos_cnt = batch['labels'].sum(0).clone()
            else:
                pos_cnt += batch['labels'].sum(0).clone()
            cnt += batch['labels'].shape[0]
    
    neg_cnt = cnt - pos_cnt
    pos_weight = neg_cnt / pos_cnt
    
    return pos_weight

tcav = TCAV(x2y_model, layer_names=layer_names, cache_dir='cav')
hparams = dict(task='classification', n_epochs=10, patience=10, batch_size=1, lr=1e-3, weight_decay=1e-2, pos_weight=None)
train_dl = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=False)
test_dl = DataLoader(test_dataset, batch_size=hparams['batch_size'], shuffle=False)
pos_weights = calculate_pos_weight(train_dl)
hparams['pos_weight'] = pos_weights

Loading random_CAVs.npz, CAVs.npz and metrics.npz from cache
Using cached layer names: ['model.roberta.encoder.layer.11.output.dense']


In [None]:
tcav.generate_random_CAVs(train_dataset, test_dataset, n_repeat=1, force_rewrite_cache=True)

In [None]:
tcav.generate_CAVs(train_dataset, test_dataset, n_repeats=1, hparams=hparams, force_rewrite_cache=True)

Generating TCAV for layers: ['model.roberta.encoder.layer.11.output.dense']


#repeats:   0%|          | 0/1 [00:00<?, ?it/s]

Layers:   0%|          | 0/1 [00:00<?, ?it/s]

Training Linear Model for layer: model.roberta.encoder.layer.11.output.dense


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
tcav.generate_TCAVs(test_dataset, 'model.roberta.encoder.layer.11.output.dense', target_index=1)

In [12]:
import pandas as pd
import torch
import numpy as np
from tqdm.auto import tqdm, trange
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import RobertaTokenizer, RobertaModel
from datasets import Dataset

df_test = pd.read_csv("analysis_sheets/final_misclassified_samples_with_concept_gradients.csv")
ds_test = Dataset.from_pandas(df_test)

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True)

tokenized_dataset = ds_test.map(tokenize_function, batched=True)

tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x2y_dl_test = DataLoader(tokenized_dataset, batch_size=1, shuffle=False)
n_concepts = 5
print(f'Num concepts: {n_concepts}')
# tcav = TCAV(x2y_model, layer_names=['model.roberta.encoder.layer.11.output.dense'], cache_dir='cav')

attrs = []

for batch in tqdm(x2y_dl_test, leave=True):
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    with torch.no_grad():
        embeddings = x2y_model.model.get_input_embeddings()(input_ids)
        # embeddings = x2c_model.model.get_input_embeddings()(input_ids)
        
    embeddings.requires_grad_(True)
    attention_mask = attention_mask.float()
    attention_mask.requires_grad_(True)
    target = batch['label'].to(device)
    print(target)
    # attr = tcav.attribute((None, attention_mask, embeddings), 'model.roberta.encoder.layer.11.output.dense', 'cosine_similarity', target=y)
    attr = tcav.attribute(
        layer_name='model.roberta.encoder.layer.11.output.dense',
        mode='cosine_similarity',
        target=target.item(),
        use_random=False,
        input_ids=None,
        attention_mask=attention_mask,
        inputs_embeds=embeddings
    )
    attr = attr.detach().cpu().numpy()
    attrs.append(attr)

attrs = np.concatenate(attrs, axis=0)

np.save('cav/attr_npy_classification.npy', attrs)


attrs = np.load('cav/attr_npy_classification.npy')
df = df_test.copy()
df['tcav_score2'] = attrs.tolist()
# reduce the precision of the tcav scores
df.tcav_score2 = df.tcav_score2.apply(lambda x: [round(i, 8) for i in x])
# df.tcav_score[0]




Map:   0%|          | 0/232 [00:00<?, ? examples/s]

Num concepts: 5


  0%|          | 0/232 [00:00<?, ?it/s]

tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], de

In [14]:
pd.set_option('display.max_colwidth', None)
df.sample(5)

Unnamed: 0.1,Unnamed: 0,sentence,target_logits,concept_logits,concept_gradient,label,concept_label,tcav_score2
93,93,Darkness! \n\nTerrible loss. Hopefully his is kicking the carp out of Rick James in heaven. What did the five fingers say top the face? SLAP!,[[0.23067841 0.76932156]],[[0.5351278 0.24382626 0.33698714 0.7738781 0.7711287 ]],[[-0.0293295 -0.01354797 -0.01953403 -0.03894239 -0.01446827]],0,[[0 0 0 0 0]],"[-0.06967939, -0.03928981, -0.12477093, -0.06573925, -0.04007399]"
28,28,"If he is an honorable man, he would resign and let the public know why. I feel NO sorrow for those working for Trumplethinskin and, when they find out how used they are, still keep the jobs and pout in the corner. Honor can come with a price. And the idea that they can stick around and hope to have a positive effect on a narcissistic sociopath is sadly laughable.",[[0.51957065 0.48042935]],[[0.12699111 0.02351919 0.08125411 0.9052289 0.02437039]],[[0.05448355 0.04270548 0.02316145 0.05714341 0.04204933]],1,[[0 0 0 1 1]],"[0.0497874, 0.03345869, 0.1465009, 0.04943258, 0.02268603]"
55,55,It's too bad that he has to play for the Panthers and play with a whiny Cam Newton.,[[0.993095 0.00690502]],[[0.08773369 0.03604625 0.13983132 0.5414997 0.02529005]],[[0.06258741 0.01472565 0.0204106 0.09105707 0.04605943]],1,[[0 0 0 1 0]],"[0.04561663, 0.02056997, 0.14326172, 0.04327109, 0.03282209]"
30,30,"Criminals like Comey, Clinton, Lynch & Obama are supposed to be PUNISHED!",[[0.72191566 0.27808437]],[[0.1714845 0.06448447 0.15295745 0.6764379 0.31749308]],[[0.04101431 0.01374963 0.0143908 0.05260455 0.01483291]],1,[[0 1 0 1 0]],"[0.04518934, 0.02845072, 0.15218557, 0.05489176, 0.02115705]"
80,80,"money is much more important than children. no matter how many children are destroyed, faithful Catholics will always make more.",[[0.981703 0.01829699]],[[0.1209175 0.2900798 0.7965846 0.58739597 0.27424332]],[[ 0.06256711 -0.00696945 -0.01156354 0.08550084 0.00439526]],1,[[0 1 0 1 1]],"[0.04743417, 0.03753437, 0.11939464, 0.02219711, 0.03659673]"


In [7]:
# df.drop(columns=['tcav_score'], inplace=True)
df.to_csv('analysis_sheets/final_misclassified_samples_with_concept_gradients.csv')

In [8]:
df.shape

(232, 7)

In [9]:
import pandas as pd
pd.set_option('display.max_colwidth', None)
df = pd.read_csv('analysis_sheets/final_classified_samples_with_concept_gradients.csv')
df.sample(5)[['sentence', 'tcav_score2', 'label']]

Unnamed: 0,sentence,tcav_score2,label
812,“personal attacks” on judges who blocked the administration’s first travel ban were “out of all bounds of civic and persuasive discourse.”\n\nWhy on earth would that be. Every decision maker is subject to personal criticism for their decision. Do the judges believe that they are oracles from God?,"[-0.08191304, -0.07248733, -0.12390038, -0.08451594, -0.05713203]",0
916,Oh the half orangutan is threatening to clean house alright. He says that if he can't do a White House makeover he's going to live in his bankrupt Trump penthouse. Keep 'em all in the penthouse. Good riddance.,"[0.07920141, 0.05130164, 0.20633397, 0.12203404, 0.0499733]",1
339,I now hate Canada. \n\nI already hated Justin Turdeau,"[0.0743716, 0.04591805, 0.18898252, 0.11009905, 0.04476108]",1
192,You have history upside down and inside out. Are you talking about the Missiles to Megawatts program where the US bought uranium FROM Russia to power OUR country? \n\nhttps://en.wikipedia.org/wiki/Megatons_to_Megawatts_Program\n\nThat program was widely praised as a swords-to-plowshares type of initiative that was beneficial and helped reduce the aging nuclear stockpile. \n\nIt's as if your facts got put in a blender and you typed what poured out.,"[-0.06722722, -0.05753541, -0.09889112, -0.06018158, -0.04677019]",0
203,"OMG a new couple to have drinkies in Ottawa. They will be on the top of all the wannabe's ""dinner with"" lists. Hopefully they won't be here long and have to pay for their basketball tickets..... BWWAAA","[-0.08143159, -0.07067852, -0.12199756, -0.07815488, -0.05584906]",0


In [None]:
import os
import gc
import sys
import yaml
import torch
import glob
import numpy as np
import pandas as pd
from time import sleep
from scipy.stats import ttest_ind
from captum.attr import LayerActivation
from captum._utils.gradient import compute_layer_gradients_and_eval
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from torchvision.datasets import CelebA, ImageFolder
from torch.utils.data import DataLoader, Dataset
import torchmetrics
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm, trange
import PIL
import seaborn
import random
import argparse
import torch.cuda.amp as amp
import torch.backends.cudnn as cudnn
from tcav import TCAV
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from datasets import Dataset

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        cudnn.benchmark = True

set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class X2YModel(nn.Module):
    def __init__(self, model_name='saved_target_model', num_classes=2):
        super(X2YModel, self).__init__()
        self.model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
        
    def forward(self, input_ids=None, attention_mask=None, inputs_embeds=None):
        if inputs_embeds is not None:
            outputs = self.model.roberta(inputs_embeds=inputs_embeds, attention_mask=attention_mask)
        else:
            input_ids = input_ids.long()
            outputs = self.model.roberta(input_ids=input_ids, attention_mask=attention_mask)
        
        return self.model.classifier(outputs.last_hidden_state)

x2y_model = X2YModel().to(device)

df = pd.read_csv('dataset/train.csv')
dataset = Dataset.from_pandas(df)

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize_function(examples):
    return tokenizer(examples["comment_text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

def prepare_labels(examples):
    label_columns = ["obscene", "threat", "insult", "identity_attack", "sexual_explicit"]
    labels = []
    for i in range(len(examples[label_columns[0]])):
        labels.append([float(examples[column][i]) for column in label_columns])
    return {"labels": labels}

tokenized_dataset = tokenized_dataset.map(prepare_labels, batched=True)

tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split["train"]
test_dataset = train_test_split["test"]

layer_names = ['model.roberta.encoder.layer.11.output.dense', ]#'model.roberta.encoder.layer.10.output.dense', 'model.roberta.encoder.layer.9.output.dense']

def calculate_pos_weight(dl):
    pos_cnt = None
    cnt = 0
    with torch.no_grad():
        for batch in dl:
            if pos_cnt is None:
                pos_cnt = batch['labels'].sum(0).clone()
            else:
                pos_cnt += batch['labels'].sum(0).clone()
            cnt += batch['labels'].shape[0]
    
    neg_cnt = cnt - pos_cnt
    pos_weight = neg_cnt / pos_cnt
    
    return pos_weight

tcav = TCAV(x2y_model, layer_names=layer_names, cache_dir='cav')

hparams = dict(task='classification', n_epochs=10, patience=10, batch_size=32, lr=1e-3, weight_decay=1e-2, pos_weight=None)

train_dl = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=hparams['batch_size'], shuffle=False)
pos_weights = calculate_pos_weight(train_dl)
hparams['pos_weight'] = pos_weights

# tcav.generate_CAVs(train_dataset, test_dataset, n_repeats=2, hparams=hparams, force_rewrite_cache=True)

df_test = pd.read_csv("analysis_sheets/final_misclassified_samples_with_concept_gradients.csv")
ds_test = Dataset.from_pandas(df_test)

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True)

tokenized_dataset = ds_test.map(tokenize_function, batched=True)
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

x2y_dl_test = DataLoader(tokenized_dataset, batch_size=8, shuffle=False)
n_concepts = 5
print(f'Num concepts: {n_concepts}')

# tcav = TCAV(x2y_model, layer_names=['model.roberta.encoder.layer.11.output.dense'], cache_dir='cav')


In [None]:
attrs = []

for batch in tqdm(x2y_dl_test, leave=True):
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    y = batch['label'].to(device)

    with torch.no_grad():
        embeddings = x2y_model.model.get_input_embeddings()(input_ids)

    embeddings.requires_grad_(True)
    print(embeddings.shape)
    attr = tcav.attribute(
        layer_name='model.roberta.encoder.layer.11.output.dense',
        mode='cosine_similarity',
        target=y,
        attention_mask=attention_mask,
        inputs_embeds=embeddings
    )
    attr = attr.detach().cpu().numpy()
    attrs.append(attr)

attrs = np.concatenate(attrs, axis=0)

np.save('cav/attr_npy_classification.npy', attrs)

# Process and save the results
attrs = np.load('cav/attr_npy_classification.npy')
df = df_test.copy()
df['tcav_score2'] = attrs.tolist()
# Reduce the precision of the tcav scores
df['tcav_score2'] = df['tcav_score2'].apply(lambda x: [round(float(i), 8) for i in x])
