# Imports

In [1]:
import pandas as pd
import gensim
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from nltk.corpus import wordnet as wn
from transformers import BertModel, BertTokenizer, DistilBertTokenizer, DistilBertForSequenceClassification
from transformers_interpret import SequenceClassificationExplainer
import torch
from torch.utils.data import DataLoader
from lazypredict.Supervised import LazyClassifier
import shap
import random
from sklearn.metrics import classification_report
import lime
from lime.lime_text import LimeTextExplainer
import empath
import string
import re
STOP_WORDS = set(stopwords.words('english'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#https://github.com/cdpierse/transformers-interpret

In [None]:
lexicon = empath.Empath()
def get_empath(text_list):
    json_list = []
    for i in text_list:
        empath = lexicon.analyze(i, normalize=True)
        json_list.append(empath)
    return json_list

In [None]:
from writeprints_static import WriteprintsStatic

texts = ["Colorless green ideas sleep furiously."]#, "Furiously sleep ideas green colorless.", 'James, while John had had "had", had had "had had"; "had had" had had a better effect on the teacher.']

vec_ws = WriteprintsStatic()

# The input only accepts list of English string, so there is no need to specify input type as usually did for
# scikit-learn.
# Output X is a scipy.sparse.csr_matrix instance
X = vec_ws.transform(texts)

# to check the feature values
X.toarray()

# to check the feature names
wps_features=vec_ws.get_feature_names()#column labels
len(wps_features)

552

# Data

In [120]:
# Load the dataframe
# data = pd.read_csv(r'Datasets\CASIS\casis1000.csv')
# data = data.iloc[:100]
# data = data.drop('ID', axis=1)
# data
train_data = pd.read_csv(r"data\c50.csv")
test_data = pd.read_csv(r"data\c50_Test.csv")
test_data = test_data.drop(test_data.columns[0], axis=1)
test_data

Unnamed: 0,ID,Text,true-author
0,unknown00001,China and Britain agreed on Wednesday to relea...,candidate00046
1,unknown00002,The Federal Reserve may not be taking adequate...,candidate00001
2,unknown00003,Britain's motor industry reported 1996 car reg...,candidate00009
3,unknown00004,When the former Czechoslovak diplomat Josef Ko...,candidate00019
4,unknown00005,China is building a network of major toll high...,candidate00012
...,...,...,...
2495,unknown02496,Britain's big banks look set to raise profits ...,candidate00018
2496,unknown02497,After two years of hype and euphoria about the...,candidate00047
2497,unknown02498,Czech annual average consumer inflation eased ...,candidate00002
2498,unknown02499,"Kellogg Co, whose profits for 1996 are under p...",candidate00037


In [121]:
cols = list(test_data.columns)
cols = [cols[-1]] + cols[:-1]
test_data = test_data.reindex(columns=cols)
test_data

Unnamed: 0,true-author,ID,Text
0,candidate00046,unknown00001,China and Britain agreed on Wednesday to relea...
1,candidate00001,unknown00002,The Federal Reserve may not be taking adequate...
2,candidate00009,unknown00003,Britain's motor industry reported 1996 car reg...
3,candidate00019,unknown00004,When the former Czechoslovak diplomat Josef Ko...
4,candidate00012,unknown00005,China is building a network of major toll high...
...,...,...,...
2495,candidate00018,unknown02496,Britain's big banks look set to raise profits ...
2496,candidate00047,unknown02497,After two years of hype and euphoria about the...
2497,candidate00002,unknown02498,Czech annual average consumer inflation eased ...
2498,candidate00037,unknown02499,"Kellogg Co, whose profits for 1996 are under p..."


In [122]:
test_data = test_data.rename(columns={'true-author': 'Author'})
test_data

Unnamed: 0,Author,ID,Text
0,candidate00046,unknown00001,China and Britain agreed on Wednesday to relea...
1,candidate00001,unknown00002,The Federal Reserve may not be taking adequate...
2,candidate00009,unknown00003,Britain's motor industry reported 1996 car reg...
3,candidate00019,unknown00004,When the former Czechoslovak diplomat Josef Ko...
4,candidate00012,unknown00005,China is building a network of major toll high...
...,...,...,...
2495,candidate00018,unknown02496,Britain's big banks look set to raise profits ...
2496,candidate00047,unknown02497,After two years of hype and euphoria about the...
2497,candidate00002,unknown02498,Czech annual average consumer inflation eased ...
2498,candidate00037,unknown02499,"Kellogg Co, whose profits for 1996 are under p..."


In [123]:
test_data = test_data.drop(columns='ID', axis=1)
test_data

Unnamed: 0,Author,Text
0,candidate00046,China and Britain agreed on Wednesday to relea...
1,candidate00001,The Federal Reserve may not be taking adequate...
2,candidate00009,Britain's motor industry reported 1996 car reg...
3,candidate00019,When the former Czechoslovak diplomat Josef Ko...
4,candidate00012,China is building a network of major toll high...
...,...,...
2495,candidate00018,Britain's big banks look set to raise profits ...
2496,candidate00047,After two years of hype and euphoria about the...
2497,candidate00002,Czech annual average consumer inflation eased ...
2498,candidate00037,"Kellogg Co, whose profits for 1996 are under p..."


In [124]:
full_data = pd.concat([train_data, test_data])
full_data

Unnamed: 0,Author,Text
0,candidate00001,The Internet may be overflowing with new techn...
1,candidate00001,The U.S. Postal Service announced Wednesday a ...
2,candidate00001,Elementary school students with access to the ...
3,candidate00001,An influential Internet organisation has backe...
4,candidate00001,An influential Internet organisation has backe...
...,...,...
2495,candidate00018,Britain's big banks look set to raise profits ...
2496,candidate00047,After two years of hype and euphoria about the...
2497,candidate00002,Czech annual average consumer inflation eased ...
2498,candidate00037,"Kellogg Co, whose profits for 1996 are under p..."


In [125]:
# remove leading and trailing whitespace
full_data['Text'] = full_data['Text'].str.strip()

# replace multiple spaces with a single space
full_data['Text'] = full_data['Text'].str.replace(r'\s+', ' ')


In [126]:
remove_emails = lambda x: re.sub(r'\S+@\S+', '', x)
remove_urls = lambda x: re.sub(r'http\S+', '', x)

full_data['Text'] = full_data['Text'].apply(remove_emails).apply(remove_urls)
full_data['Text'] = full_data['Text'].apply(lambda x: re.sub('[%s]' % re.escape(string.punctuation), '', x.lower()))
full_data['Text'] = full_data['Text'].str.replace('\n', '')


In [127]:
word_count_func = lambda x: len(x.split())
train_data['word_count'] = train_data['Text'].apply(word_count_func)
test_data['word_count'] = test_data['Text'].apply(word_count_func)
test_data

Unnamed: 0,Author,Text,word_count
0,candidate00046,China and Britain agreed on Wednesday to relea...,36
1,candidate00001,The Federal Reserve may not be taking adequate...,37
2,candidate00009,Britain's motor industry reported 1996 car reg...,35
3,candidate00019,When the former Czechoslovak diplomat Josef Ko...,35
4,candidate00012,China is building a network of major toll high...,35
...,...,...,...
2495,candidate00018,Britain's big banks look set to raise profits ...,37
2496,candidate00047,After two years of hype and euphoria about the...,31
2497,candidate00002,Czech annual average consumer inflation eased ...,19
2498,candidate00037,"Kellogg Co, whose profits for 1996 are under p...",37


# Word2Vec Model

In [None]:
# from gensim.models import KeyedVectors

# # load the pre-trained FastText model
# model_path = r"wiki-news-300d-1M-subword.vec\wiki-news-300d-1M-subword.vec"
# model = KeyedVectors.load_word2vec_format(model_path, binary=False)

# # check the size of the vocabulary and the vector size
# print(f"Vocabulary size: {len(model.index_to_key)}")
# print(f"Vector size: {model.vector_size}")
# len(model)

# BERT Word Embeddings

In [13]:
MAX_LENGTH = train_data['word_count'].max()

In [None]:
model = BertModel.from_pretrained('bert-base-uncased',
           output_hidden_states = True,)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = 'cuda'
model.to(device)

In [4]:
# # def bert_sent_embedding(doc, model):
# #     flatten_emb = []
# #     sentences = doc.split('.')
# #     embeddings = model.encode(sentences)
# #     for sent in embeddings:
# #         flatten_emb.extend(sent)
# #     return flatten_emb

# def bert_text_preparation(text, tokenizer):
#   """
#   Preprocesses text input in a way that BERT can interpret.
#   """
#   marked_text = "[CLS] " + text + " [SEP]"
#   tokenized_text = tokenizer.tokenize(marked_text)
#   indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
#   segments_ids = [1]*len(indexed_tokens)

#   # convert inputs to tensors
#   tokens_tensor = torch.tensor([indexed_tokens])
#   segments_tensor = torch.tensor([segments_ids])

#   return tokenized_text, tokens_tensor, segments_tensor

In [5]:
# train_doc_embs = []
# for doc in train_docs:
#     train_doc_embs.append(bert_sent_embedding(doc, model))
# train_doc_embs

In [93]:
# test_doc_embs = []
# for doc in test_docs:
#     test_doc_embs.append(bert_sent_embedding(doc, model))
# test_doc_embs

# Get Embeddings

In [18]:
train_docs = train_data['Text'].to_list()
train_docs_labels = train_data['Author'].to_list()
test_docs = test_data['Text'].to_list()
test_docs_labels = test_data['true-author'].to_list()

In [19]:
len(train_docs)

2500

In [20]:
train_tokens = [tokenizer.tokenize(doc) for doc in train_docs]
test_tokens = [tokenizer.tokenize(doc) for doc in test_docs]
# Add special tokens [CLS] and [SEP] to the train and test tokens
train_tokens = [['[CLS]'] + tokens + ['[SEP]'] for tokens in train_tokens]
test_tokens = [['[CLS]'] + tokens + ['[SEP]'] for tokens in test_tokens]


In [21]:
# Convert the train and test tokens to token IDs
train_input_ids = [tokenizer.convert_tokens_to_ids(tokens) for tokens in train_tokens]
test_input_ids = [tokenizer.convert_tokens_to_ids(tokens) for tokens in test_tokens]


In [22]:
# Pad the train and test input IDs to the same length
max_len = max(len(ids) for ids in train_input_ids + test_input_ids)
train_input_ids = [ids + [0] * (max_len - len(ids)) for ids in train_input_ids]
test_input_ids = [ids + [0] * (max_len - len(ids)) for ids in test_input_ids]


In [23]:
# Convert the train and test input IDs to PyTorch tensors
train_input_ids = torch.tensor(train_input_ids).to(device)
test_input_ids = torch.tensor(test_input_ids).to(device)

In [24]:
# Set batch size and compute number of batches
batch_size = 5
num_train_batches = len(train_input_ids) // batch_size
num_test_batches = len(test_input_ids) // batch_size

# Create train and test data loaders
train_data = torch.utils.data.DataLoader(
    [(train_input_ids[i*batch_size:(i+1)*batch_size], i) for i in range(num_train_batches)],
    batch_size=None,
    shuffle=False
)
test_data = torch.utils.data.DataLoader(
    [(test_input_ids[i*batch_size:(i+1)*batch_size], i) for i in range(num_test_batches)],
    batch_size=None,
    shuffle=False
)

# Compute the BERT embeddings for the train and test documents
with torch.no_grad():
    train_embeddings = []
    test_embeddings = []
    for batch, _ in train_data:
        batch_embeddings = model(batch)[0]
        train_embeddings.append(batch_embeddings)
    for batch, _ in test_data:
        batch_embeddings = model(batch)[0]
        test_embeddings.append(batch_embeddings)
    train_embeddings = torch.cat(train_embeddings, dim=0)
    test_embeddings = torch.cat(test_embeddings, dim=0)

# The shape of the document-level embeddings is (num_docs, embedding_dim)
# You can use these embeddings to train a classifier or for other downstream tasks


In [28]:
train_embeddings.shape, test_embeddings.shape

(torch.Size([2500, 88, 768]), torch.Size([2500, 88, 768]))

In [15]:
# To get the document-level embeddings, you can average the embeddings across the token dimension
train_doc_embeddings = torch.mean(train_embeddings, dim=1)
test_doc_embeddings = torch.mean(test_embeddings, dim=1)

In [29]:
# reshape the embeddings into a 2D array
num_sentences, max_seq_len, embedding_dim = train_embeddings.shape
train_embeddings = train_embeddings.view(num_sentences, max_seq_len * embedding_dim)

In [30]:
num_sentences, max_seq_len, embedding_dim = test_embeddings.shape
test_embeddings = test_embeddings.view(num_sentences, max_seq_len * embedding_dim)

In [31]:
train_embeddings.shape, test_embeddings.shape

(torch.Size([2500, 67584]), torch.Size([2500, 67584]))

In [None]:
train_embeddings.cpu().numpy()

# Classifier Model

In [19]:
train_text = list(train_data['Text'])
train_auth = list(train_data['Author'])
test_text = list(test_data['Text'])
test_auth = list(test_data['Author'])

In [25]:
train_emp = get_empath(train_text)
test_emp = get_empath(test_text)
keys = list(train_emp[0].keys())


In [26]:
train_emp_df = pd.json_normalize(train_emp)
train_emp_df.columns = keys
test_emp_df = pd.json_normalize(test_emp)
test_emp_df.columns = keys

In [27]:
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC, NuSVC
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.pipeline import Pipeline, make_pipeline

vec = TfidfVectorizer(min_df=3, stop_words='english',
                      ngram_range=(1, 2))
pca = PCA(n_components=200, random_state=42)
svd = TruncatedSVD(n_components=75, n_iter=100, random_state=42)
clf = NuSVC(random_state=42)
pipe = make_pipeline(svd,clf)

# clf = SVC(C=150, gamma=2e-2, probability=True)
# pipe = make_pipeline(lsa, clf)
# pipe.fit(train_text, train_auth)
# pipe.score(test_text, test_auth)

pipe.fit(train_ws_df, train_auth)
pipe.score(test_ws_df, test_auth)

NameError: name 'train_ws_df' is not defined

In [155]:
lc = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = lc.fit(train_emp_df, test_emp_df, train_auth, test_auth)

100%|██████████| 29/29 [01:02<00:00,  2.17s/it]


In [156]:
models

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreesClassifier,0.18,0.18,,0.18,1.38
RandomForestClassifier,0.17,0.17,,0.17,1.71
RidgeClassifierCV,0.17,0.17,,0.16,0.23
RidgeClassifier,0.17,0.17,,0.16,0.11
LinearDiscriminantAnalysis,0.17,0.17,,0.17,0.27
LGBMClassifier,0.16,0.16,,0.16,4.78
NuSVC,0.16,0.16,,0.16,2.5
BernoulliNB,0.15,0.15,,0.14,0.17
BaggingClassifier,0.15,0.15,,0.14,2.07
SVC,0.15,0.15,,0.15,2.16


In [None]:
train_ws = vec_ws.transform(train_text)
test_ws = vec_ws.transform(test_text)

In [174]:
train_ws_arr = train_ws.toarray()
test_ws_arr = test_ws.toarray()
test_ws_arr

train_ws_df = pd.DataFrame(columns=wps_features)
test_ws_df = pd.DataFrame(columns=wps_features)


552

In [191]:
train_ws_df =pd.DataFrame(train_ws_arr, columns= wps_features)
test_ws_df =pd.DataFrame(train_ws_arr, columns= wps_features)
test_ws_df.all(axis=0).all()

False

In [186]:
models, predictions = lc.fit(train_ws_df, test_ws_df, train_auth, test_auth)

'tuple' object has no attribute '__name__'
Invalid Classifier(s)


100%|██████████| 29/29 [01:35<00:00,  3.30s/it]


In [187]:
models

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [70]:
test_pred = pipe.predict(test_text)
test_tfidf = vec.transform(test_text)

In [97]:
t = pipe.predict_proba(test_text)
t[1]

array([0.10319855, 0.02477061, 0.01826841, 0.01676215, 0.03019434,
       0.01982165, 0.0211624 , 0.01289245, 0.01825405, 0.02305973,
       0.02257832, 0.01733536, 0.01220541, 0.01865016, 0.01555266,
       0.01485771, 0.01564377, 0.02860689, 0.02181543, 0.01459736,
       0.01660711, 0.01651339, 0.02015031, 0.02296129, 0.01542159,
       0.01677756, 0.01751102, 0.01349318, 0.01433777, 0.01511142,
       0.01667602, 0.02225941, 0.01345882, 0.02131578, 0.01678983,
       0.01530981, 0.02278673, 0.01234125, 0.01574953, 0.01531201,
       0.02662716, 0.02032261, 0.01810954, 0.01995325, 0.01737444,
       0.01465417, 0.01637809, 0.01890779, 0.01613328, 0.02042843])

In [87]:
print('classifiation report')
# print(classification_report(test_auth, test_pred,  target_names=train_data['Author'].unique()))

classifiation report


In [58]:
test_res_df = pd.DataFrame({'true_label': test_auth, 'predicted_label': test_pred})
test_res_df
correct_pred_df = test_res_df[test_res_df["true_label"] == test_res_df["predicted_label"]]

In [60]:
correct_pred_df.head(20)

Unnamed: 0,true_label,predicted_label
1,candidate00001,candidate00001
4,candidate00012,candidate00012
5,candidate00033,candidate00033
6,candidate00024,candidate00024
8,candidate00019,candidate00019
9,candidate00048,candidate00048
11,candidate00012,candidate00012
12,candidate00047,candidate00047
14,candidate00017,candidate00017
17,candidate00011,candidate00011


In [88]:
def print_prediction(doc):
    y_pred = pipe.predict_proba([doc])[0]
    for target, prob in zip(set(train_auth), y_pred):
        print("{:.3f} {}".format(prob, target))

# doc = test_text[0]
# print_prediction(doc)

In [15]:
# compare the predicted labels with the true labels to identify the misclassified instances
misclassified_indices = [i for i in range(len(test_auth)) if test_auth[i] != test_pred[i]]

# retrieve the texts corresponding to the misclassified instances
misclassified_texts = [test_text[i] for i in misclassified_indices]

classified_indices = [i for i in range(len(test_auth)) if test_auth[i] == test_pred[i]]

classified_texts = [test_text[i] for i in classified_indices]


In [38]:
len(classified_texts)
# 1313/2500
# classified_indices #1,4,5,6,8,9,11,12,14,17,19,20,21,22,23,24,25,27,30

1313

## LIME

In [None]:
classes = train_data['Author'].unique()
explainer = LimeTextExplainer(class_names=classes)
exp_text =test_text[4]
lime_exp = explainer.explain_instance(exp_text, pipe.predict_proba)#, num_features=(len(exp_text)//5))

In [29]:
# single_pred= pipe.predict(exp_text)
test_pred[1]


NameError: name 'test_pred' is not defined

In [82]:
# fig = exp.as_pyplot_figure()
# classes
test_auth[1]
# lime_exp.as_list()


'candidate00001'

In [None]:
lime_exp.show_in_notebook(text=True)

## SHAP

In [40]:
shap_exp = shap.Explainer(pipe.predict_proba)


In [None]:
# compute the SHAP values for the text instance
shap_values = shap_exp(test_tfidf[14])

In [None]:
# plot the SHAP values as a bar chart
shap.summary_plot(shap_values[1], exp_text, feature_names=vec.get_feature_names())

In [10]:
def embedding_function(sentence, model):
    # Tokenize the sentence into individual words
    tokens = sentence.split()
    # Get the embedding for each word in the sentence
    embeddings = []
    for token in tokens:
        if token in model:
            embeddings.append(model[token])
    # Calculate the average embedding for the sentence
    if len(embeddings) > 0:
        sentence_embedding = np.mean(embeddings, axis=0)
    else:
        sentence_embedding = np.zeros((300,))
    return sentence_embedding

# Get Sentence Embedding

In [12]:
def get_sentence_embedding(sentence):
    # Tokenize the sentence into individual words
    tokens = sentence.split()
    # Get the embedding for each word in the sentence
    embeddings = []
    for token in tokens:
        if token in model:
            embeddings.append(model[token])
    # Calculate the average embedding for the sentence
    if len(embeddings) > 0:
        sentence_embedding = np.mean(embeddings, axis=0)
    else:
        sentence_embedding = np.zeros((300,))
    return sentence_embedding

# Transformers_Interpret

In [19]:
categories = pd.Categorical(full_data['Author']).categories
full_data['encoded_Author'] = pd.Categorical(full_data['Author'], categories=categories).codes

full_texts = full_data['Text'].to_list()
full_labels = full_data['encoded_Author'].to_list()


In [20]:
#FOR FULL_DATA
X = full_data['Text']#.drop(['Author', 'encoded_author'], axis=1)
y = full_data['encoded_Author']

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Split train into train and validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)
len(X_train), len(X_test), len(X_val), len(y_train), len(y_test), len(y_val)

(3000, 1000, 1000, 3000, 1000, 1000)

In [21]:
# MAX_LENGTH = train_data['word_count'].max()

# df_balanced['encoded_id'] = df_balanced['id'].astype('category').cat.codes
# get unique categories for Author in both dataframes
categories = pd.Categorical(pd.concat([train_data['Author'], test_data['true-author']], ignore_index=True)).categories

# assign categorical variable to both dataframes using the same categories
train_data['encoded_Author'] = pd.Categorical(train_data['Author'], categories=categories).codes
test_data['encoded_Author'] = pd.Categorical(test_data['true-author'], categories=categories).codes


# Define the training and validation data
# texts = df_balanced['text'].to_list()
# labels = df_balanced['encoded_id'].to_list()

train_texts = train_data['Text'].to_list()
train_labels = train_data['encoded_Author'].to_list()
#split test into test and validation
test_texts = test_data['Text'].to_list()
test_labels = test_data['encoded_Author'].to_list()

NameError: name 'train_data' is not defined

In [75]:
#split test into test and validation
test_texts = test_data['Text'].to_list()
test_labels = test_data['encoded_Author'].to_list()

X_test, X_val, y_test, y_val = train_test_split(test_texts, test_labels, test_size=0.8, random_state=42, stratify=test_labels)
len(X_test), len(X_val), len(y_test), len(y_val)

(500, 2000, 500, 2000)

In [24]:
N_labels = len(full_data['encoded_Author'].unique())#len(train_data['Author'].unique())

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased',  do_lower_case=True)

# load the saved model
saved_model_path = r"distilbert_finetuned_c50\distilbert_finetuned_c50_35.pt"
saved_model = torch.load(saved_model_path, map_location=torch.device('cpu'))["model_state_dict"]

# get the shape of the current model's classifier layer
current_model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=N_labels)
current_weight_shape = current_model.classifier.weight.shape
current_bias_shape = current_model.classifier.bias.shape

# modify the saved model's classifier layer
saved_model["classifier.weight"] = saved_model["classifier.weight"][:current_weight_shape[0], :]
saved_model["classifier.bias"] = saved_model["classifier.bias"][:current_bias_shape[0]]

# load the modified saved model into the current model
current_model.to(device)
current_model.load_state_dict(saved_model)

NameError: name 'full_data' is not defined

In [252]:
text = str(X_test[1501])
true_auth = y_test[1501]

cls_explainer = SequenceClassificationExplainer(current_model, tokenizer)#, custom_labels=set(y_test))
word_attributions = cls_explainer(text)
del word_attributions[0]

IndexError: list index out of range

In [124]:
# print(f"Actual: {true_auth}, Predicted: {cls_explainer.predicted_class_name}")
# #predicted starts at 0 so +1
# print(f"Text: {text}")

In [123]:
# cls_explainer.visualize(true_class=true_auth)

## Attribution Score

In [2]:
def get_attribution_score(word_attributions):
    total = 0
    for _, value in word_attributions:
        total += value
    return total

In [3]:
# original_attribution_score = get_attribution_score(word_attributions)
# original_attribution_score

## Recombine Tokens

In [4]:
def get_full_words(text, word_attributions):
    # get list of tokens for the input text
    tokens = tokenizer.tokenize(text)

    # initialize list to store full words
    full_words = []
    full_scores = []
    # del word_attributions[-1]
    # print(word_attributions)
    # iterate over word_attributions and map subwords to full words
    for i, attrib in enumerate(word_attributions):
        # attrib_list =list(attrib)
        if i == len(tokens): # ignore padding tokens
            break
        elif tokens[i].startswith("##"): # subword token
            #remove ## and add to last word of full_words
            full_words[-1] += tokenizer.convert_tokens_to_string([tokens[i]])[2:]
            # full_words[-1] = (full_words[-1] + tokenizer.convert_tokens_to_string([tokens[i]])[2:], full_words[-1] + attrib[1])
            # print(full_scores[-1], tokens[i])
            full_scores[-1] += attrib[1]
            # print( attrib[1])
            # print(full_words[-1][1] + attrib[1])
            # full_words [-1] = (full_words[-1][1] + attrib)
        else: # whole word token
            #add it as a separate word
            full_words.append(tokenizer.convert_tokens_to_string([tokens[i]]))#, attrib[1]))
            full_scores.append(attrib[1])
            # print(full_scores[-1])
            # print(full_words[-1])
    full_word_attributions = list(zip(full_words, full_scores))
    # print(full_words)
    # print(full_scores)
    return full_word_attributions

In [5]:
# full_word_attributions = get_full_words(text, word_attributions)
# full_word_attributions

## Get Top Words

In [6]:
import spacy
import string
def get_topk(text, word_attributions):
    recombined_words = get_full_words(text, word_attributions)
    top_words = []

    stop_words = set(stopwords.words('english'))
    nlp = spacy.load("en_core_web_sm")

    doc = nlp(text) #the most recent sentence form; keep to revert back to after subbing a word
    doc_ents = [str(ent) for ent in doc.ents] #get the entities to skip over



    sorted_words = sorted(recombined_words, key=lambda x: x[1], reverse=True)
    positive_words = [t[0] for t in sorted_words if t[1] > 0]

    for word in positive_words: #loop through top k words
        if word in doc_ents or word in stop_words or any(punc in word for punc in string.punctuation): #if key is an entity, skip it
            # print("NER")
            continue
        else:
            top_words.append(word)
    
    return top_words

In [7]:
# top_words = get_topk(text, full_word_attributions)
# top_words, text

## Get Synonyms

### WordNet 

In [8]:
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Define a function to get the top k synonyms for a word
def get_synonyms_wn(word, k=5):
    # Get the synsets for the word
    synsets = wordnet.synsets(word)
    # Get the lemmas for each synset and add them to a set
    synonyms = set()
    for synset in synsets:
        for lemma in synset.lemmas():
            # Lemmatize the lemma and add it to the set of synonyms
            lemma = lemmatizer.lemmatize(lemma.name())
            if lemma != word:
                synonyms.add(lemma)
    # Convert the set to a list and return the top k synonyms
    synonyms = [syn.replace("_", " ") for syn in synonyms]
    return list(synonyms)[:k]


def get_syn_dict_wn(words):
    # Dictionary to hold the synonyms
    synonyms_dict = {}

    # Get the synonyms for each word and add them to the dictionary
    for word in words:
        synonyms = get_synonyms_wn(word)
        synonyms_dict[word] = synonyms

    # Print the dictionary
    # print(synonyms_dict)
    return synonyms_dict

In [9]:
# syn_dict = get_syn_dict_wn(top_words)
# syn_dict

### FastText Model with gensim

In [10]:
from gensim.models import KeyedVectors

# load the pre-trained FastText model
vec_model_path = r"wiki-news-300d-1M-subword.vec\wiki-news-300d-1M-subword.vec"
vec_model = KeyedVectors.load_word2vec_format(vec_model_path, binary=False)

# check the size of the vocabulary and the vector size
print(f"Vocabulary size: {len(vec_model.index_to_key)}")
print(f"Vector size: {vec_model.vector_size}")
len(vec_model)

Vocabulary size: 999994
Vector size: 300


999994

In [11]:
from gensim.models import KeyedVectors

# load the FastText model
# model = KeyedVectors.load_word2vec_format('path/to/fasttext.bin', binary=True)

def get_synonyms_kv(word, k=5):
    try:
        # get the k most similar words to the given word from the FastText model
        synonyms = vec_model.most_similar(word, topn=k)
        return [synonym[0] for synonym in synonyms]
    except KeyError:
        # handle the case where the word is not in the FastText model
        return []


def get_syn_dict_kv(words):
    # Dictionary to hold the synonyms
    synonyms_dict = {}

    # Get the synonyms for each word and add them to the dictionary
    for word in words:
        synonyms = get_synonyms_kv(word)
        synonyms_dict[word] = synonyms

    # Print the dictionary
    # print(synonyms_dict)
    return synonyms_dict


In [12]:
# syn_dict = get_syn_dict_kv(top_words)
# syn_dict

## Sub Words

In [13]:
#sandbox
import spacy
import re
# nlp = spacy.load("en_core_web_sm")

# NER = ['ORG', 'PERSON', 'GPE', 'DATE', 'TIME', 'PRODUCT']
# sent = "Today is a Tuesday , tomorrow will be Wednesday. We will be going to Mercury Finance Co. on Thursday!"
# topkwords = {"Tuesday": ["Thursday", "BLANK"], "be": ["is", "BLANK"], "Mercury Finance Co.": ["BLANK"]} #{topk: [synonyms]}

# orig_attrib_score = get_attribution_score(word_attributions)
# orig_label = true_auth
# new_label = ""
# curr_sent = sent #the most recent sentence form; keep to revert back to after subbing a word
# test_sent = "" #for testing out replacement words
# new_sent = "" #what is returned with the best subs


def sub_words(original_text,synonym_dict, original_label, original_attribution_score):
    # doc = nlp(original_text) #the most recent sentence form; keep to revert back to after subbing a word
    # doc_ents = [str(ent) for ent in doc.ents] #get the entities to skip over

    curr_text = original_text
    curr_text = re.sub(r'\s+', ' ', curr_text)
    
    test_text = "" #for testing out replacement words
    new_label = ""

    for key in synonym_dict: #loop through top k words
    #     if key in doc_ents: #if key is an entity, skip it
    #         # print("NER")
    #         continue
        if synonym_dict[key]:#make sure the list of synonyms for a key isn't empty
            for value in synonym_dict[key]: #loop through list of values for each top k word
                test_text = curr_text.replace(key, value) #replace keyword

                word_attributions = cls_explainer(test_text) #reclassify with replacement
                del word_attributions[0]
                
                new_attrib_score = get_attribution_score(word_attributions) #get new attribution score
                new_label = cls_explainer.predicted_class_name

                if new_label != original_label: #SUCCESS
                    print("SUCCESS!")
                    curr_text = test_text
                    return curr_text
                
                elif new_attrib_score < original_attribution_score:
                    curr_text = test_text #updating the current sentence with the new word that caused the attribution score to drop
                else: #reset sentence back to previous to try other values if the score doesnt drop or the label doesnt change
                    test_text = curr_text 
        else:#go to the next key
            continue
    print("Unable to change label.")
    return curr_text#, new_label

In [37]:
# sentence = sub_words(text, syn_dict, cls_explainer.predicted_class_name, original_attribution_score)
# sentence

In [38]:
# sent_exp = cls_explainer(sentence[0])
# cls_explainer.visualize(true_class=true_auth)

test text --> explainer --> word attributions, predicted class name
word attributions --> sort and select top k --> top k
top k --> find top n synonyms for each k --> substitute & explain/predict

# Run Interpreter

In [161]:
classified_data = pd.read_csv(r'distilbert_finetuned_c50_w2v\finetuned_distilbert_c50_w2v_pt25_testClassifications.csv')
classified_data#= classified_data.rename(columns={'encoded-author': 'encoded-cluster'})

Unnamed: 0.1,Unnamed: 0,Text,encoded-author,pred
0,1735,china on tuesday rolled out the memory of long...,4,4
1,2709,cisco systems inc fell more than two points in...,1,1
2,251,caterpillar inc the worlds largest constructio...,3,3
3,4284,standard amp poors sampp on tuesday gave inves...,4,4
4,1027,a powerful car bomb exploded in the northwest ...,0,5
...,...,...,...,...
995,3596,china showered the world trade organisation wt...,1,1
996,1432,western oil firms frustrated by moscows footdr...,0,0
997,1898,hong kongs leaderinwaiting tung cheehwa on fri...,4,4
998,3498,britains express newspapers seeking to halt a ...,5,5


In [162]:
classified_data =classified_data.drop(classified_data.columns[0],axis=1)
classified_data

Unnamed: 0,Text,encoded-author,pred
0,china on tuesday rolled out the memory of long...,4,4
1,cisco systems inc fell more than two points in...,1,1
2,caterpillar inc the worlds largest constructio...,3,3
3,standard amp poors sampp on tuesday gave inves...,4,4
4,a powerful car bomb exploded in the northwest ...,0,5
...,...,...,...
995,china showered the world trade organisation wt...,1,1
996,western oil firms frustrated by moscows footdr...,0,0
997,hong kongs leaderinwaiting tung cheehwa on fri...,4,4
998,britains express newspapers seeking to halt a ...,5,5


In [280]:
correctly_classified = classified_data[classified_data['encoded-author'] != classified_data['pred']]
correctly_classified.drop(correctly_classified.columns[0],axis=1)
correctly_classified = correctly_classified.reset_index(drop=True)
correctly_classified

Unnamed: 0,Text,encoded-author,pred
0,a powerful car bomb exploded in the northwest ...,0,5
1,british bank barclays tuesday sold its global ...,1,4
2,christian salvesen moved to silence critics on...,5,2
3,schroders plc one of the great british merchan...,3,0
4,cocoa bean exports from ivory coast jumped to ...,4,1
...,...,...,...
143,seven years after general motors corp created ...,1,4
144,beleaguered auto lender mercury finance co sai...,0,3
145,when thousands of auto dealers rolled into las...,1,4
146,sun microsystems inc said monday it will move ...,4,1


In [272]:
X_test = correctly_classified['Text'].to_list()
y_test = correctly_classified['encoded-author'].to_list()
text = str(X_test[28])
true_auth = y_test[28]
len(y_test)

148

In [273]:
N_labels = len(correctly_classified['encoded-author'].unique())#len(train_data['Author'].unique())

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased',  do_lower_case=True)

# load the saved model
saved_model_path = r"distilbert_finetuned_c50_w2v\distilbert_finetuned_w2v_25.pt"
saved_model = torch.load(saved_model_path, map_location=torch.device('cpu'))["model_state_dict"]

# get the shape of the current model's classifier layer
current_model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=N_labels)
current_weight_shape = current_model.classifier.weight.shape
current_bias_shape = current_model.classifier.bias.shape

# modify the saved model's classifier layer
saved_model["classifier.weight"] = saved_model["classifier.weight"][:current_weight_shape[0], :]
saved_model["classifier.bias"] = saved_model["classifier.bias"][:current_bias_shape[0]]

# load the modified saved model into the current model
current_model.to(device)
current_model.load_state_dict(saved_model)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifi

<All keys matched successfully>

In [274]:
cls_explainer = SequenceClassificationExplainer(current_model, tokenizer)#, custom_labels=set(y_test))

In [279]:
word_attributions = cls_explainer(text)
# del word_attributions[0]
print(f"Text:{text}")
word_attributions

Text: although a canadian federal election is not expected to be called until this weekend the mudslinging has already begun


[('[CLS]', 0.0),
 ('although', 0.16466716587022343),
 ('a', 0.3276135240589652),
 ('canadian', 0.2505077284974645),
 ('federal', 0.15497067175033694),
 ('election', 0.15669370051232598),
 ('is', 0.24785596228637974),
 ('not', 0.1762708935991498),
 ('expected', 0.11468274592209601),
 ('to', 0.22708135519372954),
 ('be', 0.15275231067569991),
 ('called', 0.26012392512293114),
 ('until', 0.38119367811449123),
 ('this', 0.30652626886531825),
 ('weekend', 0.03981159119581413),
 ('the', 0.20561479622806153),
 ('mud', 0.05337092448099583),
 ('##sling', 0.012339008779694845),
 ('##ing', 0.07203904787454071),
 ('has', 0.3289383542447527),
 ('already', 0.1703913812678424),
 ('begun', 0.27831273007095736),
 ('[SEP]', 0.0)]

In [249]:
print(f"Actual: {true_auth}, Predicted: {cls_explainer.predicted_class_name}")
#predicted starts at 0 so +1
print(f"Text: {text}")

Actual: 0, Predicted: LABEL_0
Text: air france is linking up with two major us carriers delta air lines and continental in a transatlantic alliance crucial to its ability to keep up with its major european rivals


In [251]:
cls_explainer.visualize(true_class=true_auth)

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (1.00),LABEL_0,4.96,[CLS] air france is linking up with two major us carriers delta air lines and continental in a transatlantic alliance crucial to its ability to keep up with its major european rivals [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (1.00),LABEL_0,4.96,[CLS] air france is linking up with two major us carriers delta air lines and continental in a transatlantic alliance crucial to its ability to keep up with its major european rivals [SEP]
,,,,


In [213]:
original_attribution_score = get_attribution_score(word_attributions)
original_attribution_score

2.542615141529567

In [214]:
full_word_attributions = get_full_words(text, word_attributions)
full_word_attributions

[('the', -0.010615136881070705),
 ('television', 0.03221066216463168),
 ('industrys', 0.11658978595490384),
 ('new', 0.03024271820440124),
 ('ratings', 0.00790119365690451),
 ('system', 0.049031277488914574),
 ('is', 0.0805620182687031),
 ('a', 0.7092503324558732),
 ('bust', 0.09086851641083298),
 ('and', 0.07243187056596428),
 ('is', -0.05547024102542117),
 ('rife', -0.013960419118532835),
 ('with', 0.04251547239260561),
 ('inconsistencies', 0.04692966869367095),
 ('that', 0.03211317965527961),
 ('make', 0.03682832055236158),
 ('it', -0.004514299874056502),
 ('tougher', -0.09217123755915176),
 ('for', -0.0038818549190289308),
 ('parents', 0.10027275661494167),
 ('to', -0.035084959901587005),
 ('pick', 0.08342817317356782),
 ('shows', 0.02372020508680038),
 ('appropriate', 0.03250269176827331),
 ('for', -0.01040339298609321),
 ('kids', 0.11963611692238996),
 ('a', 0.5499101356608571),
 ('conservative', -0.054956152279288586),
 ('media', 0.009140603319187346),
 ('watchdog', -0.018517697

In [26]:
top_words = get_topk(text, full_word_attributions)
top_words, text

(['canadas',
  'biggest',
  'thirdquarter',
  'buoyant',
  'poised',
  'setting',
  'earnings',
  'round'],
 'canadas six biggest banks are poised for a further round of buoyant profits when they begin reporting thirdquarter earnings tuesday setting the stage for a third consecutive record year analysts said')

In [27]:
# WordNet
syn_dict = get_syn_dict_wn(top_words)
# syn_dict

# Gensim with fasttext model
# syn_dict = get_syn_dict_kv(top_words)
syn_dict

{'canadas': ['Canada'],
 'biggest': ['freehanded', 'crowing', 'swelled', 'enceinte', 'handsome'],
 'thirdquarter': [],
 'buoyant': ['perky', 'floaty', 'chirpy'],
 'poised': ['brace',
  'self-contained',
  'self-collected',
  'poise',
  'self-possessed'],
 'setting': ['rig', 'place', 'put', 'jell', 'coif'],
 'earnings': ['earn', 'wage', 'garner', 'realize', 'pay'],
 'round': ['around', 'pear-shaped', 'rung', 'rotund', 'cycle']}

In [28]:
sentence = sub_words(text, syn_dict, cls_explainer.predicted_class_name, original_attribution_score)
sentence

Unable to change label.


'Canada six freehanded banks are brace for a further apear-shaped of perky profits when they begin reporting thirdquarter earn tuesday rig the stage for a third consecutive record year analysts said'

In [38]:
sent_exp = cls_explainer(sentence)
cls_explainer.visualize(true_class=true_auth)

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
6.0,LABEL_6 (0.80),LABEL_6,0.94,[CLS] canada six bo ##ast ##ful banks are po ##ise for a further aa ##tta ##ck of drinks out of golf of float ##y profits when they begin reporting third ##qua ##rter realise tuesday dress the stage for a third consecutive record year analysts said [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
6.0,LABEL_6 (0.80),LABEL_6,0.94,[CLS] canada six bo ##ast ##ful banks are po ##ise for a further aa ##tta ##ck of drinks out of golf of float ##y profits when they begin reporting third ##qua ##rter realise tuesday dress the stage for a third consecutive record year analysts said [SEP]
,,,,


# Adversarial Generator 

In [39]:
#loop through multiple samples
results = pd.DataFrame(columns=['Original Label', 'Original Attribution Score', 'Original Text', 'New Label', 'New Attribution Score', 'New text', 'Misclassified'])

def generate_adv(text):
    cls_explainer = SequenceClassificationExplainer(current_model, tokenizer)#, custom_labels=set(y_test))
    
    word_attributions = cls_explainer(text)
    del word_attributions[0]
    original_label = cls_explainer.predicted_class_name
    original_attribution_score = get_attribution_score(word_attributions)

    full_word_attributions = get_full_words(text, word_attributions)

    top_words = get_topk(text, full_word_attributions)
    
    # syn_dict = get_syn_dict_wn(top_words)
    syn_dict = get_syn_dict_kv(top_words)

    adv = sub_words(text, syn_dict, cls_explainer.predicted_class_name, original_attribution_score)
    adv_attributions = cls_explainer(adv)
    adv_label = cls_explainer.predicted_class_name
    adv_attrib_score = get_attribution_score(adv_attributions) 
    if original_label != adv_label:
        misclassified = True
    else:
        misclassified = False

    return {'Original Label': original_label, 'Original Attribution Score': original_attribution_score,
             'Original Text': text, 'New Label': adv_label, 'New Attribution Score': adv_attrib_score, 
             'New text': adv, 'Misclassified': misclassified}

In [40]:
for text in X_test:
    row = generate_adv(text)
    # print(row)
    results.loc[len(results)] = row
    if len(results) % 25 == 0:
        print(len(results))
    # results.append(row, ignore_index=True)
results


Unable to change label.
Unable to change label.
SUCCESS!
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
SUCCESS!
SUCCESS!
Unable to change label.
Unable to change label.
SUCCESS!
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
SUCCESS!
SUCCESS!
Unable to change label.
SUCCESS!
SUCCESS!
Unable to change label.
Unable to change label.
25
Unable to change label.
SUCCESS!
SUCCESS!
Unable to change label.
SUCCESS!
Unable to change label.
SUCCESS!
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
SUCCESS!
SUCCESS!
Unable to change label.
SUCCESS!
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
Unable to change label.
50
Unable to change la

Unnamed: 0,Original Label,Original Attribution Score,Original Text,New Label,New Attribution Score,New text,Misclassified
0,LABEL_4,5.50,china on tuesday rolled out the memory of long...,LABEL_4,5.43,porcelain on tuesday rolling out the memories-...,False
1,LABEL_1,1.49,cisco systems inc fell more than two points in...,LABEL_1,0.88,Leuciscuses systems ltd. falling more than two...,False
2,LABEL_3,1.11,caterpillar inc the worlds largest constructio...,LABEL_0,5.85,caterpillar-likes inc the worlds largest const...,True
3,LABEL_4,5.75,standard amp poors sampp on tuesday gave inves...,LABEL_4,5.73,standards amplifier poor- samplifierp on tuesd...,False
4,LABEL_1,1.36,sir ian prosser chairman of brewertoleisure gr...,LABEL_1,1.19,yesmisterree.s ean prosser vice-co-Chairman of...,False
...,...,...,...,...,...,...,...
847,LABEL_1,1.37,china showered the world trade organisation wt...,LABEL_4,5.37,china showered the world--and trade organisati...,True
848,LABEL_0,5.39,western oil firms frustrated by moscows footdr...,LABEL_3,0.86,western oil companies frustrated by moscows dr...,True
849,LABEL_4,5.82,hong kongs leaderinwaiting tung cheehwa on fri...,LABEL_1,-0.04,hong kongs leaderinwaiting chong cheehwa on fr...,True
850,LABEL_5,4.84,britains express newspapers seeking to halt a ...,LABEL_5,4.80,englands re-expressing newspaper seek to stops...,False


In [42]:
adv = results[results['Misclassified'] == False]
len(adv)

567

In [41]:
results.to_csv(r'distilbert_finetuned_c50_w2v\c50_w2v_adversarialGenerations_kv.csv')

### Similarity 

#### Cosine Similarity 

 The similarity scores will range from -1 to 1, where -1 indicates completely dissimilar text and 1 indicates identical text.

In [113]:
from sklearn.metrics.pairwise import cosine_similarity
from transformers import DistilBertModel, DistilBertTokenizer

sim_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
sim_model = DistilBertModel.from_pretrained('distilbert-base-uncased')

def encode_text(text):
    tokens = sim_tokenizer.encode(text, add_special_tokens=True)
    input_ids = torch.tensor(tokens).unsqueeze(0)
    with torch.no_grad():
        outputs = sim_model(input_ids)
        embeddings = outputs[0][:, 0, :].numpy()
    return embeddings

df = pd.read_csv(r'distilbert_finetuned_c50_w2v\c50_w2v_adversarialGenerations_kv.csv')
embeddings1 = df['Original Text'].apply(lambda x: encode_text(x)).to_numpy()
embeddings2 = df['New text'].apply(lambda x: encode_text(x)).to_numpy()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [116]:
similarity_scores = []

for i in range(len(df)):
    similarity_scores.append(cosine_similarity(embeddings1[i].reshape(1, -1), embeddings2[i].reshape(1, -1))[0][0])
df['similarity'] = similarity_scores

In [117]:
df.to_csv(r'distilbert_finetuned_c50_w2v\c50_w2v_adversarialGenerations_kv.csv')
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Original Label,Original Attribution Score,Original Text,New Label,New Attribution Score,New text,Misclassified,bleu_score,similarity
0,0,0,LABEL_4,5.50,china on tuesday rolled out the memory of long...,LABEL_4,5.43,porcelain on tuesday rolling out the memories-...,False,0.18,0.95
1,1,1,LABEL_1,1.49,cisco systems inc fell more than two points in...,LABEL_1,0.88,Leuciscuses systems ltd. falling more than two...,False,0.31,0.96
2,2,2,LABEL_3,1.11,caterpillar inc the worlds largest constructio...,LABEL_0,5.85,caterpillar-likes inc the worlds largest const...,True,0.86,0.99
3,3,3,LABEL_4,5.75,standard amp poors sampp on tuesday gave inves...,LABEL_4,5.73,standards amplifier poor- samplifierp on tuesd...,False,0.16,0.96
4,4,4,LABEL_1,1.36,sir ian prosser chairman of brewertoleisure gr...,LABEL_1,1.19,yesmisterree.s ean prosser vice-co-Chairman of...,False,0.37,0.98
...,...,...,...,...,...,...,...,...,...,...,...
847,847,847,LABEL_1,1.37,china showered the world trade organisation wt...,LABEL_4,5.37,china showered the world--and trade organisati...,True,0.91,0.99
848,848,848,LABEL_0,5.39,western oil firms frustrated by moscows footdr...,LABEL_3,0.86,western oil companies frustrated by moscows dr...,True,0.22,0.96
849,849,849,LABEL_4,5.82,hong kongs leaderinwaiting tung cheehwa on fri...,LABEL_1,-0.04,hong kongs leaderinwaiting chong cheehwa on fr...,True,0.68,0.99
850,850,850,LABEL_5,4.84,britains express newspapers seeking to halt a ...,LABEL_5,4.80,englands re-expressing newspaper seek to stops...,False,0.23,0.93


#### BLEU Score

as a general rule of thumb, higher BLEU scores are typically better, with scores above 0.4 or 0.5 often considered to be strong.

In [112]:
from nltk.translate.bleu_score import sentence_bleu
df = pd.read_csv(r'distilbert_finetuned_c50\c50_adversarialGenerations_wn.csv')

# define a function to calculate the BLEU score
def calculate_bleu_score(reference, candidate):
    return sentence_bleu([reference], candidate)

# apply the function to each row of the DataFrame and store the result in a new column
df['bleu_score'] = df.apply(lambda row: calculate_bleu_score(row['Original Text'].split(), row['New text'].split()), axis=1)

df.to_csv(r'distilbert_finetuned_c50\c50_adversarialGenerations_wn.csv')
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Original Label,Original Attribution Score,Original Text,New Label,New Attribution Score,New text,Misclassified,similarity,bleu_score
0,0,0,LABEL_17,2.51,royal bank of scotland which reports full year...,LABEL_8,2.14,majestic cant building of scotland which repor...,True,0.98,0.92
1,1,1,LABEL_28,1.06,russia is quietly importing some western grain...,LABEL_28,0.95,russia is quietly import some western caryopsi...,False,0.97,0.39
2,2,2,LABEL_4,2.13,an official inquiry into australias financial ...,LABEL_4,1.00,an prescribed interrogation into Australia fis...,False,0.89,0.12
3,3,3,LABEL_26,1.36,toronto stocks ended softer on monday after th...,LABEL_6,1.24,toronto neckcloth ended softer on monday after...,True,0.98,0.90
4,4,4,LABEL_6,1.43,canadas six biggest banks are poised for a fur...,LABEL_6,0.95,Canada six freehanded banks are brace for a fu...,False,0.98,0.51
...,...,...,...,...,...,...,...,...,...,...,...
705,705,705,LABEL_39,3.69,conrail inc and csx corp so far have convinced...,LABEL_39,2.55,conrail Iraqi National Congress and csx corpor...,False,0.96,0.00
706,706,706,LABEL_12,2.08,statistics canada has admitted to making a sig...,LABEL_6,2.15,statistics canada has intromit to earn a signi...,True,0.95,0.25
707,707,707,LABEL_44,2.56,scottish amicable on monday confirmed that thr...,LABEL_44,1.87,scottish amicable on monday sustain that three...,False,0.91,0.25
708,708,708,LABEL_5,2.59,mercury finance co said wednesday it found pro...,LABEL_31,0.83,Hg finance atomic number 27balt said wednesday...,True,0.97,0.57


## Run Clustered Adv On Author Model

In [132]:
test_df = pd.read_csv(r'distilbert_finetuned_c50_tfidf\finetuned_distilbert_c50_tfidf_pt25_testClassifications.csv')
a =test_df['Text'][0]
full_data

'boeing co is expected to use the biennial farnborough air show in england next week to formally launch the longawaited stretch version of its 747 jumbo jet industry analysts say'

In [133]:
full_data

Unnamed: 0,Author,Text
0,candidate00001,the internet may be overflowing with new techn...
1,candidate00001,the us postal service announced wednesday a pl...
2,candidate00001,elementary school students with access to the ...
3,candidate00001,an influential internet organisation has backe...
4,candidate00001,an influential internet organisation has backe...
...,...,...
2495,candidate00018,britains big banks look set to raise profits b...
2496,candidate00047,after two years of hype and euphoria about the...
2497,candidate00002,czech annual average consumer inflation eased ...
2498,candidate00037,kellogg co whose profits for 1996 are under pr...


In [136]:
list_of_authors = []
for index, row in test_df.iterrows():
    if row['Text'] in set(full_data['Text']):
        author = full_data.loc[full_data['Text'] == row['Text'], 'Author'].values[0]
        list_of_authors.append(author)
list_of_authors

['candidate00032',
 'candidate00022',
 'candidate00010',
 'candidate00003',
 'candidate00008',
 'candidate00038',
 'candidate00049',
 'candidate00037',
 'candidate00036',
 'candidate00011',
 'candidate00002',
 'candidate00047',
 'candidate00004',
 'candidate00030',
 'candidate00013',
 'candidate00019',
 'candidate00004',
 'candidate00007',
 'candidate00044',
 'candidate00035',
 'candidate00023',
 'candidate00045',
 'candidate00006',
 'candidate00014',
 'candidate00041',
 'candidate00022',
 'candidate00041',
 'candidate00041',
 'candidate00049',
 'candidate00038',
 'candidate00008',
 'candidate00001',
 'candidate00041',
 'candidate00048',
 'candidate00001',
 'candidate00012',
 'candidate00031',
 'candidate00042',
 'candidate00047',
 'candidate00046',
 'candidate00001',
 'candidate00036',
 'candidate00022',
 'candidate00011',
 'candidate00019',
 'candidate00004',
 'candidate00022',
 'candidate00030',
 'candidate00015',
 'candidate00035',
 'candidate00004',
 'candidate00002',
 'candidate0

# Get Top K Words

In [13]:
def get_top_k_words(text, w2v_model, clf_model, k):
    # Tokenize the text
    tokens = word_tokenize(text.lower())
    # Remove stopwords and words not in the Word2Vec model
    tokens = [token for token in tokens if token not in stopwords.words('english') and token in w2v_model.key_to_index]
    # Get the Word2Vec embeddings for the tokens
    X = w2v_model[tokens]
    # Get the feature importances for the tokens using the classifier model
    feature_importances = clf_model.coef_.mean(axis=0)
    # Sort the tokens by their feature importances
    sorted_words = sorted(zip(tokens, feature_importances), key=lambda x: x[1], reverse=True)
    # Return the top k words and their feature importance values
    return sorted_words[:k]

In [18]:
X = data['Text']
y = data['Author']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y, random_state=42)

# Load the pre-trained Word2Vec model
print('w2v...')
sentences = X_train.apply(get_sentence_embedding).tolist()
# sentences = X_train.apply(lambda x: x.split()).tolist()
# w2v_model = Word2Vec(sentences, vector_size=200, window=5, min_count=1, workers=4)


w2v_model = model#Word2Vec.load('wiki-news-300d-1M-subword.vec\wiki-news-300d-1M-subword.vec',)

clf = LogisticRegression(random_state=42)
clf.fit(sentences, y_train)
clf_model = clf#pickle.load(open('classifier_model.pkl', 'rb'))

test_sentences = X_test.apply(get_sentence_embedding).tolist()
test =test_sentences[0].reshape(1,-1)
yprob = clf_model.predict_proba(test)
# accuracy_score(y_test, y_pred)
ypred = clf_model.predict(test)



w2v...


In [None]:
X_test =X_test.reset_index(drop=True)
y_test =y_test.reset_index(drop=True)
y_test

In [None]:
# Get the top 10 words and their feature importance values for a text sample
print("top k words")
text = X_test.iloc[22]#"This is a new sample text"
label = y_test.iloc[22]
top_k_words = get_top_k_words(text, w2v_model , clf_model, 55)

# Print the top k words and their feature importance values
for word, importance in top_k_words:
    print(f"{word}: {importance}")
label, text

# Get Top Synonyms (of top k words)

In [28]:
from nltk.corpus import wordnet as wn

def find_top_n_synonyms(top_k_words, w2v_model, n):
    # Get the indices of the top k words in the text
    top_k_indices = [w2v_model.key_to_index[word] for word, _ in top_k_words]
    
    # Find the top n synonyms for each of the top k words
    top_n_synonyms = {}
    for word_index in top_k_indices:
        word = w2v_model.index_to_key[word_index]
        synonyms = []
        for synset in wn.synsets(word):
            for lemma in synset.lemmas():
                syn_word = lemma.name().replace('_', ' ').lower()
                if syn_word != word and syn_word in w2v_model:
                    similarity = w2v_model.similarity(word, syn_word)
                    synonyms.append((syn_word, similarity))
        synonyms = sorted(synonyms, key=lambda x: x[1], reverse=True)[:n]
        if synonyms:
            top_n_synonyms[word] = synonyms
    
    return top_n_synonyms



# Note that this function uses the NLTK library's WordNet module to find synonyms and their semantic similarity scores.
# The function takes in a parameter n to control the number of synonyms to find for each word. The output of the function is a
# dictionary that maps each of the top k words to a list of its top n synonyms, along with their semantic similarity scores.
#In this version of the function, the inner loop only adds lemmas to the synonyms list if they are not equal to the original word.

In [None]:
syn = find_top_n_synonyms(top_k_words, w2v_model, 25)
syn

# Substitution

In [36]:
# import necessary libraries and functions
from typing import List, Dict, Set
import numpy as np

# import necessary libraries and functions
from typing import List, Dict, Set
import numpy as np

def substitute_words(text_sample: str, topk: List[str], topsyn: Dict[str, Set[str]], classifier_model, w2v_model) -> str:
    # Convert the text sample to word embeddings
    embeddings = embedding_function(text_sample, w2v_model)
    
    # Predict the probability estimates of each class with the classifier model
    original_proba = classifier_model.predict_proba(embeddings.reshape(1,-1))
    
    # Get the index of the class with the highest probability
    original_label = np.argmax(original_proba)
    
    drops = {}
    for word in topk:
        if word not in w2v_model:
            continue
        else:
            print("HERE")
            for syn in topsyn[word[0]]:
                # Replace the word with its synonym
                new_text_sample = text_sample.replace(word, syn)
                
                # Convert the new text sample to word embeddings
                new_embeddings = []
                for word in new_text_sample.split():
                    if word in w2v_model:
                        new_embeddings.append(w2v_model[word])
                    else:
                        new_embeddings.append(w2v_model[w2v_model.index2word.index(word)])
                new_embeddings = np.array(new_embeddings)            
                # Predict the probability estimates of each class with the classifier model
                new_proba = classifier_model.predict_proba(new_embeddings)
                
                # Calculate the probability difference between the new label and the original label
                prob_diff = original_proba[0, original_label] - new_proba[0, original_label]
                print(prob_diff)
                # If the probability difference is negative, add the word to the "drops" dictionary
                if prob_diff < 0:
                    drops[word] = prob_diff
                
        # If there are words in the "drops" dictionary, choose the one that caused the biggest probability decrease and substitute it into the text sample
        if drops:
            max_drop_word = max(drops, key=drops.get)
            text_sample = text_sample.replace(max_drop_word, list(topsyn[max_drop_word])[0])
            drops = {}  # Reset the "drops" dictionary for the next word in topk
            
    # Convert the edited text sample to word embeddings
    edited_embeddings = embedding_function(text_sample, w2v_model)
    
    # Predict the probability estimates of each class for the edited text with the classifier model
    edited_proba = classifier_model.predict_proba(edited_embeddings.reshape(1,-1))
    
    # Get the index of the class with the highest probability for the edited text
    edited_label = np.argmax(edited_proba)
    
    # Compare the edited label to the original label
    if edited_label == original_label:
        return "Unable to misclassify"
    else:
        return f"New label: {edited_label}, New text: {text_sample}"


In [37]:
text = X_test.iloc[0]
# print(text)
substitute_words(text, top_k_words, syn, clf_model, w2v_model)

'Unable to misclassify'