Hugging Face

In [None]:
# All imports 

from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
import transformers
from huggingface_hub import login,notebook_login
from transformers import DataCollatorForTokenClassification
from transformers import AutoTokenizer
import evaluate
from datasets import Dataset
import numpy as np
import os
from transformers import pipeline
from sklearn.metrics import confusion_matrix
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:
# Authenticate this notebook session with the Hugging Face Hub.
# notebook_login is imported from huggingface_hub in the imports cell.
notebook_login()

In [None]:
# Packages this notebook was set up with.
# The install commands are kept inside a string literal, so running this cell
# installs nothing; copy a command into its own cell to actually run it.


#%pip install ipywidgets
"""
%pip install accelerate -U
%pip install transformers
%pip install datasets
%pip install seqeval
#pip install transformers datasets evaluate seqeval
"""

In [None]:
# Tag inventory for the NER task; a tag's position in this list is its integer id.
label_list = ['O', 'B-LOC', 'I-LOC', 'B-PER', 'B-ORG', 'I-ORG', 'I-PER']

# Forward (tag -> id) and reverse (id -> tag) lookups, both derived from label_list
# so the three structures cannot drift apart.
label2id = {tag: idx for idx, tag in enumerate(label_list)}
id2label = dict(enumerate(label_list))

In [None]:
#label_to_index['O']

## 1. Processing the EWT data

In [None]:
# Open the data and create dictionaries for each sentence 

def parse_iob2_content(content, label_map=None):
    """Parse the text of an EWT ``.iob2`` file into one record per sentence.

    A sentence starts at a ``# sent_id =`` comment line and ends at the next
    blank line, at the next ``# sent_id =`` line, or at the end of the input.
    Other ``#`` comment lines are ignored. Token lines are split on tabs; the
    second column is read as the token and the third as its NER tag.

    Args:
        content: Full text of the file.
        label_map: Optional mapping from tag string to integer id. Defaults
            to the notebook-level ``label2id``.

    Returns:
        A list of dicts with keys ``'id'`` (0-based running sentence number),
        ``'tokens'`` (list of str) and ``'ner_tags'`` (list of int). Each
        sentence appears exactly once, however many blank lines follow it.

    Raises:
        KeyError: If a tag is missing from the label map.
        IndexError: If a token line has fewer than three columns.
    """
    if label_map is None:
        label_map = label2id

    data = []
    current = None  # sentence being assembled; None while between sentences

    for line in content.split('\n'):
        line = line.strip()
        if line.startswith('# sent_id ='):
            # A new header also closes a sentence that had no blank-line terminator
            if current is not None:
                data.append(current)
            current = {'id': len(data), 'tokens': [], 'ner_tags': []}
        elif not line:
            # Only the first blank line after a sentence stores it; further
            # blank lines (e.g. the trailing newline of the file) are ignored
            if current is not None:
                data.append(current)
                current = None
        elif not line.startswith('#'):
            if current is None:
                # Token line without a preceding header: open a sentence implicitly
                current = {'id': len(data), 'tokens': [], 'ner_tags': []}
            parts = line.split('\t')
            current['tokens'].append(parts[1])
            current['ner_tags'].append(label_map[parts[2]])

    # Input that does not end with a blank line still yields its last sentence
    if current is not None:
        data.append(current)
    return data

# Read the raw text of the EWT training split
with open('en_ewt-ud-train.iob2', 'r', encoding='utf-8') as file:
    content_train = file.read()



# Read the raw text of the EWT development split
with open('en_ewt-ud-dev.iob2', 'r', encoding='utf-8') as file:
    content_dev = file.read()

# Reading of the masked test split is disabled
#with open('en_ewt-ud-test-masked.iob2', 'r', encoding='utf-8') as file:
 #   content_test = file.read()

# Turn each raw text into a list of per-sentence records
train_data = parse_iob2_content(content_train)
dev_data = parse_iob2_content(content_dev)
#test_data = parse_iob2_content(content_test)

# Last expression of the cell: the notebook displays the parsed training records
train_data

In [None]:
# Get the number of sentences
def count_unique_ids(data):
    """Return how many distinct ``'id'`` values occur among the records in *data*."""
    return len({record['id'] for record in data})

# Number of distinct sentence ids in the EWT dev records; displayed as the cell output
numb_sentances = count_unique_ids(dev_data)
numb_sentances 

In [None]:
# Hugging Face objects shared by the rest of the notebook


# Tokenizer of the cased DistilBERT checkpoint
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased")
# Collator for token-classification batches, built on that tokenizer
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
# seqeval metric loaded through the evaluate library; used by compute_metrics
seqeval = evaluate.load("seqeval")
# NOTE(review): presumably stops Trainer from reporting to Weights & Biases -- confirm
os.environ["WANDB_DISABLED"] = "true"

In [None]:
#example = train_data[0]

def tokenize_and_align_labels(example):
    """Tokenize one pre-split sentence and align its NER tags to the sub-tokens.

    Args:
        example: Dict with ``"tokens"`` (list of words) and ``"ner_tags"``
            (one integer tag per word).

    Returns:
        The tokenizer output for the sentence with an added ``"labels"`` entry
        holding one value per sub-token: the word's tag on the first sub-token
        of each word, and -100 everywhere else (special tokens and the
        remaining sub-tokens of a word). -100 is the value that
        ``compute_metrics`` filters out.
    """
    tokenized_inputs = tokenizer(example["tokens"], truncation=True, is_split_into_words=True)
    ner_tags = example["ner_tags"]

    # word_ids() maps every sub-token to the index of the word it came from
    # (None for special tokens). A single pass over it is enough; it also
    # works for a sentence without tags, where the labels are all -100.
    label_ids = []
    previous_word_idx = None
    for word_idx in tokenized_inputs.word_ids():
        if word_idx is None or word_idx == previous_word_idx:
            label_ids.append(-100)
        else:
            # First sub-token of a new word carries that word's tag
            label_ids.append(ner_tags[word_idx])
        previous_word_idx = word_idx

    tokenized_inputs["labels"] = label_ids
    return tokenized_inputs

In [None]:
# Tokenize both EWT splits (train and dev), one sentence at a time
tokeized_train = [tokenize_and_align_labels(sentence) for sentence in train_data]
tokeized_dev = [tokenize_and_align_labels(sentence) for sentence in dev_data]

In [None]:
#example_ids =

#labels = [label_list[i] for i in example[f"ner_tags"]]


def compute_metrics(p):
    """Score token-classification outputs with seqeval.

    Args:
        p: Pair ``(predictions, labels)``. ``predictions`` holds per-token
            class scores with the class dimension on axis 2; ``labels`` holds
            the gold ids, with -100 at positions that must be skipped.

    Returns:
        Dict with the overall ``precision``, ``recall``, ``f1`` and
        ``accuracy`` reported by seqeval.
    """
    scores, gold = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for pred_row, gold_row in zip(predicted_ids, gold):
        pred_tags = []
        gold_tags = []
        for pred_id, gold_id in zip(pred_row, gold_row):
            if gold_id == -100:
                continue  # position without a gold tag
            pred_tags.append(label_list[pred_id])
            gold_tags.append(label_list[gold_id])
        true_predictions.append(pred_tags)
        true_labels.append(gold_tags)

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    summary = {}
    summary["precision"] = results["overall_precision"]
    summary["recall"] = results["overall_recall"]
    summary["f1"] = results["overall_f1"]
    summary["accuracy"] = results["overall_accuracy"]
    return summary

## 2. Process TweeBank data 

In [None]:
def process_bio_file(file_path):
    """Read a tab-separated BIO file into one record per sentence.

    Blank lines and lines starting with ``-DOCSTART-`` separate sentences.
    Every other line is split on tabs; its first column is taken as the word
    and its last column as the NER label.

    Args:
        file_path: Path of the UTF-8 encoded file.

    Returns:
        A list of dicts with keys ``'id'`` (0-based and consecutive),
        ``'tokens'`` (list of str) and ``'ner_tags'`` (list of int). Labels
        that are not in the mapping below are stored as 0 (``'O'``).

    Raises:
        ValueError: If a token line has fewer than two tab-separated columns.
    """
    label2id = {
        'O': 0,
        'B-LOC': 1,
        'I-LOC': 2,
        'B-PER': 3,
        'B-ORG': 4,
        'I-ORG': 5,
        'I-PER': 6
    }

    sentences = []
    current_sentence = {'id': 0, 'tokens': [], 'ner_tags': []}

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Separator line: store the sentence collected so far, if any
            if line.startswith("-DOCSTART-") or not line.strip():
                if current_sentence['tokens']:
                    sentences.append(current_sentence)
                    # The next id is the number of sentences already stored,
                    # so ids run 0, 1, 2, ... without repeating
                    current_sentence = {'id': len(sentences), 'tokens': [], 'ner_tags': []}
                continue

            # First column is the word, last column is the label
            word, *_, label = line.strip().split('\t')
            current_sentence['tokens'].append(word)
            # Unknown labels fall back to 0 ('O')
            current_sentence['ner_tags'].append(label2id.get(label, 0))

    # The file may end without a trailing separator line
    if current_sentence['tokens']:
        sentences.append(current_sentence)

    return sentences

# Parse 'train.bio' into per-sentence records
compare_data_1 = process_bio_file('train.bio')

# Print every parsed record (the whole list)
print(compare_data_1)

In [None]:
# NOTE: this cell defines process_bio_file again; whichever definition ran
# last is the one that later cells call.
def process_bio_file(file_path):
    """Read a tab-separated BIO file into one record per sentence.

    Blank lines and lines starting with ``-DOCSTART-`` separate sentences.
    Every other line is split on tabs; its first column is taken as the word
    and its last column as the NER label.

    Args:
        file_path: Path of the UTF-8 encoded file.

    Returns:
        A list of dicts with keys ``'id'`` (0-based and consecutive),
        ``'tokens'`` (list of str) and ``'ner_tags'`` (list of int). Labels
        that are not in the mapping below are stored as 0 (``'O'``).

    Raises:
        ValueError: If a token line has fewer than two tab-separated columns.
    """
    label2id = {
        'O': 0,
        'B-LOC': 1,
        'I-LOC': 2,
        'B-PER': 3,
        'B-ORG': 4,
        'I-ORG': 5,
        'I-PER': 6
    }

    sentences = []
    current_sentence = {'id': 0, 'tokens': [], 'ner_tags': []}

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Separator line: store the sentence collected so far, if any
            if line.startswith("-DOCSTART-") or not line.strip():
                if current_sentence['tokens']:
                    sentences.append(current_sentence)
                    # The next id is the number of sentences already stored,
                    # so ids run 0, 1, 2, ... without repeating
                    current_sentence = {'id': len(sentences), 'tokens': [], 'ner_tags': []}
                continue

            # First column is the word, last column is the label
            word, *_, label = line.strip().split('\t')
            current_sentence['tokens'].append(word)
            # Unknown labels fall back to 0 ('O')
            current_sentence['ner_tags'].append(label2id.get(label, 0))

    # The file may end without a trailing separator line
    if current_sentence['tokens']:
        sentences.append(current_sentence)

    return sentences

# Parse 'train.bio' into per-sentence records (same call as in the previous cell)
compare_data_1 = process_bio_file('train.bio')

# Print every parsed record (the whole list)
print(compare_data_1)


In [None]:

# Tokenize the comparison data, one sentence at a time
tokeized_comparedata_1 = [tokenize_and_align_labels(sentence) for sentence in compare_data_1]

# Show the first ten tokenized sentences as a sanity check
print(tokeized_comparedata_1[:10])


## 3. Process the crossNER dataset

In [None]:
# CrossNER tag names, listed so that each one's position equals the id of the
# corresponding tag in label2id (location -> LOC, person -> PER, organisation -> ORG)
_crossNER_entity_tags = [
    'O',
    'B-location',
    'I-location',
    'B-person',
    'B-organisation',
    'I-organisation',
    'I-person',
]
label2id_crossNER = {tag: idx for idx, tag in enumerate(_crossNER_entity_tags)}
# Both 'misc' tags are mapped to 0, the id of 'O'
label2id_crossNER.update({'B-misc': 0, 'I-misc': 0})

In [None]:


def parse_iob2_content_crossNER(content, label_map=None):
    """Parse the text of a CrossNER file into one record per sentence.

    Sentences are separated by one or more blank lines. Every non-blank line
    is split on tabs; the first column is read as the token and the second as
    its tag.

    Args:
        content: Full text of the file.
        label_map: Optional mapping from tag string to integer id. Defaults
            to the notebook-level ``label2id_crossNER``.

    Returns:
        A list of dicts with keys ``'id'`` (0-based and consecutive),
        ``'tokens'`` (list of str) and ``'ner_tags'`` (list of int). Only
        sentences with at least one token are returned.

    Raises:
        KeyError: If a tag is missing from the label map.
        IndexError: If a non-blank line has fewer than two columns.
    """
    if label_map is None:
        label_map = label2id_crossNER

    data = []
    current = {'id': 0, 'tokens': [], 'ner_tags': []}

    for line in content.split('\n'):
        line = line.strip()
        if line:
            parts = line.split('\t')
            current['tokens'].append(parts[0])
            current['ner_tags'].append(label_map[parts[1]])
        elif current['tokens']:
            # Blank line after at least one token closes the sentence; leading
            # or repeated blank lines never produce an empty record
            data.append(current)
            current = {'id': len(data), 'tokens': [], 'ner_tags': []}

    # Input that does not end with a newline still yields its last sentence
    if current['tokens']:
        data.append(current)
    return data

In [None]:
# Read the raw text of the CrossNER training file (parsing happens in the next cell)
with open('crossNER_train.txt', 'r', encoding='utf-8') as file:
    content_crossNER_train = file.read()

In [None]:
# Parse the raw CrossNER text into per-sentence records
crossNER_train_data = parse_iob2_content_crossNER(content_crossNER_train)
crossNER_train_data

# Tokenize every CrossNER sentence and align its labels
tokeized_train_crossNER = [
    tokenize_and_align_labels(sentence) for sentence in crossNER_train_data
]

#tokeized_train_crossNER

## 4. Count the number of tokens, sentences and labels

In [None]:
def count_tokens(data):
    """Return the summed length of the ``'input_ids'`` entries of all items in *data*."""
    return sum(len(item['input_ids']) for item in data)
# Count the tokens of each dataset.
# The inputs are tokenizer outputs, so the totals are lengths of 'input_ids'
# (tokenizer positions), not numbers of words in the source files.
nmb_tokens_crossNER = count_tokens (tokeized_train_crossNER)
nmb_tokens_tweet = count_tokens(tokeized_comparedata_1)
nmb_tokens_ewt = count_tokens(tokeized_dev)


In [None]:
def count_items(dataset):
    """Return the number of items in *dataset* (its ``len``)."""
    return len(dataset)

# Get the number of sentences in each tokenized dataset (CrossNER, Tweebank, EWT dev)
numb_sentances_crossNER = count_items(tokeized_train_crossNER)
numb_sentances_tweet = count_items(tokeized_comparedata_1)
numb_sentances_ewt = count_items(tokeized_dev)
# Printed in the order CrossNER, Tweebank, EWT dev
print(numb_sentances_crossNER,numb_sentances_tweet,numb_sentances_ewt)

In [None]:
def count_labels(data, label_map=None):
    """Count how often each NER label occurs in a list of sentence records.

    Args:
        data: Iterable of dicts that each hold a ``'ner_tags'`` list of
            integer tag ids.
        label_map: Optional mapping from label string to integer id. Defaults
            to the notebook-level ``label2id``.

    Returns:
        Dict from every label in the mapping (in the mapping's order) to the
        number of tags carrying that label's id; labels that never occur map
        to 0. Tag ids that belong to no label are ignored.
    """
    if label_map is None:
        label_map = label2id

    # One pass over the data to count ids, instead of scanning the whole
    # label mapping for every single tag
    id_counts = {}
    for item in data:
        for tag in item['ner_tags']:
            id_counts[tag] = id_counts.get(tag, 0) + 1

    return {label: id_counts.get(index, 0) for label, index in label_map.items()}

# Label frequencies of the Tweebank, EWT dev and CrossNER records
label_counts = count_labels(compare_data_1)
label_counts_dev = count_labels(dev_data)
label_counts_crossNER = count_labels(crossNER_train_data)

# Print the three tables in that order, each under the same header line
for counts in (label_counts, label_counts_dev, label_counts_crossNER):
    print("Label Counts:")
    for label, count in counts.items():
        print(label, ":", count)



In [None]:
#tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased")

In [None]:
# Disabled experiment: the code below is wrapped in a string literal, so it is
# not executed. It tokenizes one example and converts the ids back to tokens.
#example = train_data[0]
"""
tokenized_input = tokenizer(example["tokens"], is_split_into_words=True)
tokens = tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"])
tokens
"""

In [None]:
# Disabled variant of tokenize_and_align_labels: the whole definition is
# wrapped in a string literal, so it is never executed. It reads word ids per
# batch index, i.e. it is written for a batch of examples rather than one.
'''def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        #print('Hi')
        #print(word_ids, "Hi")
        #print(label)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                #print(label[word_idx])
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs'''

In [None]:
#train_data[:2]

In [None]:
#from datasets import load_dataset

## The Model 

In [None]:


# Token-classification model on top of the cased DistilBERT checkpoint, with
# one output class per entry of the label maps
checkpoint_name = "distilbert/distilbert-base-cased"
model = AutoModelForTokenClassification.from_pretrained(
    checkpoint_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Train the model
# The training code is wrapped in a string literal, so running this cell does
# not train anything. Remove the triple quotes to fine-tune `model` on
# tokeized_train and evaluate it on tokeized_dev after every epoch.

"""
training_args = TrainingArguments(
    output_dir="our_awesome_BERT_model",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    #push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokeized_train,
    eval_dataset=tokeized_dev,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

model_output = trainer.train()
"""

#trainer.predict()

In [None]:
#trainer

In [None]:
# push the model to the hugging face website
#trainer.push_to_hub("model_name1")

In [None]:
# Save the model locally
#model_output
#trainer.save_model("model1")

In [None]:
#text = "The Golden State Warriors are an American professional basketball team based in San Francisco."

In [None]:



# Example sentence for a quick smoke test of the fine-tuned checkpoint.
# It is defined here because the only other assignment of `text` in the
# notebook is commented out, which made classifier(text) fail with NameError.
text = "The Golden State Warriors are an American professional basketball team based in San Francisco."

# NER pipeline loading the checkpoint stored under the training output directory
classifier = pipeline("ner", model="our_awesome_BERT_model/checkpoint-784/")
classifier(text)

In [None]:
# read the txt file
#f = open("train.txt", "r")
#print(f.read())

## Test the model on 3 datasets

In [None]:
# Load the fine-tuned model from the local "model1" directory (the name used
# by the commented-out trainer.save_model("model1") call)
loaded_model = AutoModelForTokenClassification.from_pretrained("model1")
# Trainer built with the model only; the cells below use it for trainer.predict
trainer = Trainer(model = loaded_model)


In [None]:
# Run the collator over all tokenized Tweebank sentences in a single call
collated_compare_data=data_collator(tokeized_comparedata_1)

In [None]:
# Wrap the collated Tweebank fields in a Dataset for trainer.predict;
# "id" is simply the row number
test_dataset= Dataset.from_dict({
    "id":range(len(collated_compare_data["input_ids"])),
    "input_ids":collated_compare_data["input_ids"],
    "attention_mask":collated_compare_data["attention_mask"],
    "labels":collated_compare_data["labels"],
})

In [None]:
# Run the loaded model on the Tweebank set and score it.
# ps[:2] passes only the first two fields of the prediction output, which
# compute_metrics unpacks as (predictions, labels).
ps = trainer.predict(test_dataset)
results = compute_metrics(ps[:2])

In [None]:
results


# Predict on the Tweebank set and reduce the scores to one class id per position
predictions, labels, _ = trainer.predict(test_dataset)
predictions = np.argmax(predictions, axis=2)

# Keep only the positions whose gold id is not -100, then split the surviving
# (predicted id, gold id) pairs into two parallel lists of tag strings
kept_pairs_twee = [
    [(pred_id, gold_id) for pred_id, gold_id in zip(pred_row, gold_row) if gold_id != -100]
    for pred_row, gold_row in zip(predictions, labels)
]
true_predictions_twee = [
    [label_list[pred_id] for pred_id, _gold_id in pairs] for pairs in kept_pairs_twee
]
true_labels_twee = [
    [label_list[gold_id] for _pred_id, gold_id in pairs] for pairs in kept_pairs_twee
]


In [None]:
# Full seqeval report for Tweebank; displayed as the cell output
seqeval.compute(predictions=true_predictions_twee, references=true_labels_twee)


In [None]:
# Inspect the filtered tag sequences; a notebook echoes only the last
# expression of a cell, so only true_labels_twee is displayed
true_predictions_twee
true_labels_twee

In [None]:
# File that receives the per-sentence comparison of predictions and gold labels
file_path = "predictions_and_labels_tweeBank.txt"

# Explicit encoding so the report does not depend on the platform default
with open(file_path, "w", encoding="utf-8") as file:
    # Sentence IDs in the report are 1-based
    for sentence_id, (pred, true) in enumerate(
        zip(true_predictions_twee, true_labels_twee), start=1
    ):
        file.write(f"Sentence ID: {sentence_id}\n")

        # Label frequencies are listed in sorted label order so the file is
        # identical from run to run; plain set iteration order of strings is not
        file.write("Predictions:\n")
        for label in sorted(set(pred)):
            file.write(f"{label}: {pred.count(label)}\n")

        file.write("\nTrue Labels:\n")
        for label in sorted(set(true)):
            file.write(f"{label}: {true.count(label)}\n")

        # One "prediction<TAB>gold" line per position
        file.write("\nPredictions vs True Labels:\n")
        for pred_label, true_label in zip(pred, true):
            file.write(f"{pred_label}\t{true_label}\n")

        # Empty line between sentences
        file.write("\n")



#### Test on CrossNER

In [None]:
# Run the collator over all tokenized CrossNER sentences in a single call
collated_compare_data_crossNER =data_collator(tokeized_train_crossNER)

In [None]:
# Wrap the collated CrossNER fields in a Dataset for trainer.predict;
# "id" is simply the row number
test_dataset_crossNER= Dataset.from_dict({
    "id":range(len(collated_compare_data_crossNER["input_ids"])),
    "input_ids":collated_compare_data_crossNER["input_ids"],
    "attention_mask":collated_compare_data_crossNER["attention_mask"],
    "labels":collated_compare_data_crossNER["labels"],
})

In [None]:
# Run the loaded model on the CrossNER set and score it.
# ps_NER[:2] passes only the first two fields of the prediction output, which
# compute_metrics unpacks as (predictions, labels).
ps_NER = trainer.predict(test_dataset_crossNER)
results_NER = compute_metrics(ps_NER[:2])


In [None]:
# Predict on the CrossNER set and reduce the scores to one class id per position
predictions, labels, _ = trainer.predict(test_dataset_crossNER)
predictions = np.argmax(predictions, axis=2)

# Keep only the positions whose gold id is not -100, then split the surviving
# (predicted id, gold id) pairs into two parallel lists of tag strings
kept_pairs_crossNER = [
    [(pred_id, gold_id) for pred_id, gold_id in zip(pred_row, gold_row) if gold_id != -100]
    for pred_row, gold_row in zip(predictions, labels)
]
true_predictions_crossNER = [
    [label_list[pred_id] for pred_id, _gold_id in pairs] for pairs in kept_pairs_crossNER
]
true_labels_crossNER = [
    [label_list[gold_id] for _pred_id, gold_id in pairs] for pairs in kept_pairs_crossNER
]


In [None]:
# File that receives the per-sentence comparison of predictions and gold labels
file_path = "predictions_and_labels_crossNER.txt"

# Explicit encoding so the report does not depend on the platform default
with open(file_path, "w", encoding="utf-8") as file:
    # Sentence IDs in the report are 1-based
    for sentence_id, (pred, true) in enumerate(
        zip(true_predictions_crossNER, true_labels_crossNER), start=1
    ):
        file.write(f"Sentence ID: {sentence_id}\n")

        # Label frequencies are listed in sorted label order so the file is
        # identical from run to run; plain set iteration order of strings is not
        file.write("Predictions:\n")
        for label in sorted(set(pred)):
            file.write(f"{label}: {pred.count(label)}\n")

        file.write("\nTrue Labels:\n")
        for label in sorted(set(true)):
            file.write(f"{label}: {true.count(label)}\n")

        # One "prediction<TAB>gold" line per position
        file.write("\nPredictions vs True Labels:\n")
        for pred_label, true_label in zip(pred, true):
            file.write(f"{pred_label}\t{true_label}\n")

        # Empty line between sentences
        file.write("\n")


In [None]:
# Full seqeval report for CrossNER; displayed as the cell output
seqeval.compute(predictions=true_predictions_crossNER, references=true_labels_crossNER)

#### Test on DEV dataset

In [None]:
# Run the collator over all tokenized EWT dev sentences in a single call
collated_compare_data_dev =data_collator(tokeized_dev)

In [None]:
# Wrap the collated EWT dev fields in a Dataset for trainer.predict;
# "id" is simply the row number
test_dataset_dev= Dataset.from_dict({
    "id":range(len(collated_compare_data_dev["input_ids"])),
    "input_ids":collated_compare_data_dev["input_ids"],
    "attention_mask":collated_compare_data_dev["attention_mask"],
    "labels":collated_compare_data_dev["labels"],
})

In [None]:
# Predict on the EWT dev set and reduce the scores to one class id per position
predictions, labels, _ = trainer.predict(test_dataset_dev)
predictions = np.argmax(predictions, axis=2)

# Keep only the positions whose gold id is not -100, then split the surviving
# (predicted id, gold id) pairs into two parallel lists of tag strings
kept_pairs_dev = [
    [(pred_id, gold_id) for pred_id, gold_id in zip(pred_row, gold_row) if gold_id != -100]
    for pred_row, gold_row in zip(predictions, labels)
]
true_predictions_dev = [
    [label_list[pred_id] for pred_id, _gold_id in pairs] for pairs in kept_pairs_dev
]
true_labels_dev = [
    [label_list[gold_id] for _pred_id, gold_id in pairs] for pairs in kept_pairs_dev
]


In [None]:
# File that receives the per-sentence comparison of predictions and gold labels
file_path = "predictions_and_labels_EWT_dev.txt"

# Explicit encoding so the report does not depend on the platform default
with open(file_path, "w", encoding="utf-8") as file:
    # Sentence IDs in the report are 1-based
    for sentence_id, (pred, true) in enumerate(
        zip(true_predictions_dev, true_labels_dev), start=1
    ):
        file.write(f"Sentence ID: {sentence_id}\n")

        # Label frequencies are listed in sorted label order so the file is
        # identical from run to run; plain set iteration order of strings is not
        file.write("Predictions:\n")
        for label in sorted(set(pred)):
            file.write(f"{label}: {pred.count(label)}\n")

        file.write("\nTrue Labels:\n")
        for label in sorted(set(true)):
            file.write(f"{label}: {true.count(label)}\n")

        # One "prediction<TAB>gold" line per position
        file.write("\nPredictions vs True Labels:\n")
        for pred_label, true_label in zip(pred, true):
            file.write(f"{pred_label}\t{true_label}\n")

        # Empty line between sentences
        file.write("\n")


In [None]:
#print(ps)

In [None]:
# Full seqeval report for the EWT dev set; displayed as the cell output
seqeval.compute(predictions=true_predictions_dev, references=true_labels_dev)

## Get the confusion matrices for each dataset that the model was tested on

In [None]:
# Row-normalised (percentage) confusion matrix for Tweebank
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat = [label for sublist in true_predictions_twee for label in sublist]
true_labels_flat = [label for sublist in true_labels_twee for label in sublist]

# The axis labels must cover every tag seen in either sequence:
# confusion_matrix builds its rows/columns from both, so taking only the gold
# tags can leave the tick labels shorter than, and misaligned with, the matrix
unique_labels = sorted(set(true_labels_flat) | set(true_predictions_flat))
conf_matrix = confusion_matrix(true_labels_flat, true_predictions_flat, labels=unique_labels)

# Convert counts to per-row percentages. A tag that never occurs as a gold
# label has a zero row sum; that row is left at 0 instead of dividing by zero
row_sums = conf_matrix.sum(axis=1)
conf_matrix_percent = np.divide(
    conf_matrix,
    row_sums[:, np.newaxis],
    out=np.zeros(conf_matrix.shape, dtype=float),
    where=row_sums[:, np.newaxis] != 0,
) * 100

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix_percent, annot=True, fmt=".2f", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels, annot_kws={"fontsize": 15})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for Tweebank with BERT')
plt.show()


In [None]:
# Confusion matrix with raw counts for Tweebank

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat = [label for sublist in true_predictions_twee for label in sublist]
true_labels_flat = [label for sublist in true_labels_twee for label in sublist]
print(true_predictions_flat)
print(true_labels_flat)

# The axis labels must cover every tag seen in either sequence:
# confusion_matrix builds its rows/columns from both, so taking only the gold
# tags can leave the tick labels shorter than, and misaligned with, the matrix
unique_labels = sorted(set(true_labels_flat) | set(true_predictions_flat))
conf_matrix = confusion_matrix(true_labels_flat, true_predictions_flat, labels=unique_labels)

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels, annot_kws={"fontsize": 15})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for Tweebank with BERT')
# Render explicitly, as the percentage cells do
plt.show()

In [None]:
# Row-normalised (percentage) confusion matrix for CrossNER
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat_crossNER = [label for sublist in true_predictions_crossNER for label in sublist]
true_labels_flat_crossNER = [label for sublist in true_labels_crossNER for label in sublist]

# The axis labels must cover every tag seen in either sequence:
# confusion_matrix builds its rows/columns from both, so taking only the gold
# tags can leave the tick labels shorter than, and misaligned with, the matrix
unique_labels = sorted(set(true_labels_flat_crossNER) | set(true_predictions_flat_crossNER))
conf_matrix = confusion_matrix(true_labels_flat_crossNER, true_predictions_flat_crossNER, labels=unique_labels)

# Convert counts to per-row percentages. A tag that never occurs as a gold
# label has a zero row sum; that row is left at 0 instead of dividing by zero
row_sums = conf_matrix.sum(axis=1)
conf_matrix_percent = np.divide(
    conf_matrix,
    row_sums[:, np.newaxis],
    out=np.zeros(conf_matrix.shape, dtype=float),
    where=row_sums[:, np.newaxis] != 0,
) * 100

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix_percent, annot=True, fmt=".2f", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels, annot_kws={"fontsize": 15})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for CrossNER with BERT')
plt.show()


In [None]:
# Confusion matrix with raw counts for CrossNER

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat_crossNER = [label for sublist in true_predictions_crossNER for label in sublist]
true_labels_flat_crossNER = [label for sublist in true_labels_crossNER for label in sublist]

# Compute the axis labels in this cell instead of reusing whatever
# `unique_labels` another cell left behind, and take them from both sequences
# so they match the rows/columns that confusion_matrix produces
unique_labels = sorted(set(true_labels_flat_crossNER) | set(true_predictions_flat_crossNER))
conf_matrix = confusion_matrix(true_labels_flat_crossNER, true_predictions_flat_crossNER, labels=unique_labels)

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for CrossNER with BERT')
# Render explicitly, as the percentage cells do
plt.show()

In [None]:
# Row-normalised (percentage) confusion matrix for the EWT dev set
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat_dev = [label for sublist in true_predictions_dev for label in sublist]
true_labels_flat_dev = [label for sublist in true_labels_dev for label in sublist]

# The axis labels must cover every tag seen in either sequence:
# confusion_matrix builds its rows/columns from both, so taking only the gold
# tags can leave the tick labels shorter than, and misaligned with, the matrix
unique_labels = sorted(set(true_labels_flat_dev) | set(true_predictions_flat_dev))
conf_matrix = confusion_matrix(true_labels_flat_dev, true_predictions_flat_dev, labels=unique_labels)

# Convert counts to per-row percentages. A tag that never occurs as a gold
# label has a zero row sum; that row is left at 0 instead of dividing by zero
row_sums = conf_matrix.sum(axis=1)
conf_matrix_percent = np.divide(
    conf_matrix,
    row_sums[:, np.newaxis],
    out=np.zeros(conf_matrix.shape, dtype=float),
    where=row_sums[:, np.newaxis] != 0,
) * 100

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix_percent, annot=True, fmt=".2f", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels, annot_kws={"fontsize": 15})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for EWT_dev with BERT')
plt.show()


In [None]:
# Confusion matrix with raw counts for the EWT dev set

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat_dev = [label for sublist in true_predictions_dev for label in sublist]
true_labels_flat_dev = [label for sublist in true_labels_dev for label in sublist]

# Compute the axis labels in this cell instead of reusing whatever
# `unique_labels` another cell left behind, and take them from both sequences
# so they match the rows/columns that confusion_matrix produces
unique_labels = sorted(set(true_labels_flat_dev) | set(true_predictions_flat_dev))
conf_matrix = confusion_matrix(true_labels_flat_dev, true_predictions_flat_dev, labels=unique_labels)

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for EWT_dev with BERT')
# Render explicitly, as the percentage cells do
plt.show()

In [None]:
# Play around to find the right annotation font size
# (percentage confusion matrix for the EWT dev set)
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Flatten the per-sentence tag lists into one sequence each
true_predictions_flat_dev = [label for sublist in true_predictions_dev for label in sublist]
true_labels_flat_dev = [label for sublist in true_labels_dev for label in sublist]

# The axis labels must cover every tag seen in either sequence:
# confusion_matrix builds its rows/columns from both, so taking only the gold
# tags can leave the tick labels shorter than, and misaligned with, the matrix
unique_labels = sorted(set(true_labels_flat_dev) | set(true_predictions_flat_dev))
conf_matrix = confusion_matrix(true_labels_flat_dev, true_predictions_flat_dev, labels=unique_labels)

# Convert counts to per-row percentages. A tag that never occurs as a gold
# label has a zero row sum; that row is left at 0 instead of dividing by zero
row_sums = conf_matrix.sum(axis=1)
conf_matrix_percent = np.divide(
    conf_matrix,
    row_sums[:, np.newaxis],
    out=np.zeros(conf_matrix.shape, dtype=float),
    where=row_sums[:, np.newaxis] != 0,
) * 100

# Plot the heatmap with customized font size
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix_percent, annot=True, fmt=".2f", cmap="Blues", xticklabels=unique_labels, yticklabels=unique_labels,
            annot_kws={"fontsize": 15})  # Adjust the font size here
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix for EWT_dev with BERT')
plt.show()
