In [None]:
# Install seqeval; its metrics module is imported further down in this notebook.
!pip install seqeval


Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m683.7 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=b0506142d1315328c5a327683aacb295d2ee87c35fc7e471a11b5db2b2c25f70
  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
from sklearn.model_selection import train_test_split

In [None]:
# Parse the two-column CoNLL file ("<word> <tag>" per line, blank line between
# sentences) into `sentences`, a list of (sentence id, [(word, tag), ...]).
sentences = []
current_sentence = []
sent_id = 1

# The corpus is Turkish text, so the encoding is stated explicitly instead of
# relying on the platform default.
with open('p_saf_dataset.conll', 'r', encoding='utf-8') as file:
    for line_no, line in enumerate(file, start=1):
        line = line.strip()
        if line:  # Token line
            fields = line.split()
            if len(fields) != 2:
                # Fail with the offending location instead of an opaque unpack error.
                raise ValueError(
                    "Line {} of p_saf_dataset.conll should contain exactly "
                    "'<word> <tag>', got: {!r}".format(line_no, line)
                )
            word, tag = fields
            current_sentence.append((word, tag))
        elif current_sentence:
            # Blank line closes the sentence; repeated blank lines no longer
            # create empty sentences or consume sentence ids.
            sentences.append((sent_id, current_sentence))
            current_sentence = []
            sent_id += 1

# Handle the last sentence if there is no trailing empty line
if current_sentence:
    sentences.append((sent_id, current_sentence))

# Flatten to one row per token, keeping the sentence id for later grouping.
data = [(word, tag, sid) for sid, sentence in sentences for word, tag in sentence]

df = pd.DataFrame(data, columns=['Word', 'Tag', 'Sent_ID'])

df

Unnamed: 0,Word,Tag,Sent_ID
0,BİLATERAL,O,1
1,MAMOGRAFİ,O,1
2,İNCELEMESİ,O,1
3,:,ANAT,1
4,\nHer,ANAT,1
...,...,...,...
69763,.,O,880
69764,SONUÇ,O,880
69765,:,O,880
69766,BIRADS,O,880


In [None]:
# Number of tokens carrying each tag.
df["Tag"].value_counts()


Unnamed: 0_level_0,count
Tag,Unnamed: 1_level_1
O,34708
ANAT,15429
OBS-PRESENT,11440
OBS-ABSENT,6590
OBS-UNCERTAIN,1601


In [None]:
# Distinct tag strings present in the corpus.
df["Tag"].unique()


array(['O', 'ANAT', 'OBS-ABSENT', 'OBS-UNCERTAIN', 'OBS-PRESENT'],
      dtype=object)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Split by sentence id rather than by row, so that every token of a sentence
# ends up in the same partition.
unique_sent_ids = df['Sent_ID'].unique()

# 80/20 split of the sentence ids with a fixed seed.
train_ids, val_ids = train_test_split(unique_sent_ids, test_size=0.2, random_state=42)

# Select the rows of each partition and renumber them from 0.
train_df = df[df['Sent_ID'].isin(train_ids)].reset_index(drop=True)
val_df = df[df['Sent_ID'].isin(val_ids)].reset_index(drop=True)

# Show both partitions.
print("Training Set:")
print(train_df)
print("\nValidation Set:")
print(val_df)


Training Set:
             Word   Tag  Sent_ID
0       BİLATERAL     O        1
1       MAMOGRAFİ     O        1
2      İNCELEMESİ     O        1
3               :  ANAT        1
4           \nHer  ANAT        1
...           ...   ...      ...
56204           .     O      880
56205       SONUÇ     O      880
56206           :     O      880
56207      BIRADS     O      880
56208           4     O      880

[56209 rows x 3 columns]

Validation Set:
                  Word   Tag  Sent_ID
0            BİLATERAL     O        6
1            MAMOGRAFİ     O        6
2      İNCELEMESİ\nHer  ANAT        6
3                  iki  ANAT        6
4               memede  ANAT        6
...                ...   ...      ...
13554                .     O      876
13555            SONUÇ     O      876
13556                :     O      876
13557           BIRADS     O      876
13558                5     O      876

[13559 rows x 3 columns]


In [None]:
# Persist both partitions as CSV files (without the row index) in the working directory.
train_df.to_csv("train_df.csv", index=False)
val_df.to_csv("val_df.csv", index=False)

In [None]:
class GetSentence(object):
    """Group a token-level DataFrame into sentences.

    `data` must have the columns "Word", "Tag" and "Sent_ID". After
    construction:

    * `grouped` is a Series indexed by sentence id whose values are lists of
      (word, tag) tuples;
    * `sentences` is the plain list of those lists, in group order.
    """

    def __init__(self, data):
        self.n_sent = 1  # 1-based position of the sentence get_next() returns next
        self.data = data
        self.empty = False

        agg_func = lambda s: [(w, t) for w, t in zip(s["Word"].values.tolist(),
                                                     s["Tag"].values.tolist())]
        # Only Word/Tag are needed by agg_func; selecting them keeps the
        # grouping column out of the applied frame.
        self.grouped = self.data.groupby("Sent_ID")[["Word", "Tag"]].apply(agg_func)
        self.sentences = [s for s in self.grouped]

    def get_next(self):
        """Return the next sentence in group order, or None when exhausted.

        The previous implementation looked the sentence up with a string key
        under a bare `except`; with integer sentence ids that lookup always
        failed and the method always returned None. Walking `sentences` by
        position also works when the ids are not contiguous, as happens after
        a train/validation split.
        """
        position = self.n_sent - 1
        if position < 0 or position >= len(self.sentences):
            return None
        self.n_sent += 1
        return self.sentences[position]

In [None]:
# Group the token rows of each partition into per-sentence (word, tag) lists.
getter = GetSentence(train_df)
v_getter = GetSentence(val_df)

In [None]:
# Keep only the word of every (word, tag) pair, sentence by sentence.
sentences = [[token for token, _ in sent] for sent in getter.sentences]
v_sentences = [[token for token, _ in sent] for sent in v_getter.sentences]


In [None]:
# Keep only the tag of every (word, tag) pair, sentence by sentence.
labels = [[tag for _, tag in sent] for sent in getter.sentences]

v_labels = [[tag for _, tag in sent] for sent in v_getter.sentences]
# Show the tags of the first validation sentence.
v_labels[0]

['O',
 'O',
 'ANAT',
 'ANAT',
 'ANAT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'ANAT',
 'O',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'O',
 'O',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'OBS-UNCERTAIN',
 'ANAT',
 'ANAT',
 'ANAT',
 'ANAT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'O',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'OBS-ABSENT',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O']

In [None]:
# Label vocabulary. The tags are sorted because iterating a set of strings
# gives an order that can differ between Python sessions, which would change
# the meaning of every label id each time the notebook is re-run.
tag_values = sorted(set(train_df["Tag"].values))
# "PAD" is appended last so it receives the highest index.
tag_values.append("PAD")
tag2idx = {t: i for i, t in enumerate(tag_values)}

In [None]:
#Adding Padding at the end of each sentence
v_tag_values = list(set(val_df["Tag"].values))
v_tag_values.append("PAD")
v_tag2idx = {t: i for i, t in enumerate(tag_values)}

In [None]:
# Display the label vocabulary; a tag's position in this list is its label id.
tag_values

['OBS-UNCERTAIN', 'ANAT', 'OBS-PRESENT', 'OBS-ABSENT', 'O', 'PAD']

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertConfig, AlbertTokenizer, AlbertConfig, RobertaConfig, RobertaTokenizer, AutoTokenizer

from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

torch.__version__

'2.4.0+cu121'

In [None]:
# Sequences are padded/truncated to MAX_LEN sub-word tokens.
MAX_LEN = 320
# Batch size used by both the training and the validation DataLoader.
bs = 16

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Number of CUDA devices reported by PyTorch (displayed below).
n_gpu = torch.cuda.device_count()
n_gpu

1

In [None]:
# Name of CUDA device 0.
# NOTE(review): presumably fails on a CPU-only runtime — confirm before running without a GPU.
torch.cuda.get_device_name(0)


'NVIDIA A100-SXM4-40GB'

In [None]:
# Checkpoint identifier used to load the tokenizer, the config and the model weights.
model_name = "savasy/bert-base-turkish-ner-cased"


In [None]:
# Tokenizer matching the checkpoint; do_lower_case=False keeps the original casing.
tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=False)




In [None]:
def tokenize_and_preserve_labels(sentence, text_labels, word_tokenizer=None):
    """Split every word into sub-word tokens and repeat its label for each piece.

    Args:
        sentence: list of words.
        text_labels: list of labels, one per word of `sentence`.
        word_tokenizer: optional object with a `tokenize(word)` method returning
            a list of tokens. Defaults to the notebook-level `tokenizer`.

    Returns:
        (tokenized_sentence, labels): two lists of equal length. A word that
        produces no tokens contributes nothing to either list.

    Raises:
        ValueError: if `sentence` and `text_labels` differ in length (they were
            previously truncated silently to the shorter one).
    """
    if len(sentence) != len(text_labels):
        raise ValueError(
            "sentence has {} words but text_labels has {} labels".format(
                len(sentence), len(text_labels)
            )
        )

    active_tokenizer = tokenizer if word_tokenizer is None else word_tokenizer

    tokenized_sentence = []
    labels = []

    for word, label in zip(sentence, text_labels):
        # Tokenize the word
        tokenized_word = active_tokenizer.tokenize(word)

        # Append the pieces to the running token list
        tokenized_sentence.extend(tokenized_word)

        # Repeat the word's label once per piece
        labels.extend([label] * len(tokenized_word))

    return tokenized_sentence, labels

In [None]:
# Sub-word tokenize every sentence together with its labels; each entry is a
# (tokens, labels) pair.
tokenized_texts_and_labels = list(
    map(tokenize_and_preserve_labels, sentences, labels)
)
v_tokenized_texts_and_labels = list(
    map(tokenize_and_preserve_labels, v_sentences, v_labels)
)

In [None]:
# Separate the (tokens, labels) pairs into parallel lists.
# Note that `labels` / `v_labels` now hold sub-word-level labels.
tokenized_texts = [pieces for pieces, _ in tokenized_texts_and_labels]
labels = [piece_labels for _, piece_labels in tokenized_texts_and_labels]

v_tokenized_texts = [pieces for pieces, _ in v_tokenized_texts_and_labels]
v_labels = [piece_labels for _, piece_labels in v_tokenized_texts_and_labels]

In [None]:
# Convert tokens to vocabulary ids, then pad (with 0) or truncate every
# sequence at the end so that all rows have MAX_LEN entries.
input_ids = pad_sequences(
    list(map(tokenizer.convert_tokens_to_ids, tokenized_texts)),
    maxlen=MAX_LEN,
    dtype="long",
    value=0.0,
    truncating="post",
    padding="post",
)

v_input_ids = pad_sequences(
    list(map(tokenizer.convert_tokens_to_ids, v_tokenized_texts)),
    maxlen=MAX_LEN,
    dtype="long",
    value=0.0,
    truncating="post",
    padding="post",
)

In [None]:
# Number of training sentences (one label sequence per sentence).
len(labels)


704

In [None]:
# Encode the label strings with the training mapping and pad/truncate at the
# end to MAX_LEN, using the "PAD" id as filler.
tags = pad_sequences(
    [list(map(tag2idx.get, seq)) for seq in labels],
    maxlen=MAX_LEN,
    value=tag2idx["PAD"],
    padding="post",
    dtype="long",
    truncating="post",
)


v_tags = pad_sequences(
    [list(map(tag2idx.get, seq)) for seq in v_labels],
    maxlen=MAX_LEN,
    value=tag2idx["PAD"],
    padding="post",
    dtype="long",
    truncating="post",
)

In [None]:
# Dikkat maskelerini ayarla
attention_masks = [[float(i != 0.0) for i in ii] for ii in input_ids]

v_attention_masks = [[float(i != 0.0) for i in ii] for ii in v_input_ids]

In [None]:
# Random 90/10 split of the training arrays; the shared random_state keeps the
# mask split aligned with the input/tag split.
# NOTE(review): all six variables assigned here are reassigned in the next
# cell from the sentence-level train/validation partitions, so this split has
# no effect on what is trained or evaluated.
tr_inputs, val_inputs, tr_tags, val_tags = train_test_split(input_ids, tags,
                                                            random_state=2018, test_size=0.1)
tr_masks, val_masks, _, _ = train_test_split(attention_masks, input_ids,
                                             random_state=2018, test_size=0.1)

In [None]:
# Train on the full training partition and validate on the held-out
# sentence-level validation partition.
tr_inputs, val_inputs = input_ids, v_input_ids
tr_tags, val_tags = tags, v_tags
tr_masks, val_masks = attention_masks, v_attention_masks

In [None]:
# Convert ids, labels and masks of both partitions to PyTorch tensors.
tr_inputs, val_inputs = torch.tensor(tr_inputs), torch.tensor(val_inputs)
tr_tags, val_tags = torch.tensor(tr_tags), torch.tensor(val_tags)
tr_masks, val_masks = torch.tensor(tr_masks), torch.tensor(val_masks)

In [None]:
# Training batches are drawn in random order (RandomSampler).
train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=bs)

# Validation batches keep the dataset order (SequentialSampler).
valid_data = TensorDataset(val_inputs, val_masks, val_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=bs)

In [None]:
import transformers
from transformers import BertForTokenClassification, AdamW, AlbertForTokenClassification, RobertaForTokenClassification, AutoModelForTokenClassification, AutoConfig

transformers.__version__

'4.44.2'

In [None]:
# Start from the checkpoint's configuration and resize the classification head
# to this task's label set (including "PAD").
config = AutoConfig.from_pretrained(model_name)
config.num_labels = len(tag2idx)
# Store the real label names in the config so they are saved with the model;
# without this a reloaded model only exposes generic LABEL_<n> names.
config.id2label = {idx: tag for tag, idx in tag2idx.items()}
config.label2id = dict(tag2idx)
config.output_attentions = False
config.output_hidden_states = False
# ignore_mismatched_sizes lets the checkpoint load even though its classifier
# head has a different number of labels; the head is then newly initialised.
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    config=config,
    ignore_mismatched_sizes=True
)

Some weights of the model checkpoint at savasy/bert-base-turkish-ner-cased were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at savasy/bert-base-turkish-ner-cased and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: fou

In [None]:
# Move the model to the device selected earlier (GPU when available, else CPU)
# so it sits on the same device the training loop sends its batches to.
model.to(device);


In [None]:
# True: fine-tune every parameter. False: train only the classification head.
FULL_FINETUNING = True
if FULL_FINETUNING:
    param_optimizer = list(model.named_parameters())
    # Parameters whose name contains one of these substrings get no weight decay.
    # 'LayerNorm.weight' is included because 'gamma'/'beta' are legacy names;
    # NOTE(review): confirm against model.named_parameters() for this checkpoint.
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
    # The per-group option must be spelled 'weight_decay'; the previous
    # 'weight_decay_rate' key is not an optimizer option, so the intended
    # 0.01 / 0.0 split was never applied.
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
else:
    param_optimizer = list(model.classifier.named_parameters())
    optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]

optimizer = AdamW(
    optimizer_grouped_parameters,
    lr=3e-5,
    eps=1e-8
)



In [None]:
# Define the training parameters (number of epochs and learning-rate schedule)
from transformers import get_linear_schedule_with_warmup

epochs = 35
# Gradient-norm clipping threshold used in the training loop.
max_grad_norm = 1.0

# One scheduler step is taken per training batch, for every epoch.
total_steps = len(train_dataloader) * epochs


# Linear learning-rate schedule over total_steps with no warm-up steps.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)

In [None]:
from seqeval.metrics import f1_score

def flat_accuracy(preds, labels, ignore_index=None):
    """Token-level accuracy of arg-max predictions.

    Args:
        preds: array of scores; the arg-max is taken over axis 2.
        labels: integer label array with one entry per position of `preds`
            (both are flattened before comparison).
        ignore_index: optional label id to leave out of the score (e.g. the
            padding id). With the default None every position counts, which
            is the original behaviour.

    Returns:
        Fraction of counted positions where prediction equals label, or 0.0
        when no position is counted (instead of dividing by zero).
    """
    pred_flat = np.argmax(preds, axis=2).flatten()
    labels_flat = labels.flatten()
    if ignore_index is not None:
        keep = labels_flat != ignore_index
        pred_flat = pred_flat[keep]
        labels_flat = labels_flat[keep]
    if len(labels_flat) == 0:
        return 0.0
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [None]:
# Token-level F1 from scikit-learn, imported under an alias so the seqeval
# `f1_score` name imported earlier is not shadowed. seqeval scores entity
# spans and expects IOB-style tags; the tags here ("ANAT", "OBS-PRESENT", ...)
# are not in that format, which made the previously printed F1 meaningless.
from sklearn.metrics import f1_score as token_f1_score

loss_values, validation_loss_values = [], []

# Label names that take part in the F1 score (everything except padding).
scored_tags = [t for t in tag_values if t != "PAD"]

for _ in trange(epochs, desc="Epoch"):

    # ---- training pass ----
    model.train()

    total_loss = 0

    for step, batch in enumerate(train_dataloader):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        model.zero_grad()
        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask, labels=b_labels)
        loss = outputs[0]
        loss.backward()
        total_loss += loss.item()
        # Clip gradients before the optimizer step.
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_loss / len(train_dataloader)
    print("Average train loss: {}".format(avg_train_loss))

    loss_values.append(avg_train_loss)  # Keep the loss values for plotting

    # ---- validation pass, run after every training epoch ----
    model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps = 0
    predictions, true_labels = [], []
    for batch in valid_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)

        logits = outputs[1].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        eval_loss += outputs[0].mean().item()
        # Note: this accuracy counts padding positions as well.
        eval_accuracy += flat_accuracy(logits, label_ids)
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.extend(label_ids)

        nb_eval_steps += 1

    # Guard against an empty validation loader.
    eval_loss = eval_loss / max(nb_eval_steps, 1)
    validation_loss_values.append(eval_loss)
    print("Validation loss: {}".format(eval_loss))
    print("Validation Accuracy: {}".format(eval_accuracy / max(nb_eval_steps, 1)))

    # Keep only positions whose gold label is not padding, as tag strings.
    pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels)
                                 for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
    valid_tags = [tag_values[l_i] for l in true_labels
                                  for l_i in l
                                    if tag_values[l_i] != "PAD"]
    # Macro-averaged token-level F1 over the real labels. A "PAD" prediction
    # on a real token counts as an error but adds no class of its own.
    print("Validation F1-Score: {}".format(
        token_f1_score(valid_tags, pred_tags, labels=scored_tags,
                       average="macro", zero_division=0)))
    print()

Epoch:   0%|          | 0/35 [00:00<?, ?it/s]

Average train loss: 0.5964434736154296


Epoch:   3%|▎         | 1/35 [00:09<05:18,  9.37s/it]

Validation loss: 0.4554601392962716
Validation Accuracy: 0.811505681818182
Validation F1-Score: 0.00528169014084507

Average train loss: 0.44855695217847824


Epoch:   6%|▌         | 2/35 [00:18<05:07,  9.32s/it]

Validation loss: 0.40074028210206464
Validation Accuracy: 0.8286399147727274
Validation F1-Score: 0.008639308855291575

Average train loss: 0.4048233580860225


Epoch:   9%|▊         | 3/35 [00:27<04:58,  9.32s/it]

Validation loss: 0.364522784948349
Validation Accuracy: 0.8321555397727273
Validation F1-Score: 0.016704631738800303



Epoch:   9%|▊         | 3/35 [00:35<06:18, 11.84s/it]


KeyboardInterrupt: 

In [None]:
# Save the fine-tuned weights/config and the tokenizer files into the same directory.
model.save_pretrained("ner_model11")
tokenizer.save_pretrained("ner_model11")

('ner_model11/tokenizer_config.json',
 'ner_model11/special_tokens_map.json',
 'ner_model11/vocab.txt',
 'ner_model11/added_tokens.json',
 'ner_model11/tokenizer.json')

In [None]:
import torch
from transformers import BertForTokenClassification
from sklearn.metrics import classification_report
import numpy as np

# Load the trained model from the directory written by save_pretrained above
model = BertForTokenClassification.from_pretrained('ner_model11')
# Evaluation mode (e.g. disables dropout)
model.eval()

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Predict function
def predict(model, dataloader):
    """Run `model` over `dataloader` and collect predictions and gold labels.

    Every batch must unpack into (input ids, attention mask, labels). Tensors
    are moved to the notebook-level `device` before the forward pass.

    Returns:
        (predictions, true_labels): two lists with one NumPy array per batch,
        the arg-max label ids over the last logits axis and the gold label ids.
    """
    model.eval()
    predictions = []
    true_labels = []

    for raw_batch in dataloader:
        b_input_ids, b_input_mask, b_labels = (t.to(device) for t in raw_batch)

        # Forward pass only; no gradients are needed for inference.
        with torch.no_grad():
            logits = model(b_input_ids, attention_mask=b_input_mask).logits

        # Bring results back to the CPU as NumPy arrays and store them per batch.
        batch_scores = logits.detach().cpu().numpy()
        predictions.append(batch_scores.argmax(axis=2))
        true_labels.append(b_labels.to('cpu').numpy())

    return predictions, true_labels

# Get predictions and true labels
predictions, true_labels = predict(model, valid_dataloader)

# Flatten the per-batch arrays into one label id per token position
pred_tags = [tag for pred_batch in predictions for pred in pred_batch for tag in pred]
true_tags = [tag for true_batch in true_labels for true in true_batch for tag in true]

# Drop every position whose gold label is padding (from both lists in lockstep)
pad_id = tag2idx['PAD']
kept_pairs = [(p, t) for p, t in zip(pred_tags, true_tags) if t != pad_id]
pred_tags = [p for p, _ in kept_pairs]
true_tags = [t for _, t in kept_pairs]

# Report on the real labels only. Passing `labels` explicitly ties every name
# to its id; without it the report breaks (or mislabels rows) as soon as the
# model predicts PAD on a real token, because the number of observed classes
# no longer matches the number of target names.
target_names = [label for label in tag2idx.keys() if label != "PAD"]
target_ids = [tag2idx[label] for label in target_names]

report = classification_report(true_tags, pred_tags,
                               labels=target_ids, target_names=target_names)
print(report)


               precision    recall  f1-score   support

OBS-UNCERTAIN       0.95      0.88      0.91       650
         ANAT       0.84      0.88      0.86      4139
  OBS-PRESENT       0.79      0.80      0.79      3792
   OBS-ABSENT       0.85      0.84      0.85      2055
            O       0.90      0.88      0.89     13437

     accuracy                           0.87     24073
    macro avg       0.87      0.86      0.86     24073
 weighted avg       0.87      0.87      0.87     24073



In [None]:
import torch
from transformers import BertTokenizer, BertForTokenClassification

# Load the model and tokenizer
# NOTE(review): this path points at 'ner_model9', not the 'ner_model11'
# directory saved earlier in this notebook — confirm which model is intended.
model_path = '/content/ner_model9'
model = BertForTokenClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

# Ensure the model is in evaluation mode
model.eval()

# Print the label mapping (index to label)
# `label_list` is the id -> label dictionary stored in the model config.
label_list = model.config.id2label
print("Label Mapping:", label_list)

def predict_entities(text):
    """Tag a single text and return its (token, label) pairs.

    Uses the notebook-level `tokenizer`, `model` and `label_list`.

    Args:
        text: one input string. It is truncated by the tokenizer if too long.

    Returns:
        List of (token, label) tuples, one per token produced by the
        tokenizer, including any special tokens it adds.
    """
    # NOTE(review): the tokenizer call adds special tokens such as [CLS],
    # whereas the training inputs in this notebook were built from plain
    # word-piece tokens without them — confirm this mismatch is intended.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Keep the inputs on the same device as the model.
    inputs = {key: value.to(model.device) for key, value in inputs.items()}

    # Get model predictions
    with torch.no_grad():
        outputs = model(**inputs)

    # Highest-scoring label id for every token position
    predictions = torch.argmax(outputs.logits, dim=2)

    # Index the single batch row explicitly; unlike squeeze() this keeps a
    # list even when the sequence has length one.
    token_ids = inputs["input_ids"][0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(token_ids)

    # Map predicted indices to labels
    predicted_labels = [label_list[int(p)] for p in predictions[0].tolist()]

    # Combine tokens and their predicted labels
    return list(zip(tokens, predicted_labels))

# Example usage: tag one sample report and print one "token: label" line per token.
text = "BILATERAL MLO ve CC MAMOGRAFİ\nMemeler ACR Tip C heterojen yoğun paternde olup, mamografik duyarlılık azalmıştır.Sağ meme; CC grafi santral kesimde milimetrik nodüler fokal asimetrik dansite izlendi. Farklı kadranlarda konturları fibroglandüler parankimle örtülü izodens nodüler lezyonlar mevcuttur. Sınırları seçilebilen kitle lezyonu, kalsifikasyon, yapısal bozulma saptanmamıştır.Sağ aksillada incelenen kesimde patolojik boyut ve görünümde büyümüş lenf nodu yoktur.Sol memede ;alt iç kadranda irregüler şekilli konturu düzensiz olarak izlenen 11 mm boyutunda nodüler kitlesel lezyon izlendi. Tüm kadranlarda konturları fibroglandüler parankimle örtülü izodens nodüler şüpheli lezyonlar mevcuttur.Sol aksillada incelenen kesimde patolojik boyut ve görünümde büyümüş lenf nodu yoktur.SONUÇ: BIRADS-4 "
entities = predict_entities(text)

for token, label in entities:
    print(f"{token}: {label}")


Label Mapping: {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2', 3: 'LABEL_3', 4: 'LABEL_4', 5: 'LABEL_5'}
[CLS]: LABEL_1
BI: LABEL_1
##LA: LABEL_1
##TER: LABEL_1
##AL: LABEL_1
M: LABEL_1
##LO: LABEL_1
ve: LABEL_1
CC: LABEL_1
MA: LABEL_1
##M: LABEL_1
##OG: LABEL_1
##RAF: LABEL_1
##İ: LABEL_1
Meme: LABEL_2
##ler: LABEL_2
AC: LABEL_0
##R: LABEL_0
Tip: LABEL_0
C: LABEL_0
he: LABEL_0
##ter: LABEL_0
##ojen: LABEL_0
yoğun: LABEL_0
pat: LABEL_0
##ern: LABEL_0
##de: LABEL_0
olup: LABEL_1
,: LABEL_1
mam: LABEL_0
##ografik: LABEL_0
duyarlılık: LABEL_0
azal: LABEL_0
##mıştır: LABEL_1
.: LABEL_1
Sağ: LABEL_2
meme: LABEL_1
;: LABEL_1
CC: LABEL_1
graf: LABEL_1
##i: LABEL_1
santral: LABEL_2
kesim: LABEL_1
##de: LABEL_1
mil: LABEL_1
##imet: LABEL_1
##rik: LABEL_1
no: LABEL_1
##düler: LABEL_1
f: LABEL_1
##okal: LABEL_1
as: LABEL_0
##imet: LABEL_0
##rik: LABEL_0
dans: LABEL_1
##ite: LABEL_1
izlen: LABEL_1
##di: LABEL_1
.: LABEL_2
Farklı: LABEL_2
kadr: LABEL_1
##anlar: LABEL_1
##da: LABEL_1
kont: LABEL_1
##urla

In [None]:
# Packages used by the Hugging Face Hub login and upload cells that follow.
!pip install transformers
!pip install huggingface-hub




In [None]:
# Interactive Hub login; prompts for an access token.
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your ter

In [None]:
from transformers import BertTokenizer, BertForTokenClassification
from huggingface_hub import HfApi

# Load your saved model and tokenizer
# NOTE(review): this loads 'ner_model3', not the 'ner_model11' directory saved
# earlier in this notebook — confirm which model should be published.
model = BertForTokenClassification.from_pretrained("ner_model3")
tokenizer = BertTokenizer.from_pretrained("ner_model3")

# Set your desired model name (this will be the name on Hugging Face)
# Note: this reassigns `model_name`, which earlier held the base checkpoint id.
model_name = "comp-model"  # Change this to your desired model name

# Push the model and tokenizer to Hugging Face
model.push_to_hub(model_name)
tokenizer.push_to_hub(model_name)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AAkay/comp-model/commit/b2e012f048cc4dcfdcf29220841e1e094ae1f592', commit_message='Upload tokenizer', commit_description='', oid='b2e012f048cc4dcfdcf29220841e1e094ae1f592', pr_url=None, pr_revision=None, pr_num=None)