**Installing Required Packages**

In [None]:
!pip install simpletransformers



**Importing Required Libraries**

In [None]:
import pandas as pd
from simpletransformers.ner import NERModel,NERArgs   
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


**Custom BERT Model**

In [None]:
def _load_ner_dataset(dataset):
  '''Read the token-level CSV and return it with sentence_id / words / labels columns.'''
  data = pd.read_csv(dataset, encoding="latin1")
  # Forward-fill NaNs (presumably the sentence marker is only present on the
  # first row of each sentence - verify against the CSV). `.ffill()` replaces
  # the deprecated `fillna(method="ffill")` spelling.
  data = data.ffill()
  # Turn the sentence marker into an integer id.
  data["Sentence #"] = LabelEncoder().fit_transform(data["Sentence #"])
  data = data.rename(
      columns={"Sentence #": "sentence_id", "Word": "words", "Tag": "labels"}
  )
  data["labels"] = data["labels"].str.upper()
  return data[["sentence_id", "words", "labels"]]


def _split_by_sentence(data, test_size, random_state):
  '''Split rows into train/test frames so that every sentence lands wholly in one of them.'''
  sentence_ids = data["sentence_id"].unique()
  train_ids, _ = train_test_split(
      sentence_ids, test_size=test_size, random_state=random_state
  )
  in_train = data["sentence_id"].isin(train_ids)
  # Boolean masking keeps the original row order, so the words of a sentence
  # stay in sequence inside each frame.
  return data[in_train], data[~in_train]


def custom_bert_model(dataset, text, test_size=0.2, random_state=42):
  '''
  Fine-tune a 'bert-base-cased' NER model on a token-level CSV and tag `text`.

  Input:
    dataset      : path (or buffer) of a CSV with "Sentence #", "Word" and "Tag" columns
    text         : the string to run entity prediction on
    test_size    : fraction of sentences held out for evaluation (default 0.2)
    random_state : seed for the train/test split, for reproducible runs (default 42)
  Output:
    The predictions returned by `model.predict([text])`: one entry per input
    text, each a list of {word: predicted label} dictionaries.
  '''
  data = _load_ner_dataset(dataset)

  # Split on whole sentences. Splitting individual token rows would scatter the
  # words of one sentence across train and test, which both leaks evaluation
  # data into training and feeds the model broken sentences.
  train_data, test_data = _split_by_sentence(data, test_size, random_state)

  label = data["labels"].unique().tolist()

  # Model building
  args = NERArgs()
  args.num_train_epochs = 1
  args.learning_rate = 1e-4
  args.overwrite_output_dir = True
  args.train_batch_size = 32
  args.eval_batch_size = 32
  model = NERModel('bert', 'bert-base-cased', labels=label, args=args)

  # Model training
  model.train_model(train_data, eval_data=test_data, acc=accuracy_score)
  # Evaluate on the held-out sentences; the returned values are not used here.
  model.eval_model(test_data)

  # Predict entities for the supplied text
  prediction, _ = model.predict([text])

  # returning the predicted result
  return prediction


In [None]:
# Sample passage to tag (adjacent string literals are joined into one string).
text = (
    " In this talk, you will hear about where we are with cancer care in the UK "
    "with guest speaker Dr Ursula McGovern. We bust a few myths, put some "
    "preconceptions of cancer treatment to the test and discuss some of the "
    "breakthrough new treatments for our patients living with a cancer diagnosis."
)
# Location of the token-level training CSV.
dataset = "/content/ner_dataset.csv"
# Train the model, tag the passage, and show the per-word labels.
predictions = custom_bert_model(dataset, text)
print(predictions)


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/104 [00:00<?, ?it/s]



  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/101 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

[[{'In': 'O'}, {'this': 'O'}, {'talk,': 'O'}, {'you': 'O'}, {'will': 'O'}, {'hear': 'O'}, {'about': 'O'}, {'where': 'O'}, {'we': 'O'}, {'are': 'O'}, {'with': 'O'}, {'cancer': 'O'}, {'care': 'O'}, {'in': 'O'}, {'the': 'O'}, {'UK': 'B-GEO'}, {'with': 'O'}, {'guest': 'O'}, {'speaker': 'O'}, {'Dr': 'B-PER'}, {'Ursula': 'B-PER'}, {'McGovern.': 'I-PER'}, {'We': 'O'}, {'bust': 'O'}, {'a': 'O'}, {'few': 'O'}, {'myths,': 'O'}, {'put': 'O'}, {'some': 'O'}, {'preconceptions': 'O'}, {'of': 'O'}, {'cancer': 'O'}, {'treatment': 'O'}, {'to': 'O'}, {'the': 'O'}, {'test': 'O'}, {'and': 'O'}, {'discuss': 'O'}, {'some': 'O'}, {'of': 'O'}, {'the': 'O'}, {'breakthrough': 'O'}, {'new': 'O'}, {'treatments': 'O'}, {'for': 'O'}, {'our': 'O'}, {'patients': 'O'}, {'living': 'O'}, {'with': 'O'}, {'a': 'O'}, {'cancer': 'O'}, {'diagnosis.': 'O'}]]
