In [68]:
from transformers import BertModel, BertTokenizer
import torch

In [69]:
# Name of the pretrained BERT checkpoint used for the manual walkthrough below.
BERT_CHECKPOINT = 'bert-base-uncased'
model = BertModel.from_pretrained(BERT_CHECKPOINT)

In [70]:
# Example inputs; the first entry is the one tokenized by hand in the following cells.
sentence = [
    "She is a Machine Learning engineer and works in california. Today I want to work in my thesis",
    "She is not a Machine Learning Engineer and word in san francisco",
]

In [71]:
# Load the tokenizer published under the same 'bert-base-uncased' name as the model.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [72]:
# Split the first example sentence into BERT tokens and display the resulting list.
first_sentence = sentence[0]
tokens = tokenizer.tokenize(first_sentence)
tokens

['she',
 'is',
 'a',
 'machine',
 'learning',
 'engineer',
 'and',
 'works',
 'in',
 'california',
 '.',
 'today',
 'i',
 'want',
 'to',
 'work',
 'in',
 'my',
 'thesis']

In [73]:
# Wrap the token sequence in the [CLS] (start) and [SEP] (end) markers.
# The list is rebuilt from the raw sentence instead of from the previous value
# of `tokens`, so re-running this cell cannot stack additional markers.
tokens = ['[CLS]'] + tokenizer.tokenize(sentence[0]) + ['[SEP]']
print(tokens)

['[CLS]', 'she', 'is', 'a', 'machine', 'learning', 'engineer', 'and', 'works', 'in', 'california', '.', 'today', 'i', 'want', 'to', 'work', 'in', 'my', 'thesis', '[SEP]']


In [74]:
# Right-pad the sequence with [PAD] up to a fixed length.
# For the 21-token sequence above this adds exactly two [PAD] tokens (23 total).
# Padding up to a target length, rather than appending a fixed count, keeps the
# cell idempotent: re-running it adds nothing once the target is reached.
# Sequences already at or beyond MAX_SEQ_LEN are left unchanged (no truncation).
MAX_SEQ_LEN = 23
num_pad = max(0, MAX_SEQ_LEN - len(tokens))
tokens = tokens + ['[PAD]'] * num_pad
print(tokens)

['[CLS]', 'she', 'is', 'a', 'machine', 'learning', 'engineer', 'and', 'works', 'in', 'california', '.', 'today', 'i', 'want', 'to', 'work', 'in', 'my', 'thesis', '[SEP]', '[PAD]', '[PAD]']


In [75]:
# Length of the sequence after the special and padding tokens were added.
sequence_length = len(tokens)
print(sequence_length)

23


In [76]:
# Build the attention mask: 1 for every real token, 0 for each '[PAD]' position.
attention_mask = [int(token != '[PAD]') for token in tokens]
print(attention_mask)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]


In [77]:
# Map every token string (including [CLS], [SEP] and [PAD]) to its integer id
# via the tokenizer, then show the resulting id list.
token_ids = tokenizer.convert_tokens_to_ids(tokens)
print(token_ids)


[101, 2016, 2003, 1037, 3698, 4083, 3992, 1998, 2573, 1999, 2662, 1012, 2651, 1045, 2215, 2000, 2147, 1999, 2026, 9459, 102, 0, 0]


In [78]:
# Convert the id list and the mask into tensors with a leading batch dimension
# of size 1, i.e. shape (1, sequence_length).
# `as_tensor(...).reshape(1, -1)` is used instead of `tensor(...).unsqueeze(0)`
# because these names are rebound in place: re-running the cell on values that
# are already (1, sequence_length) tensors leaves them unchanged, whereas
# `unsqueeze(0)` would add another leading dimension on every run.
token_ids = torch.as_tensor(token_ids).reshape(1, -1)

attention_mask = torch.as_tensor(attention_mask).reshape(1, -1)

In [79]:
# Run a forward pass to extract embeddings.
# This is inference only, so set eval mode explicitly (disables dropout
# regardless of how the model was loaded) and turn off autograd tracking so no
# graph is built and the returned tensors carry no grad_fn.
model.eval()
with torch.no_grad():
    output = model(token_ids, attention_mask = attention_mask)
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.0039,  0.2669, -0.1215,  ..., -0.4232,  0.4911,  0.1651],
         [ 0.5212, -0.3369, -0.2246,  ..., -0.5175,  0.6155, -0.0176],
         [ 0.2355,  0.1010,  0.3829,  ..., -0.9268,  0.2483,  0.7237],
         ...,
         [ 0.0283,  0.5550, -0.1473,  ..., -0.0367,  0.0136, -0.4383],
         [ 0.0763, -0.0643,  0.1149,  ...,  0.2775,  0.3979, -0.1012],
         [ 0.0151, -0.1723,  0.1306,  ...,  0.3458,  0.4224, -0.2350]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-8.8428e-01, -5.7315e-01, -9.4131e-01,  8.3235e-01,  8.3296e-01,
         -3.5361e-01,  7.3923e-01,  4.7880e-01, -7.3270e-01, -9.9998e-01,
         -4.1440e-01,  9.5079e-01,  9.7844e-01,  6.3562e-01,  9.1822e-01,
         -7.0859e-01, -1.3589e-04, -7.4268e-01,  4.1455e-01, -1.1715e-01,
          7.9340e-01,  1.0000e+00,  3.4977e-02,  4.5880e-01,  5.2475e-01,
          9.8978e-01, -8.1371e-01,  9.3197e-01,  9.3698e-01,  7.484

In [80]:
# First element of the model output: one hidden vector per input position.
last_hidden_state = output[0]
print(last_hidden_state.shape)
print(last_hidden_state)

torch.Size([1, 23, 768])
tensor([[[-0.0039,  0.2669, -0.1215,  ..., -0.4232,  0.4911,  0.1651],
         [ 0.5212, -0.3369, -0.2246,  ..., -0.5175,  0.6155, -0.0176],
         [ 0.2355,  0.1010,  0.3829,  ..., -0.9268,  0.2483,  0.7237],
         ...,
         [ 0.0283,  0.5550, -0.1473,  ..., -0.0367,  0.0136, -0.4383],
         [ 0.0763, -0.0643,  0.1149,  ...,  0.2775,  0.3979, -0.1012],
         [ 0.0151, -0.1723,  0.1306,  ...,  0.3458,  0.4224, -0.2350]]],
       grad_fn=<NativeLayerNormBackward0>)


In [81]:
# Second element of the model output: a single pooled vector for the whole sequence.
pooler_output = output[1]
print(pooler_output.shape)
print(pooler_output)

torch.Size([1, 768])
tensor([[-8.8428e-01, -5.7315e-01, -9.4131e-01,  8.3235e-01,  8.3296e-01,
         -3.5361e-01,  7.3923e-01,  4.7880e-01, -7.3270e-01, -9.9998e-01,
         -4.1440e-01,  9.5079e-01,  9.7844e-01,  6.3562e-01,  9.1822e-01,
         -7.0859e-01, -1.3589e-04, -7.4268e-01,  4.1455e-01, -1.1715e-01,
          7.9340e-01,  1.0000e+00,  3.4977e-02,  4.5880e-01,  5.2475e-01,
          9.8978e-01, -8.1371e-01,  9.3197e-01,  9.3698e-01,  7.4841e-01,
         -5.9865e-01,  2.7322e-01, -9.9227e-01, -3.2181e-01, -9.7809e-01,
         -9.9466e-01,  5.5820e-01, -6.8825e-01, -1.0108e-01,  7.1433e-02,
         -8.9692e-01,  4.6916e-01,  9.9999e-01, -2.5285e-01,  6.0688e-01,
         -2.8009e-01, -1.0000e+00,  4.0834e-01, -8.9145e-01,  8.8339e-01,
          8.5090e-01,  8.5277e-01,  2.5701e-01,  5.4638e-01,  4.9562e-01,
         -3.5907e-01, -2.0353e-02,  2.0877e-01, -4.2000e-01, -6.6615e-01,
         -7.2082e-01,  5.3606e-01, -9.0515e-01, -8.9556e-01,  8.4203e-01,
          9.0728e

In [82]:
!pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.70.0-py3-none-any.whl.metadata (42 kB)
     ---------------------------------------- 0.0/42.4 kB ? eta -:--:--
     ------------------ ------------------- 20.5/42.4 kB 682.7 kB/s eta 0:00:01
     -------------------------------------- 42.4/42.4 kB 412.4 kB/s eta 0:00:00
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
     ---------------------------------------- 0.0/43.6 kB ? eta -:--:--
     ---------------------------------------- 43.6/43.6 kB 2.2 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with st


[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [85]:
from simpletransformers.language_representation import RepresentationModel

sentences = ["Machine Learning and Deep Learning are part of AI", "Data Science will excel in future"]

# NOTE: this rebinds `model`, replacing the BertModel loaded near the top of the
# notebook with a simpletransformers RepresentationModel.
# Request the GPU only when one is actually present so the cell also runs on
# CPU-only machines instead of failing at construction time.
model = RepresentationModel(
    model_type = "bert",
    model_name = "bert-base-uncased",
    use_cuda = torch.cuda.is_available()
)

# combine_strategy = None keeps one vector per token; the result printed below
# has shape (2, 11, 768) for these two sentences.
word_vectors = model.encode_sentences(sentences, combine_strategy = None)

In [86]:
# Inspect the shape of the encoded batch.
# presumably (num_sentences, num_tokens, hidden_size) — confirm against the library docs
word_vectors.shape

(2, 11, 768)

In [87]:
# Display the encoded array (the repr is abbreviated with "..." for large arrays).
word_vectors

array([[[-0.23119791,  0.26004937, -0.27770406, ..., -0.2934025 ,
          0.804319  ,  0.42281473],
        [-0.3143458 ,  0.5668908 , -0.12599672, ..., -0.4730684 ,
          0.916663  , -0.55583394],
        [-0.10503935,  0.41539976,  0.4367812 , ..., -0.4926567 ,
          0.2915908 , -0.54653597],
        ...,
        [-0.10692384,  0.14539401, -0.36244187, ..., -0.41278127,
          0.49094978, -0.20988023],
        [-0.5188837 , -0.6581017 , -0.9108163 , ...,  0.37581968,
          0.88491195, -0.1731022 ],
        [ 0.5081882 ,  0.12891915, -0.5035092 , ...,  0.156108  ,
         -0.536438  , -0.5237989 ]],

       [[-0.17415059,  0.16519125,  0.07086745, ..., -0.2963486 ,
          0.3384182 , -0.11301661],
        [-0.18327022, -0.01264788,  0.4876518 , ..., -0.38039458,
          0.0735445 , -0.5296368 ],
        [ 0.03186131, -0.18739481,  1.1199812 , ..., -0.45969406,
         -0.46317557, -0.49477333],
        ...,
        [-0.00217983, -0.12765382,  0.28663245, ..., -

In [92]:
from simpletransformers.language_representation import RepresentationModel

sentences = ["Machine Learning and Deep Learning are part of AI", "Data Science will excel in future"]

# Request the GPU only when one is actually present so the cell also runs on
# CPU-only machines instead of failing at construction time.
model = RepresentationModel(
    model_type = "bert",
    model_name = "bert-base-uncased",
    use_cuda = torch.cuda.is_available()
)

# combine_strategy = "mean" collapses each sentence to a single vector; the
# shape displayed below is (2, 768) for these two sentences.
sentence_vectors = model.encode_sentences(sentences, combine_strategy = "mean")

sentence_vectors.shape

(2, 768)