In [None]:
!pip install transformers &> /dev/null
# !pip install spacy==3 &> /dev/null
!pip install pytorch-lightning &> /dev/null

In [None]:
# In colab - restart the runtime after downloading the model
# !python -m spacy download de_core_news_lg &> /dev/null

In [1]:
import pandas as pd
# import spacy
import re
import numpy as np
import time
import tqdm.notebook as tqdm
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.options.mode.chained_assignment = None

### Uploading and preprocessing the data

In [None]:
data=pd.read_csv('/content/drive/My Drive/Colab Notebooks/data/HP/HP_val_ratings.txt')
data = data.drop(columns=['Satz_Pos_Buch_Neu', 'Kapitel'])
data['valence_mean_z'] = data['valence_mean_z'].str.replace(',','.').astype(float).values
data_char=data[data.Satz.str.contains('Cedric|Dumbledore|Harry|Hagrid|Hermine|'
                                     'Lockhart|Lupin|McGonagall|Ron|Snape|'
                                  'Voldemort|Salazar|Malfoy|Lucius|Grindelwald'
                                  '|Filch|Bellatrix|Bartemius|Crabbe|'
                                  'Augustus')]
data_char = data_char.loc[data_char['Satz'].str.len() <= 60] # selecting only short sentences

In [None]:
# [1] Use this data partitioning to train the model for extracting sentence embeddings. 

X_test = data.loc[data_char.index, ['Satz']]
y_test  = data.loc[data_char.index, ['valence_mean_z']]
X_train = data.loc[[x for x in data.index if x not in data_char.index], ['Satz']]
y_train  = data.loc[[x for x in data.index if x not in data_char.index], ['valence_mean_z']]
print(X_test.shape, y_test.shape)
print(X_train.shape, y_train.shape)

(1336, 1) (1336, 1)
(9818, 1) (9818, 1)


In [None]:
# [2] Use this data partitioning to train the model for testing using 274 test sentences. 

''' test_indecies - the file that contains the indecies of the sentences 
    that were used in the SentiArt section for testing (274 sentences) '''

with open('test_indecies.txt') as f:
    w = [int(x) for x in next(f).split()]
    array = [[int(x) for x in line.split()] for line in f]
    indecies = [item for sublist in array for item in sublist]
    indecies.insert(0, 8041)

X_test = data.loc[indecies, ['Satz']]
y_test  = data.loc[indecies, ['valence_mean_z']]
X_train = data.loc[[x for x in data.index if x not in indecies], ['Satz']]
y_train  = data.loc[[x for x in data.index if x not in indecies], ['valence_mean_z']]

print(X_test.shape, X_train.shape)

In [None]:
ind = [8506, 1269, 890, 10418, 10414, 10341]

In [None]:
# Checking if the sentences we added exist only in the train set.

for i in ind:
    print("X_train: ", i in X_train.index)
    print("X_test: ", i in X_test.index)

X_train:  False
X_test:  True
X_train:  False
X_test:  True
X_train:  False
X_test:  True
X_train:  False
X_test:  True
X_train:  False
X_test:  True
X_train:  False
X_test:  True


### BERT

In [None]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast as BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
import pytorch_lightning as pl
# from pytorch_lightning.metrics.functional import accuracy, f1, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
%matplotlib inline
%config InlineBackend.figure_format='retina'
RANDOM_SEED = 42
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 12, 8
pl.seed_everything(RANDOM_SEED, workers=True)

Global seed set to 42


42

In [None]:
from transformers import AdamW, BertForSequenceClassification

#BERT_MODEL_NAME = 'bert-base-german-cased'
BERT_MODEL_NAME = 'deepset/gbert-base'

tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)

Downloading:   0%|          | 0.00/234k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/362 [00:00<?, ?B/s]

In [None]:
class HPDataset(Dataset):
  def __init__(
    self,
    data: pd.DataFrame,
    tokenizer: BertTokenizer,
    max_token_len: int = 512
  ):
    self.tokenizer = tokenizer
    self.data = data
    self.max_token_len = max_token_len
  def __len__(self):
    return len(self.data)
  def __getitem__(self, index: int):
    data_row = self.data.iloc[index]
    sentence = data_row.Satz
    labels = data_row['valence_mean_z']
    encoding = self.tokenizer.encode_plus(
      sentence,
      add_special_tokens=True,
      max_length=self.max_token_len,
      return_token_type_ids=False,
      padding="max_length",
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )
    return dict(
      sentence=sentence,
      input_ids=encoding["input_ids"].flatten(),
      attention_mask=encoding["attention_mask"].flatten(),
      labels=torch.from_numpy(np.array(labels))
    )

In [None]:
bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME, num_labels=1, \
                                                           torchscript=True, output_hidden_states = True)

Downloading:   0%|          | 0.00/422M [00:00<?, ?B/s]

Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

In [None]:
class HPDataModule(pl.LightningDataModule):
  def __init__(self, train_df, test_df, tokenizer, batch_size=8, max_token_len=128):
    super().__init__()
    self.batch_size = batch_size
    self.train_df = train_df
    self.test_df = test_df
    self.tokenizer = tokenizer
    self.max_token_len = max_token_len
  def setup(self, stage=None):
    self.train_dataset = HPDataset(
      self.train_df,
      self.tokenizer,
      self.max_token_len
    )
    self.test_dataset = HPDataset(
      self.test_df,
      self.tokenizer,
      self.max_token_len
    )
  def train_dataloader(self):
    return DataLoader(
      self.train_dataset,
      batch_size=self.batch_size,
      shuffle=True,
      num_workers=4
    )
  def val_dataloader(self):
    return DataLoader(
      self.test_dataset,
      batch_size=self.batch_size,
      num_workers=4
    )
  def test_dataloader(self):
    return DataLoader(
      self.test_dataset,
      batch_size=self.batch_size,
      num_workers=4
    )

In [None]:
class HPM(pl.LightningModule):
  def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
    super().__init__()
    self.bert = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME,num_labels=n_classes, \
                                                              torchscript=True, output_hidden_states=True)
    self.n_training_steps = n_training_steps
    self.n_warmup_steps = n_warmup_steps
  def forward(self, input_ids, attention_mask, labels=None):
    output = self.bert(input_ids = input_ids, token_type_ids=None, attention_mask=attention_mask, labels = labels)
    loss = output[0].sum()
    return loss, output
  def training_step(self, batch, batch_idx):
    input_ids = batch["input_ids"]
    attention_mask = batch["attention_mask"]
    labels = batch["labels"].float()
    loss, outputs = self(input_ids, attention_mask, labels)
    self.log("train_loss", loss, prog_bar=True, logger=True)
    return {"loss": loss, "predictions": outputs, "labels": labels}
  def validation_step(self, batch, batch_idx):
    input_ids = batch["input_ids"]
    attention_mask = batch["attention_mask"]
    labels = batch["labels"].float()
    loss, outputs = self(input_ids, attention_mask, labels)
    self.log("val_loss", loss, prog_bar=True, logger=True)
    return loss
  def test_step(self, batch, batch_idx):
    input_ids = batch["input_ids"]
    attention_mask = batch["attention_mask"]
    labels = batch["labels"].float()
    loss, outputs = self(input_ids, attention_mask, labels)
    self.log("test_loss", loss, prog_bar=True, logger=True)
    return loss
  def configure_optimizers(self):
    optimizer = AdamW(self.parameters(), lr=1e-5)
    scheduler = get_cosine_schedule_with_warmup(
      optimizer,
      num_warmup_steps=self.n_warmup_steps,
      num_training_steps=self.n_training_steps
    )
    return dict(
      optimizer=optimizer,
      lr_scheduler=dict(
        scheduler=scheduler,
        interval='step'
      )
    )

In [None]:
train_df = X_train.merge(pd.DataFrame(y_train, index=X_train.index), how='left', left_index=True, right_index=True)
val_df = X_test.merge(pd.DataFrame(y_test, index=X_test.index), how='left', left_index=True, right_index=True)

print('Train shape: ', train_df.shape, '\n',
      'Test shape: ', val_df.shape)

In [None]:
# TRAINING 

torch.manual_seed(42)

import random
random.seed(42)

import numpy as np
np.random.seed(42)

N_EPOCHS = 2
BATCH_SIZE = 10

data_module = HPDataModule(
  train_df,
  val_df,
  tokenizer,
  batch_size=BATCH_SIZE,
  max_token_len=512
)

steps_per_epoch=len(train_df) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS

warmup_steps = 10
warmup_steps, total_training_steps

model = HPM(
  n_classes=1,
  n_warmup_steps=warmup_steps,
  n_training_steps=total_training_steps
)

trainer = pl.Trainer(
  max_epochs=N_EPOCHS,
  gpus=1,
  progress_bar_refresh_rate=30,
  deterministic=True
)

trainer.fit(model, data_module)

Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 42


Training: -1it [00:00, ?it/s]

  f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [None]:
# TESTING

trainer.test()
trained_model = HPM.load_from_checkpoint(
  trainer.checkpoint_callback.best_model_path,
  n_classes=1, output_hidden_states = True, deterministic=True
)
trained_model.eval()
trained_model.freeze()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trained_model = trained_model.to(device)
val_dataset = HPDataset(
  val_df,
  tokenizer,
  max_token_len=512
)
predictions = []
labels = []

for item in tqdm(val_dataset):
  prediction = trained_model(
                              item["input_ids"].unsqueeze(dim=0).to(device),
                              item["attention_mask"].unsqueeze(dim=0).to(device)
                             )[0]
  predictions.append(prediction.flatten().cpu().numpy()[0])#.flatten())
  labels.append(item["labels"].flatten().cpu().numpy()[0])
# predictions = torch.stack(predictions).detach().cpu()
# labels = torch.stack(labels).detach().cpu()
# print(round(r2_score(labels, predictions), 3))

  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.33647632598876953}
--------------------------------------------------------------------------------


Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

  0%|          | 0/1336 [00:00<?, ?it/s]

### [1] Computing the R^2 values using 274 test sentences. 

In [None]:
# 1 epoch 
predictions = [round(x, 3) for x in predictions]
print('R^2: ', round(r2_score(labels, predictions), 3))
print('Labels: ', labels[-6:])
print('Preds:  ', predictions[-6:])

R^2:  0.378
Labels:  [1.85, 1.9, 2.25, -2.3, -2.55, -2.6]
Preds:   [0.635, 0.777, 0.621, -0.555, -1.234, -1.229]


In [None]:
# 2 epochs
predictions = [round(x, 3) for x in predictions]
print('R^2: ', round(r2_score(labels, predictions), 3))
print('Labels: ', labels[-6:])
print('Preds:  ', predictions[-6:])

R^2:  0.408
Labels:  [1.85, 1.9, 2.25, -2.3, -2.55, -2.6]
Preds:   [0.897, 1.121, 0.916, -0.842, -1.463, -1.488]


In [None]:
# 3 epochs
predictions = [round(x, 3) for x in predictions]
print('R^2: ', round(r2_score(labels, predictions), 3))
print('Labels: ', labels[-6:])
print('Preds:  ', predictions[-6:])

R^2:  0.407
Labels:  [1.85, 1.9, 2.25, -2.3, -2.55, -2.6]
Preds:   [1.192, 1.26, 1.192, -0.785, -1.466, -1.449]


In [None]:
# 4 epochs
predictions = [round(x, 3) for x in predictions]
print('R^2: ', round(r2_score(labels, predictions), 3))
print('Labels: ', labels[-6:])
print('Preds:  ', predictions[-6:])

R^2:  0.394
Labels:  [1.85, 1.9, 2.25, -2.3, -2.55, -2.6]
Preds:   [1.289, 1.584, 1.51, -0.811, -1.423, -1.563]


In [None]:
# 5 epochs
predictions = [round(x, 3) for x in predictions]
print('R^2: ', round(r2_score(labels, predictions), 3))
print('Labels: ', labels[-6:])
print('Preds:  ', predictions[-6:])

R^2:  0.443
Labels:  [1.85, 1.9, 2.25, -2.3, -2.55, -2.6]
Preds:   [1.25, 1.475, 1.459, -1.044, -1.734, -1.587]


In [None]:
# 6 epochs
predictions = [round(x, 2) for x in predictions]
print('R^2: ', round(r2_score(labels, predictions), 3))
print('Labels: ', labels[-6:])
print('Preds:  ', predictions[-6:])

R^2:  0.424
Labels:  [1.85, 1.9, 2.25, -2.3, -2.55, -2.6]
Preds:   [1.27, 1.7, 1.52, -1.04, -1.76, -1.8]


### [2] Computing the R^2 values using 1336 test sentences.

In [None]:
# embeddings 1 epoch
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))
print(len(predictions))
#0.328

0.316


In [None]:
# embeddings 1 epoch - small
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))
print(len(predictions))


0.328


In [None]:
# embeddings 2 epochs
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))
print(len(predictions))

0.358
1336


In [None]:
# embeddings 2 epochs - small test
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))
print(len(predictions))

0.396


In [None]:
# embeddings 3 epochs
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))
print(len(predictions))

0.347


In [None]:
# embeddings 3 epochs - small
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))
print(len(predictions))

0.387


In [None]:
# embeddings 4 epochs
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))

0.336


In [None]:
# embeddings 4 epochs - small
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))

0.366


In [None]:
# embeddings 5 epochs
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))

0.324


In [None]:
# embeddings 5 epochs - small
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))

0.356


In [None]:
# embeddings 6 epochs
predictions = [round(x, 2) for x in predictions]
print(round(r2_score(labels, predictions), 3))

0.325


## Extracting sentence embeddings

In [None]:
test_sentence = '''"Wunderbar", sagte Dumbledore strahlend.'''
encoding = tokenizer.encode_plus(
  test_sentence,
  add_special_tokens=True,
  max_length=512,
  return_token_type_ids=False,
  padding="max_length",
  return_attention_mask=True,
  return_tensors='pt',
)
embeddings = []
embedding = trained_model(encoding["input_ids"].to(device), encoding["attention_mask"].to(device), labels=None)[1]
embedding = embedding[1]
embeddings.append(embedding)

test_sentence = '''"Jep", sagte Harry.'''
encoding = tokenizer.encode_plus(
  test_sentence,
  add_special_tokens=True,
  max_length=512,
  return_token_type_ids=False,
  padding="max_length",
  return_attention_mask=True,
  return_tensors='pt',
)
embedding = trained_model(encoding["input_ids"].to(device), encoding["attention_mask"].to(device), labels=None)[1][1]
embedding = embedding[1]
embeddings.append(embedding)

In [None]:
print ("Number of layers:", len(embeddings[0]), "  (initial embeddings + 12 BERT layers)")
layer_i = 0

print ("Number of batches:", len(embeddings[0][layer_i]))
batch_i = 0

print ("Number of tokens:", len(embeddings[0][layer_i][batch_i]))
token_i = 0

print ("Number of hidden units:", len(embeddings[0][layer_i][batch_i][token_i]))

Number of layers: 13   (initial embeddings + 12 BERT layers)
Number of batches: 1
Number of tokens: 512
Number of hidden units: 768


In [None]:
# source: https://mccormickml.com/2019/05/14/BERT-word-embeddings-tutorial/#32-understanding-the-output

def extract_embeddings(hidden_state, n_layer=4, multiple_layers=False):
  hidden_state = torch.stack(tuple(t.cpu() for t in hidden_state), dim=0)
  hidden_state = torch.squeeze(hidden_state, dim=1)
  token_vecs_sum = []
  if multiple_layers:
    #print(hidden_state[n_layer:-1].shape)
    token_vecs = hidden_state[n_layer:-1]
    return token_vecs.view(768, -1).sum(1)
    #return torch.mean(token_vecs, dim=1) #sentence_embedding.cpu().numpy()
  else:
    token_vecs = hidden_state[n_layer]
    return torch.mean(token_vecs, dim=0)

In [None]:
extract_embeddings(embeddings[0], n_layer=9, multiple_layers=True).shape

torch.Size([768])

In [None]:
for layer in range(6, 12):
  resulting_embs = []
  for item in tqdm(val_dataset):
    emb = trained_model(
                                      item["input_ids"].unsqueeze(dim=0).to(device),
                                item["attention_mask"].unsqueeze(dim=0).to(device)
    )[1][1]
    resulting_embs.append(extract_embeddings(emb, layer))

  res = [x.cpu().numpy() for x in resulting_embs]

  output = pd.DataFrame(data=res, index = val_df.index)

  output.to_csv('embeddings_bert_'+str(layer)+'-12_sum.csv')


  0%|          | 0/1336 [00:00<?, ?it/s]

  0%|          | 0/1336 [00:00<?, ?it/s]

  0%|          | 0/1336 [00:00<?, ?it/s]

  0%|          | 0/1336 [00:00<?, ?it/s]

  0%|          | 0/1336 [00:00<?, ?it/s]

  0%|          | 0/1336 [00:00<?, ?it/s]

In [None]:
print('Shape: %d x %d' % (len(resulting_embs), len(resulting_embs[0])))

Shape: 1336 x 768


In [None]:
res = [x.cpu().numpy() for x in resulting_embs]

In [None]:
output = pd.DataFrame(data=res, index = val_df.index)

In [None]:
output.to_csv('embeddings_bert_6l.csv')

In [None]:
e = [extract_embeddings(trained_model(encoding["input_ids"].to(device),\
                                      encoding["attention_mask"].to(device), labels=None)[1][1])]

In [None]:
embs=[]
cat_vec=[]
for i in range(len(embeddings)):
  token_embeddings = embeddings[i]
  token_embeddings = torch.stack(tuple(t.cpu() for t in token_embeddings), dim=0)
  token_embeddings = torch.squeeze(token_embeddings, dim=1)
  token_embeddings = token_embeddings.permute(1,0,2)

  for k in range(len(token_embeddings)):
    token_e = token_embeddings[k]
    # token_embeddings = torch.squeeze(token_embeddings, dim=1) 
    token_vecs = token_e[-1:]#torch.mean(token_e[-1:], dim=0)
    # embs.append(token_vecs)
    sentence_embedding = torch.mean(token_vecs, dim=0)
    embs.append(sentence_embedding)

In [None]:
token_vecs_cat = []

# `token_embeddings` is a [22 x 12 x 768] tensor.

# For each token in the sentence...
for token in token_embeddings:
    
    # `token` is a [12 x 768] tensor

    # Concatenate the vectors (that is, append them together) from the last 
    # four layers.
    # Each layer vector is 768 values, so `cat_vec` is length 3,072.
    cat_vec = torch.cat((token[-1], token[-2], token[-3], token[-4]), dim=0)
    
    # Use `cat_vec` to represent `token`.
    token_vecs_cat.append(cat_vec)
  
print ('Shape is: %d x %d' % (len(token_vecs_cat), len(token_vecs_cat[0])))

Shape is: 512 x 3072


In [None]:
!zip -r /content/file.zip /content/lightning_logs/version_0

  adding: content/lightning_logs/version_0/ (stored 0%)
  adding: content/lightning_logs/version_0/hparams.yaml (stored 0%)
  adding: content/lightning_logs/version_0/events.out.tfevents.1635167801.b8dc9f8d47cd.91.1 (deflated 32%)
  adding: content/lightning_logs/version_0/checkpoints/ (stored 0%)
  adding: content/lightning_logs/version_0/checkpoints/epoch=1-step=1963.ckpt (deflated 16%)
  adding: content/lightning_logs/version_0/events.out.tfevents.1635166554.b8dc9f8d47cd.91.0 (deflated 63%)


In [None]:
from google.colab import files
files.download("/content/embeddings_bert_12.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>