In [55]:
# Import the required packages
import pandas as pd
import torch
import numpy as np
import torch.nn.functional as F
from ast import literal_eval
from transformers import AutoModel, AutoTokenizer
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from collections import Counter
from torch import optim
import torch.nn as nn
from tqdm.auto import tqdm
from transformers import BertTokenizer, BertTokenizerFast
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn
import torchmetrics


# Data visualization

Let us visualize the dataset from the [NBME - Score Clinical Patient Notes](https://www.kaggle.com/competitions/nbme-score-clinical-patient-notes/overview) Kaggle competition.

#### Training data

* **patient_notes.csv** - A collection of about 40,000 Patient Note history portions. Only a subset of these have features annotated. You may wish to apply unsupervised learning techniques on the notes without annotations. The patient notes in the test set are not included in the public version of this file.
    * `pn_num` - A unique identifier for each patient note.
    * `case_num` - A unique identifier for the clinical case a patient note represents.
    * `pn_history` - The text of the encounter as recorded by the test taker.
* **features.csv** - The rubric of features (or key concepts) for each clinical case.
    * `feature_num` - A unique identifier for each feature.
    * `case_num` - A unique identifier for each case.
    * `feature_text` - A description of the feature.
* **train.csv** - Feature annotations for 1000 of the patient notes, 100 for each of ten cases.
    * `id` - Unique identifier for each patient note / feature pair.
    * `pn_num` - The patient note annotated in this row.
    * `feature_num` - The feature annotated in this row.
    * `case_num` - The case to which this patient note belongs.
    * `annotation` - The text(s) within a patient note indicating a feature. A feature may be indicated multiple times within a single note.
    * `location` - Character spans indicating the location of each annotation within the note. Multiple spans may be needed to represent an annotation, in which case the spans are delimited by a semicolon `;`.

## Data processing

* Some text is in uppercase, other in lowercase, without any semantic significance. Therefore, we decide to use lowercase text.
* We decided to only map most common medical abbreviations to each other. The preprocessing function resppnsible for that is `clean_abbrev` (see cell below). We contemplated expanding the most common abreviations, but finally we decided not to do it, since it could lead to complex token offset mapping

In [24]:
data_exists = True

In [25]:
# Define useful functions for data pre-processing and cleaning

def process_feature_text(text):
    '''Given a feature text as input, first it replaces '-OR-' by ';-' and then
    it replaces '-' by ' '. It returns the processed text as an output.'''
    # Replace '-OR-' by ';-' 
    text = text.replace('-OR-', ';-')
    # Replace '-' by ' ' 
    text = text.replace('-', ' ')
    return text

def clean_abbrev(text):
    '''Given a text as an input, it groups together medical abbreviations.
    It returns the processed text as an output.'''
    # note : the white space is added to keep character position of the answers
    text = text.replace('FHx', 'FH ') # FHx (family history)
    text = text.replace('FHX', 'FH ')
    text = text.replace('PMHx', 'PMH ') # PMHx (past medical history)
    text = text.replace('PMHX', 'PMH ')
    text = text.replace('SHx', 'SH ') # SHx (social history)
    text = text.replace('SHX', 'SH ')
    # Convert all the text to lowercase
    #text = text.lower()
    return text

In [26]:
if not data_exists:
    # Prepare the dataset

    # Create dataframe from 'patient_notes.csv'
    patient_notes_df = pd.read_csv('patient_notes.csv')

    # Create dataframe from 'features.csv'
    features_df = pd.read_csv('features.csv')

    # Create dataframe from 'train.csv'
    df = pd.read_csv('train.csv')

    # Convert content in 'annotation' column in df dataframe to list
    # e.g. type(['palpitations', 'heart beating/pounding']) == <class 'str'> 
    # ----> type(['palpitations', 'heart beating/pounding']) == <class 'list'>
    df['annotation'] = [literal_eval(x) for x in df['annotation']]

    # Convert content in 'location' column in df dataframe to list
    # e.g. type(['696 724']) == <class 'str'> ----> type([696, 724]) == <class 'list'> 

    # Merge notes_df, features_df and df dataframes into df dataframe
    df = df.merge(patient_notes_df, how='left')
    df = df.merge(features_df, how='left')

    # Process feature_text
    df['feature_text'] = [process_feature_text(x) for x in df['feature_text']]
    df['feature_text'] = df['feature_text'].apply(lambda x: x.lower())

    # Process pn_history
    df['pn_history'] = [clean_abbrev(x) for x in df['pn_history']]
    df['pn_history'] = df['pn_history'].apply(lambda x: x.lower())

    # Encode features
    # [-1, 0, 1, 2, ...,  9, 10, 11, 12, 100, 101, 102, ..., 916] --->
    # [ 0, 1, 2, 3, ..., 10, 11, 12, 13,  14,  15,  16, ..., 143]
    empty = -1
    classes = [empty,] + features_df.feature_num.unique().tolist()
    label_encoder = LabelEncoder()
    label_encoder.fit(classes) 

    # Encode the features in the column 'target' in df dataframe
    df['target'] = label_encoder.transform(df['feature_num'])
    empty_idx = label_encoder.transform([empty,])[0]
    print('empty_idx = ', empty_idx)
    n_classes = len(label_encoder.classes_)
    print('number of classes = ', n_classes)
    df.to_csv('df.csv', index = False)
else:
    n_classes = 144
    emtpy_idx = 0
    df = pd.read_csv('../input/files-extracted/df.csv')

In [27]:
def decode_location(locations):
    '''
    Given a locations as an input,
    it returns a sorted list of tuples. Each tuple corresponds to a 
    location. (begin, end)
    "['505 514;516 527']" ---> [(505, 514), (516, 527)]
    '''
    for x in ['[',']',"'"]:
        locations = locations.replace(x,'')
    locations = locations.replace(',',';')
    locations = locations.split(";")
    res = []
    for location in locations:
        if location:
            x,y = location.split()
            res.append((int(x),int(y)))
    return sorted(res,key=lambda x:x[0])

In [28]:
# Define the hyperparameters for the model
hyperparameters = {
    'max_length': 512,
    'padding': 'max_length',
    'return_offsets_mapping': True,
    'truncation': 'only_second',
    'model_name': 'bert-base-uncased',
    'dropout': 0.2,
    'lr': 1e-5,
    'val_size': 0.2,
    'seed': 1268,
    'batch_size': 4
}

In [29]:
df.sort_values(by=['pn_num'])

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location,pn_history,feature_text,target
0,00016_000,0,16,0,['dad with recent heart attcak'],['696 724'],hpi: 17yo m presents with palpitations. patien...,family history of mi; family history of myocar...,1
12,00016_012,0,16,12,['M'],['10 11'],hpi: 17yo m presents with palpitations. patien...,male,13
11,00016_011,0,16,11,['17yo'],['5 9'],hpi: 17yo m presents with palpitations. patien...,17 year,12
10,00016_010,0,16,10,['3-4 months of'],['56 69'],hpi: 17yo m presents with palpitations. patien...,few months duration,11
8,00016_008,0,16,8,[],[],hpi: 17yo m presents with palpitations. patien...,caffeine use,9
...,...,...,...,...,...,...,...,...,...
14284,95333_901,9,95333,901,['20 year old'],['22 33'],stephanie madden is a 20 year old woman compla...,20 year,128
14283,95333_900,9,95333,900,"['Did not respond to ibuprofen', 'Did not resp...","['212 240', '212 230;242 249']",stephanie madden is a 20 year old woman compla...,no relief with motrin; no relief with tylenol,127
14298,95333_915,9,95333,915,['no sick contacts'],['421 437'],stephanie madden is a 20 year old woman compla...,no known illness contacts,142
14290,95333_907,9,95333,907,[],[],stephanie madden is a 20 year old woman compla...,no rash,134


In [30]:
# Define the tokenizer
tokenizer = BertTokenizerFast.from_pretrained(hyperparameters['model_name'])

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [31]:
class CustomDataset(Dataset):
    
    def __init__(self, data, tokenizer, config):
        self.data = data
        self.tokenizer = tokenizer
        self.config = config

    def __len__(self):
        '''Function that returns the length of the dataset'''
        return len(self.data)

    def __getitem__(self, index):
        '''Function that processes and returns 1 datapoint at a time.'''
        sequence = self.data.iloc[index]["sequence"]
        mask = self.data.iloc[index]['mask']
        label = self.data.iloc[index]['label']
        return np.array(sequence), np.array(mask), np.array(label)

In [32]:
class CustomModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.bert = AutoModel.from_pretrained(config['model_name'])  # BERT model
        self.dropout = nn.Dropout(p=config['dropout'])
        self.config = config
        self.fc1 = nn.Linear(768, n_classes)

    def summary(self):
        return summary(self)

    def forward(self, input_ids, attention):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention)
        logits = self.fc1(self.dropout(outputs[0]))
        return logits

In [33]:
if not data_exists:
    sequences, labels, masks = [], [], []

    for group in tqdm(df.groupby('pn_num')):  #len(df.groupby('pn_num)) = 1000
        group_df = group[1]
        pn_history  = group_df.iloc[0].pn_history
        tokens = tokenizer.encode_plus(pn_history, max_length=hyperparameters['max_length'], padding='max_length', truncation=True, return_offsets_mapping=True)
        sequence = tokens['input_ids']
        attention_mask = tokens['attention_mask']
        label = np.array([empty_idx for _ in range(hyperparameters['max_length'])])
        
        # BUILD THE TARGET ARRAY
        offsets = tokens['offset_mapping']

        label_empty = True
        for index, row in group_df.iterrows():
            target = row.target
            for i, (w_start, w_end) in enumerate(offsets):
                #print(row.location)
                for start, end in decode_location(row.location):
                    if w_start < w_end and (w_start >= start) and (end >= w_end):
                        label[i] = target
                        label_empty = False
                    if w_start >= w_end:
                        break
        if not label_empty:
            sequences.append(sequence)
            masks.append(attention_mask)
            labels.append(label)

    sequences = np.array(sequences).astype(np.int32)
    masks = np.array(masks).astype(np.uint8)
    labels = F.one_hot(torch.Tensor(labels).long(), num_classes=n_classes)
    labels = np.array(labels)
    np.save(open("masks.npy",'wb'), masks)
    np.save(open("sequences.npy",'wb'), sequences)
    np.save(open("labels.npy",'wb'), labels)
else:
    masks = np.load(open("../input/files-extracted/masks.npy",'rb'))
    sequences = np.load(open("../input/files-extracted/sequences.npy",'rb'))
    labels = np.load(open("../input/files-extracted/labels.npy",'rb'))

In [34]:
frequencies_pn = list(Counter(df.pn_num.values).values()) #to count the frequency of each unique patient number

#to create a vector in wich each entry between 0 and 1000 
#is repeated according to the corresponding entry in the frequencies_pn vector:
labels_index = np.repeat(np.arange(0, 1000), frequencies_pn) #Redefine the pn_num labels [16,16,...] -> [0, 0,...]
df['sequence'] = np.repeat(sequences, frequencies_pn, axis= 0).tolist() #In order to fit our dataframe
df['mask'] = np.repeat(masks, frequencies_pn, axis = 0).tolist()
df['label'] = pd.Series(np.arange(len(df))).apply(lambda x: labels[labels_index[x]]) # Put the one hot' lables into dataframe by "new pn_labels(labels_index)"

In [35]:
# Split df dataframe into df_train and df_val dataframes (80:20)
df_train, df_val = train_test_split(df, test_size=hyperparameters['val_size'], random_state=hyperparameters['seed'])

training_data = CustomDataset(df_train, tokenizer, hyperparameters)
train_dataloader = DataLoader(training_data, batch_size=hyperparameters['batch_size'], shuffle=True)

test_data = CustomDataset(df_val, tokenizer, hyperparameters)
test_dataloader = DataLoader(test_data, batch_size=hyperparameters['batch_size'], shuffle=False)

In [36]:
true = torch.from_numpy(np.array([[ [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]], 
                 [ [0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 0, 1] ]]))
print(true.shape)

torch.Size([2, 3, 4])


In [46]:
prediction = torch.from_numpy(np.array([[ [0.90, 0.1, 0, 0], [0, 0.2, 0.80, 0], [0, 0.10, 0.90, 0]], 
                 [ [0.1, 0, 0.9, 0], [0.10, 0, 0.95, 0.90], [0, 0, 0, 1] ]])) #correct, correct, correct -------- #wrong, wrong, correct

In [38]:
prediction.shape

torch.Size([2, 3, 4])

In [39]:
torch.max(prediction, 0)

torch.return_types.max(
values=tensor([[0.9000, 0.1000, 0.3000, 0.0000],
        [0.1000, 0.2000, 0.8000, 0.9000],
        [0.0000, 0.1000, 0.9000, 1.0000]], dtype=torch.float64),
indices=tensor([[0, 0, 1, 0],
        [1, 0, 0, 1],
        [0, 0, 0, 1]]))

In [50]:
_, predicted_example = torch.max(true, 2) 
_, labelled_example = torch.max(prediction, 2)

In [65]:
labelled_example

tensor([[0, 2, 2],
        [2, 2, 3]])

In [53]:
correct = 1 if np.sum((np.array(torch.eq(predicted_example,labelled_example)).flatten())) == 4*512 else 0

In [54]:
correct

1

In [None]:
(predicted_example == labelled_example).sum().item()

## Build the model

In [57]:
device  = "cuda"
model = CustomModel(hyperparameters).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=hyperparameters['lr'])

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [68]:
def train_model(model, dataloader, optimizer, criterion):
        model.train()
        train_loss = []
        accuracy_batch = []
        for batch in tqdm(dataloader):
            optimizer.zero_grad()
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            logits = model(input_ids, attention_mask)
            labels = batch[2].to(device)
            loss = criterion(torch.permute(logits, (0,2,1)),  torch.permute(labels.double(), (0,2,1)))
            train_loss.append(loss.item()* input_ids.size(0))
            _, predicted = torch.max(logits, 2)
            _, labelled = torch.max(labels, 2)
            accuracy_batch.append(float(torchmetrics.functional.accuracy(predicted, labelled, subset_accuracy = True)))
            loss.backward()
            # clip the the gradients to 1.0. It helps in preventing the exploding gradient problem
            # it's also improve f1 accuracy slightly
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            
        accuracy = 100 * sum(accuracy_batch) / len(accuracy_batch)
        return sum(train_loss)/len(train_loss), accuracy

In [67]:
def eval_model(model, dataloader, criterion):
        model.eval()
        valid_loss = []
        accuracy_batch = []
        
        for batch in tqdm(dataloader): 
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)
            logits = model(input_ids, attention_mask)
            loss = criterion(torch.permute(logits, (0,2,1)),  torch.permute(labels.double(), (0,2,1)))
            _, predicted = torch.max(logits, 2)
            _, labelled = torch.max(labels, 2)
            accuracy_batch.append(float(torchmetrics.functional.accuracy(predicted, labelled, subset_accuracy = True)))
            valid_loss.append(loss.item() * input_ids.size(0))

        accuracy = 100 * sum(accuracy_batch) / len(accuracy_batch)
        return sum(valid_loss)/len(valid_loss), accuracy

In [69]:
import time

train_loss_data, valid_loss_data = [], []
score_data_list = []
valid_loss_min = np.Inf
since = time.time()
epochs = 3

In [None]:
TRAIN = False

In [70]:
if TRAIN:
    best_loss = np.inf

    for i in range(epochs):

        print("Epoch: {}/{}".format(i + 1, epochs))
        # first train model
        train_loss, accuracy = train_model(model, train_dataloader, optimizer, criterion)
        train_loss_data.append(train_loss)
        print(f"Train loss: {train_loss}\nTrain accuracy: {accuracy}")
        # evaluate model
        valid_loss, score = eval_model(model, test_dataloader, criterion)
        valid_loss_data.append(valid_loss)
        score_data_list.append(score)
        print(f"Valid loss: {valid_loss}")
        print(f"Valid accuracy: {score}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), "nbme_bert_v2.pth")


    time_elapsed = time.time() - since
    print('Training completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

Epoch: 1/3


  0%|          | 0/2860 [00:00<?, ?it/s]

Train loss: 0.035311944804519824
Train accuracy: 45.43706293706294


  0%|          | 0/715 [00:00<?, ?it/s]

Valid loss: 0.008092420681411642
Valid accuracy: 81.60839160839161
Epoch: 2/3


  0%|          | 0/2860 [00:00<?, ?it/s]

Train loss: 0.015357144174412137
Train accuracy: 67.58741258741259


  0%|          | 0/715 [00:00<?, ?it/s]

Valid loss: 0.0053743918855239095
Valid accuracy: 86.46853146853147
Epoch: 3/3


  0%|          | 0/2860 [00:00<?, ?it/s]

Train loss: 0.008335033340556798
Train accuracy: 79.22202797202797


  0%|          | 0/715 [00:00<?, ?it/s]

Valid loss: 0.001360639073107384
Valid accuracy: 97.41258741258741
Training completed in 37m 19s


In [None]:
model.load_state_dict(torch.load("nbme_bert_v2.pth", map_location = device))

In [144]:
def create_test_df():
    features_df = pd.read_csv(f"../input/nbme-score-clinical-patient-notes/features.csv")
    patient_notes_df = pd.read_csv(f"../input/nbme-score-clinical-patient-notes/patient_notes.csv")
    test = pd.read_csv(f"../input/nbme-score-clinical-patient-notes/test.csv")
    test = test.merge(patient_notes_df,on=['case_num','pn_num']).merge(features,on=['case_num','feature_num'])

    def process_feature_text(text):
        return text.replace("-OR-", ";-").replace("-", " ")
    
    test["feature_text"] = [process_feature_text(x) for x in test["feature_text"]]
     # Process pn_history
    test['pn_history'] = [clean_abbrev(x) for x in test['pn_history']]
    test['pn_history'] = test['pn_history'].apply(lambda x: x.lower())
    return test


In [120]:
class SubmissionDataset(Dataset):
    def __init__(self, data, tokenizer, config):
        self.data = data
        self.tokenizer = tokenizer
        self.config = config
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        sequence = self.data.iloc[index]["sequence"]
        mask = self.data.iloc[index]['mask']
        return np.array(sequence), np.array(mask)
    

In [145]:
test = create_test_df()


In [146]:
empty = -1
features_df = pd.read_csv(f"../input/nbme-score-clinical-patient-notes/features.csv")
classes = [empty,] + features_df.feature_num.unique().tolist()
label_encoder = LabelEncoder()
label_encoder.fit(classes) 
test['target']= label_encoder.transform(test['feature_num'])
empty_idx = label_encoder.transform([empty,])[0]
n_classes = len(label_encoder.classes_)

In [150]:
test_sequences, test_masks, test_offsets = [], [],[]
row_ids = []
targets = []

for g1 in tqdm(test.groupby('pn_num')):
    gdf = g1[1]

    pn_history  = gdf.iloc[0].pn_history
    targets.append([])
    row_ids.append([])
    
    test_tokens = tokenizer.encode_plus(pn_history, max_length=hyperparameters['max_length'], padding='max_length',truncation=True, return_offsets_mapping=True)
    test_sequence = test_tokens['input_ids']
    test_attention_mask = test_tokens['attention_mask'] 

    # BUILD THE TARGET ARRAY
    offset = test_tokens['offset_mapping']
    
    for index, row in gdf.iterrows():
        targets[-1].append(row.target)
        row_ids[-1].append(row.id)
         
    test_sequences.append(test_sequence)
    test_masks.append(test_attention_mask)
    test_offsets.append(offset)

test_sequences = np.array(test_sequences).astype(np.int32)
test_masks = np.array(test_masks).astype(np.uint8)
targets_to_row_ids = [dict(zip(a,b)) for a,b in zip(targets,row_ids)]

  0%|          | 0/1 [00:00<?, ?it/s]

In [275]:
frequencies_pn_test = list(Counter(test.pn_num.values).values()) #to count the frequency of each unique patient number
test['sequence'] = np.repeat(test_sequences, frequencies_pn_test, axis= 0).tolist() #In order to fit our dataframe
test['mask'] = np.repeat(test_masks, frequencies_pn_test, axis = 0).tolist()

In [273]:
submission_data = SubmissionDataset(test, tokenizer, hyperparameters)
submission_dataloader = DataLoader(submission_data, batch_size=hyperparameters['batch_size'], shuffle=False)

4

In [277]:
model.eval()
preds = []
for batch in tqdm(submission_dataloader): 
    input_ids = batch[0].to(device)
  
    attention_mask = batch[1].to(device)
    logits = model(input_ids, attention_mask)
    _, predicted = torch.max(logits, 2)
    preds.append(predicted.detach().cpu().numpy())

  0%|          | 0/1 [00:00<?, ?it/s]

In [282]:
preds[0].shape

(5, 512)

In [264]:
preds_concatenated = np.concatenate(preds, axis= -1)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 4 and the array at index 1 has size 1

In [247]:
another_arr = preds_concatenated.reshape(-1, preds_concatenated.shape[-1])

In [283]:
preds_argmax= np.argmax(preds, axis= -1)

In [266]:
print(len(preds_argmax))

2560


In [191]:
offsets[0]

NameError: name 'offsets' is not defined

In [284]:
def decode_position(pos):
    return ";".join([" ".join(np.array(p).astype(str)) for p in pos])

def translate(preds,targets_to_row_ids,offsets):
    all_ids = []
    all_pos = []

    for k in range(len(preds)):
        print(k)
        offset = offsets[k]
        pred = preds[k]
        targets_to_ids = targets_to_row_ids[k]
        
        prediction = {targets_to_ids[t]:[] for t in targets_to_ids}
        i = 0
        while i<hyperparameters['max_length']:
            label = pred[i]
            
            if label == empty_idx:
                i += 1
                continue
            if label in targets_to_ids:
                key = targets_to_ids[label]
                print(offset[i])
                start = offset[i][0]
                print(start)
                while i<hyperparameters['max_length']:
                    if pred[i] != label:
                        break
                    else:
                        end = max(offset[i])
                    i += 1
                if  end == 0:
                    break
                prediction[key].append((start,end))
            else:
                i+=1
        for key in prediction:
            all_ids.append(key)
            all_pos.append(decode_position(prediction[key]))
    df = pd.DataFrame({
        "id":all_ids,
        "location": all_pos
    })
    return df

In [285]:
sub = translate(preds_argmax,targets_to_row_ids,test_offsets)
sub.to_csv('submission.csv',index=False)
sub.head(50)

0


IndexError: index 5 is out of bounds for axis 0 with size 5

In [158]:
test_offsets[0][0]

(0, 0)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=ed757204-e2d5-436b-b0d3-03d7c5b85d52' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>