In [1]:
import pandas as pd
from sklearn import feature_extraction, linear_model, model_selection, preprocessing
from transformers import AutoTokenizer
import torch
import numpy as np

In [34]:
df = pd.read_csv('data/train.csv')

# Brief look at the data

In [35]:
df.sample(20)

Unnamed: 0,id,keyword,location,text,target
2644,3796,destruction,,So you have a new weapon that can cause un-ima...,1
2227,3185,deluge,,The f$&amp;@ing things I do for #GISHWHES Just...,0
5448,7769,police,UK,DT @georgegalloway: RT @Galloway4Mayor: ÛÏThe...,1
132,191,aftershock,,Aftershock back to school kick off was great. ...,0
6845,9810,trauma,"Montgomery County, MD",in response to trauma Children of Addicts deve...,0
5559,7934,rainstorm,,@Calum5SOS you look like you got caught in a r...,0
1765,2538,collision,,my favorite lady came to our volunteer meeting...,1
1817,2611,crashed,,@brianroemmele UX fail of EMV - people want to...,1
6810,9756,tragedy,"Los Angeles, CA",Can't find my ariana grande shirt this is a f...,0
4398,6254,hijacking,"Athens,Greece",The Murderous Story Of AmericaÛªs First Hijac...,1


# Clean the text up a little bit

In [36]:
df['text'] = df['text'].apply(lambda x: x.lower()) # convert to lowercase
df['text'] = df['text'].apply(lambda x: x.replace("#", "")) # remove 

In [37]:
df['text'] = df['text'].apply(lambda x: " ".join([x for x in x.split(" ") if "http" not in x])) # remove hypterlinks
df['text'] = df['text'].apply(lambda x: " ".join([x for x in x.split(" ") if "@" not in x])) # remove tags
df['text'] = df['text'].apply(lambda x: " ".join([x for x in x.split(" ") if "a" != x])) # remove a
df['text'] = df['text'].apply(lambda x: " ".join([x for x in x.split(" ") if "the" != x])) # remove the
df['text'] = df['text'].apply(lambda x: " ".join([x for x in x.split(" ") if "an" != x])) # remove an

In [38]:
df.sample(25)

Unnamed: 0,id,keyword,location,text,target
7138,10224,volcano,,hill hill mountain volcano of hell mountain hi...,1
2151,3086,deaths,Blackpool,cancers equate for around 25% of all deaths in...,1
4395,6247,hijacking,World,murderous story of americaûªs first hijacking...,1
2508,3602,desolation,"Birmingham, UK",date for release of ep03 desolation is set. st...,1
1378,1987,bush%20fires,London/Bristol/Guildford,on holiday to relax sunbathe and drink ... put...,0
6825,9775,trapped,????s ?? ????Ìø????Ì¡a,(?eudrylantiqua?) hollywood movie about trappe...,1
3877,5514,flattened,Some other mansion,flattened all cartoony-like.\n'whoa there papa!',0
3465,4957,exploded,,news science london warship exploded in 1665 b...,1
6245,8921,snowstorm,"Brooklyn, NY",'cooler than freddie jackson sippin' milkshake...,0
5664,8083,rescue,Wanderlust,mary coming to troy rescue. ??????,0


# We must turn text into mathematical representation

There are a number of ways to do this, the most simple of which is a count vectorizor, where we simply count the number of times a word shows up in the tweet.


For our transformer example, we use a pre-trained tokenizer. This has the vocabulary step already taken care of (the training), and all it has to do is to map our text documents into encoded vectors. We set the max length of any document/tweet to the length of the longest tweet in our training data. We allow truncation of any tweets that are longer than this if one is passed to the model during inference (or training) and we utilize a padding scheme of "max_length" to pad shorter tweets until they are the same length of the max_length. This is done because the training step will be performed by matrix algebra, and in order to multiply matrices together they must be a consistent length, e.i. a 3x1 matrix can be multiplied by a 3x3 matrix, but cannot be multiplied by a matrix that has 3 rows of length: 3-2-3, respectively. (that is in fact not a well-defined matrix). 

In [39]:
list(df["text"][0:2].values)

['our deeds are reason of this earthquake may allah forgive us all',
 'forest fire near la ronge sask. canada']

In [40]:
# count_vectorizer = feature_extraction.text.CountVectorizer()
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoding = tokenizer(list(df["text"][0:3].values),
                     padding="max_length", 
                     truncation=True,
                     max_length=df["text"].apply(len).max())
## let's get counts for the first single tweet in the data
# example_train_vectors = count_vectorizer.fit_transform(df["text"][0:1])

loading configuration file config.json from cache at /Users/dylan.frizzell/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file vocab.txt from cache at /Users/dylan.frizzell/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891c

In [41]:
# Show an example encoding. Notice the padding as well as sentence begin and end tokens (101,102).
encoding

{'input_ids': [[101, 2256, 15616, 2024, 3114, 1997, 2023, 8372, 2089, 16455, 9641, 2149, 2035, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 3224, 2543, 2379, 2474, 6902, 3351, 21871, 2243, 1012, 2710, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [42]:
tokenizer.decode(encoding['input_ids'][1])

'[CLS] forest fire near la ronge sask. canada [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

## Notice some things about this new encoding.
There is a preceeding 101 and trailing 102 for all rows, these are special start and seperator tokens. There is an additional token type id that is used to signify special sequences (https://huggingface.co/docs/transformers/v4.26.0/en/model_doc/bert#transformers.BertTokenizer). Also there is an attention mask that tells the model which columns to look at. This is because the BERT model requires a square tensor as input, so all input columns must be the same length, and we pad the end with zeros.

In [43]:
# Reset the index
df = df.sample(frac=1).reset_index(drop=True)

# I am now going to embedded datasets of both train and test separately

In [66]:
n_training_samples = 1000
n_test_samples = 50
## let's get embeddings for the  data
encoded_train_dataset = tokenizer(list(df["text"][:n_training_samples].values),
                     padding="max_length",
                     truncation=True,
                     max_length=df["text"].apply(len).max())

encoded_test_dataset = tokenizer(list(df["text"][n_training_samples:n_training_samples+n_test_samples].values),
                     padding="max_length",
                     truncation=True,
                     max_length=df["text"].apply(len).max())


## This particular model uses "labels" as a keyword for prediction labels.
So this means that for our encoded_\<version\>_dataset we have to build a "labels" column that has the category we are aiming to predict.

In [67]:
encoded_train_dataset['labels'] = df[:n_training_samples]['target'].values.astype(np.int64)
encoded_test_dataset['labels'] = df[n_training_samples:n_training_samples+n_test_samples]['target'].values.astype(np.int64)

## I use the huggingface "Dataset" object to make training a little easier.

There is actually quite a bit of difficulty that can be had here to get the data the right "type", a Pytorch tensor object of type Float/Long. Using some of this huggingface infrastructure is supposed to help with that... supposed to. Now-a-days I think using the _Dataset_ object from the datasets module is essentially mandatory in order to fine-tune or train models. This creates a "Dataset" object that has properties that huggingface knows to look for.

In [68]:
from datasets import Dataset, Features, Value

train_dataset = Dataset.from_dict(encoded_train_dataset)
test_dataset = Dataset.from_dict(encoded_test_dataset)


In [69]:
train_dataset

Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 300
})

In [70]:
# We should set the format of our dataset object to pytorch, as this is the underlaying model type of "Bert" which we will use
train_dataset.set_format("torch")
test_dataset.set_format("torch")

In [71]:
# One can see that we should be converting our datatypes into "torch" objects.
train_dataset['input_ids'].type()

'torch.LongTensor'

Here is where we actually download and register the ML model of choice. We are using the bert-base model which is a pretrained transformer architecture that is not specialized toward any specific dataset. We use the uncased dataset which refers to the fact that it draws now distinction between upper and lowercase when building tokens. The "problem_type" argument tells huggingface what kind of "head" to attach to the model. The last few layers of the model can be referred to as the head and give different final responses depending on the desired task. In this case we select "single_label_classificationn" which means we are looking at a binary classification problem - a yes or no. This while utilize a final layer that is a single sigmoid function which provides a single number that can be mapped between -1,1 and can be interpretted as a confidence that a document/tweet is a "yes" or "no", or in our case a tweet about a natural disaster or not.

In [72]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", 
                                                           problem_type="single_label_classification")

loading configuration file config.json from cache at /Users/dylan.frizzell/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "transformers_version": "4.26.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file pytorch_model.bin from cache at /Users/dylan.frizzell/.cache/huggingface/hub/model

## Model training

I will use some of huggingface's model training infrastructure to generate a training arguments object. AWS Sagemaker has something similar when building training jobs. 

I should note that I have selected model parameters that are for demonstration purposes only. I am running on a light CPU laptop, so I only perform a single epoch. In a more normal setting, one will want to train for multiple epochs, as well as perform hyperparameter scans on other parameters. This essentially means training multiple models with different arguments and selecting the model that most suitably solves your problem while not displaying any signs of overtraining or undesireable extrapolation.

In [73]:
from transformers import TrainingArguments, Trainer

args = TrainingArguments(
    f"bert-finetuned-binary-class",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
#     per_device_train_batch_size=batch_size,
#     per_device_eval_batch_size=batch_size,
    num_train_epochs=2,
    weight_decay=0.01,
    load_best_model_at_end=True,
#     metric_for_best_model=metric_name,
    #push_to_hub=True,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


We can check the training dataset's form to ensure it will be as expected by the traning job. Since we used the tokenizer that was used with the bert-base-uncased it has the embeddings that are expected by the model which is given as input_ids, token_type_ids, and attention_mask. We also have the label column that we created that is unique to our specific task that we are aiming to fine tune on.

In [74]:
train_dataset.features

{'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'labels': Value(dtype='int64', id=None)}

In [75]:
# Create the huggingface train object that will run our training jobs.
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer
#     compute_metrics=compute_metrics
)

# This is a test to see if the model can compute with no errors
Before running the training job we need to make sure that our dataset is correctly formatted in the expected format that the model is expecting. This means not only the shape of the training dataframe, but also the datatype, integer/float size, torch/tensorflow tensor type, etc. By running one of our single input_ids through the model and getting a response shows that the datatypes, tensor types, data sizes, etc. are all in alignment.

In [76]:
outputs = model(input_ids=train_dataset['input_ids'][0].unsqueeze(0),
                labels=train_dataset[0]['labels'].unsqueeze(0))
outputs

SequenceClassifierOutput(loss=tensor(0.7547, grad_fn=<NllLossBackward0>), logits=tensor([[0.3679, 0.2484]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

# We can either train a model or load a pretrained one

The first time thorugh this notebook you will want to run the training job. Upon subsequent runs, in order to save time one can upload the model that has already been fine tuned in the next cell.

In [77]:
# Use this line if this is the first time running this notebook or you wish to retune a model.
trainer.train()

***** Running training *****
  Num examples = 300
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 76
  Number of trainable parameters = 109483778


  0%|          | 0/76 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 50
  Batch size = 8


  0%|          | 0/7 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-binary-class/checkpoint-38
Configuration saved in bert-finetuned-binary-class/checkpoint-38/config.json


{'eval_loss': 0.5303837656974792, 'eval_runtime': 17.4092, 'eval_samples_per_second': 2.872, 'eval_steps_per_second': 0.402, 'epoch': 1.0}


Model weights saved in bert-finetuned-binary-class/checkpoint-38/pytorch_model.bin
tokenizer config file saved in bert-finetuned-binary-class/checkpoint-38/tokenizer_config.json
Special tokens file saved in bert-finetuned-binary-class/checkpoint-38/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 50
  Batch size = 8


  0%|          | 0/7 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-binary-class/checkpoint-76
Configuration saved in bert-finetuned-binary-class/checkpoint-76/config.json


{'eval_loss': 0.4777432978153229, 'eval_runtime': 21.3435, 'eval_samples_per_second': 2.343, 'eval_steps_per_second': 0.328, 'epoch': 2.0}


Model weights saved in bert-finetuned-binary-class/checkpoint-76/pytorch_model.bin
tokenizer config file saved in bert-finetuned-binary-class/checkpoint-76/tokenizer_config.json
Special tokens file saved in bert-finetuned-binary-class/checkpoint-76/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from bert-finetuned-binary-class/checkpoint-76 (score: 0.4777432978153229).


{'train_runtime': 832.749, 'train_samples_per_second': 0.721, 'train_steps_per_second': 0.091, 'train_loss': 0.5219881158126028, 'epoch': 2.0}


TrainOutput(global_step=76, training_loss=0.5219881158126028, metrics={'train_runtime': 832.749, 'train_samples_per_second': 0.721, 'train_steps_per_second': 0.091, 'train_loss': 0.5219881158126028, 'epoch': 2.0})

## Model checkpoints are saved
As you can see in the above output, the model was trained and model "checkpoints" are saved that give the model weights at the end of each epoch. We can then reload the model at that particular moment in time. 

## Let's do this now to load the model we just trained

In [78]:
loaded_trained_model = AutoModelForSequenceClassification.from_pretrained("bert-finetuned-binary-class/checkpoint-76")

loading configuration file bert-finetuned-binary-class/checkpoint-76/config.json
Model config BertConfig {
  "_name_or_path": "bert-finetuned-binary-class/checkpoint-76",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.26.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file bert-finetuned-binary-class/checkpoint-76/pytorch_model.bin
All model checkpoint weights were used when initializing BertF

# Finally let's play around with our newly trained model
And see how we can tweak words to understand what it has learned. Notice how the order of the words now matters.

In [88]:
text = "There is a very serious emergency and a fire at the lodge."
# text = "These chicken wings are fire."

# text = "mudslide on my mountain"
# text = "on my mudslide mountain"

# Take our text snippet and create an embedding in the pytorch(pt) format.
# This uses the proper tokenizer from before
# We also map this to the proper dataset dictionary format in the second line
encoding = tokenizer(text, return_tensors="pt")
encoding = {k: v.to(model.device) for k,v in encoding.items()}

# Create the inference
outputs = loaded_trained_model(**encoding)

# Get the output layers/logits. This is the "head" of the model's raw output.
# There are two logits, one for "no" and one for "yes" in regards to "is this a tweet about a natural disaster?"
logits = outputs.logits
logits.shape

# We look at the output of the logits, and apply a squeeze normalization. 
# Again, this is essentially the raw output of the model's response of ["no","yes"]
print("Logits:")
print(logits.squeeze().cpu())
print("------------------------------------")

# In order to attach a probabilistic interpretation to this, we run the logits through the sigmoid function to map 
# The logits' values to probabilities. We are left with a tensor that is interpretted as [percent belief no, percent belief yes].
# The order of no,yes is set by the order of our 'labels' column in the train/test datasets (we labeled 'no' tweet as label 0, 'yes' as 1).
# Depending on the training method and dataset, these do not always sum to 1. For this we must calibrate the model.
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(logits.squeeze().cpu())
print("Sigmoid: [probability_no, probability_yes]")
print(probs)
print("------------------------------------")

# Lastly if we must make a decision we will say our prediction is tied to whether or not the "yes" column
# has a greater than 50% confidence.
print("Prediction:")
print(probs[1]>0.5)
print("------------------------------------")


Logits:
tensor([0.2018, 0.1848], grad_fn=<SqueezeBackward0>)
------------------------------------
Sigmoid: [probability_no, probability_yes]
tensor([0.5503, 0.5461], grad_fn=<SigmoidBackward0>)
------------------------------------
Prediction:
tensor(True)
------------------------------------


##  Am aside on logits and sigmoid functions
BERT uses the BCEwithLogits loss function and the output of the model is actual not a prediction like standard cross-entropy loss functions. In order to acheive the probabilistic interpretation we use the sigmoid function.

Sigmoid:     $\sigma = \frac{1}{1-e^{\beta x}}$

![](sigmoid.svg)

# We can calculate predictions on the full test set

In [93]:
predictions = []

for i, tweet in enumerate(df["text"][n_training_samples:n_training_samples+n_test_samples].values):
    # print("Calculating")
    inputs = tokenizer(tweet, return_tensors="pt")
    outputs = loaded_trained_model(**inputs)
    logits = outputs.logits
    probs = sigmoid(logits.squeeze().cpu())
    predictions.append(probs)
    # print(i)
    

In [94]:
predictions[0:3]

[tensor([0.3415, 0.7190], grad_fn=<SigmoidBackward0>),
 tensor([0.5845, 0.3669], grad_fn=<SigmoidBackward0>),
 tensor([0.6335, 0.2735], grad_fn=<SigmoidBackward0>)]

# Check the accuracy
Now that we have a series of predictions on the test set we can use this to estimate the accurcy of our model.
This is a little more difficult with the difference in output of the model. 

In [96]:
from sklearn.metrics import accuracy_score

is_dangerous = [1 if x < 0.5 else 0 for x in [x[0] for x in predictions] ]
test_labels = df[n_training_samples:n_training_samples+n_test_samples]['target'].values.astype(np.int64)

accuracy = accuracy_score(is_dangerous, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.82
