In [4]:
import numpy as np
import pandas as pd

# Core packages for general use throughout the notebook.

import random
import warnings
import time
import datetime

# For customizing our plots.

from matplotlib.ticker import MaxNLocator
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches

# Loading pytorch packages.

import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, BertConfig, get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

In [3]:
# Choose the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise fall back to the CPU.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report which device was selected.

if device.type == 'cuda':
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

No GPU available, using the CPU instead.


In [6]:
# Loading the data for modelling.
# Both CSV files are read from the current working directory.

train = pd.read_csv('train_cleaned.csv')
test = pd.read_csv('test_cleaned.csv')

# Report the row count of each split. The second message previously said
# "training" for the test set as well, which made the two lines ambiguous.
print(f'Number of training tweets: {train.shape[0]}\n')
print(f'Number of test tweets: {test.shape[0]}\n')

# Show a few random training rows as a quick sanity check of the columns.
display(train.sample(4))

Number of training tweets: 7613

Number of training tweets: 3263



Unnamed: 0,id,keyword,location,text,target,text_clean,tokenized,lower,stopwords_removed,pos_tags,wordnet_pos,lemmatized,lemma_str,filtered_lemma_str,combined_str
5931,8471,screamed,18 ??? CC,Some kids going to leadership camp came into m...,0,Some kids going to leadership camp came into m...,"['Some', 'kids', 'going', 'to', 'leadership', ...","['some', 'kids', 'going', 'to', 'leadership', ...","['kids', 'going', 'leadership', 'camp', 'came'...","[('kids', 'NNS'), ('going', 'VBG'), ('leadersh...","[('kids', 'n'), ('going', 'v'), ('leadership',...","['kid', 'go', 'leadership', 'camp', 'come', 'w...",kid go leadership camp come work today ask bat...,kid leadership camp come work today ask bathro...,kid leadership camp come work today ask bathro...
470,680,attack,"Halton, Ontario",Suspect in latest US theatre attack had psycho...,1,Suspect in latest US theatre attack had psycho...,"['Suspect', 'in', 'latest', 'US', 'theatre', '...","['suspect', 'in', 'latest', 'us', 'theatre', '...","['suspect', 'latest', 'us', 'theatre', 'attack...","[('suspect', 'JJ'), ('latest', 'JJS'), ('us', ...","[('suspect', 'a'), ('latest', 'a'), ('us', 'n'...","['suspect', 'late', 'u', 'theatre', 'attack', ...",suspect late u theatre attack psychological issue,suspect late theatre attack psychological issue,suspect late theatre attack psychological issu...
874,1267,blood,PunPunl??ndia,@Lobo_paranoico Mad Men,0,@Loboparanoico Mad Men,"['@Loboparanoico', 'Mad', 'Men']","['@loboparanoico', 'mad', 'men']","['@loboparanoico', 'mad', 'men']","[('@loboparanoico', 'JJ'), ('mad', 'NNS'), ('m...","[('@loboparanoico', 'a'), ('mad', 'n'), ('men'...","['@loboparanoico', 'mad', 'men']",@loboparanoico mad men,@loboparanoico mad men,@loboparanoico mad men blood
6352,9082,structural%20failure,,Slums are a manifestation state failure to pro...,1,Slums are a manifestation state failure to pro...,"['Slums', 'are', 'a', 'manifestation', 'state'...","['slums', 'are', 'a', 'manifestation', 'state'...","['slums', 'manifestation', 'state', 'failure',...","[('slums', 'NNS'), ('manifestation', 'NN'), ('...","[('slums', 'n'), ('manifestation', 'n'), ('sta...","['slum', 'manifestation', 'state', 'failure', ...",slum manifestation state failure provide housi...,slum manifestation state failure provide housi...,slum manifestation state failure provide housi...


In [7]:
# NOTE(review): this cell originally held the unfinished statement
# `train = train.`, which is a SyntaxError and stops "Run All" at this point.
# The intended transformation cannot be recovered from the notebook, so
# `train` is deliberately left untouched (the next cell still reads its
# `target` and `text` columns) and the cell only previews two columns.
# TODO: restore the intended step, then re-run to refresh the saved output.
train[['id', 'combined_str']].head()

Unnamed: 0,id,combined_str
0,0,happen terrible car crash
1,2,heard #earthquake different city stay safe eve...
2,3,forest fire spot pond geese flee across street...
3,9,apocalypse light #spokane #wildfires
4,11,typhoon soudelor kill china taiwan


In [None]:
# Target labels for the training rows, as a NumPy array.
labels = train.target.values

# Number of training rows; the combined array can be split back into its
# train / test parts at this position later.
idx = labels.shape[0]

# Raw tweet text of train followed by test, as one NumPy array.
combined = pd.concat([train, test])['text'].values