In [1]:
!pip install ipdb
!pip install transformers



# Package loading 

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np

import ipdb
import spacy
spacy.load("en_core_web_sm")
import torch
from torchtext import data
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

from sklearn.model_selection import KFold, StratifiedKFold

  import pandas.util.testing as tm


# Data Exploration

In [3]:
train = pd.read_csv("./data/Train_process.csv")
test = pd.read_csv("./data/Test_process.csv")

# train = pd.read_csv("./data/Train.csv")
# test = pd.read_csv("./data/Test.csv")

sample = pd.read_csv("./data/SampleSubmission.csv")

y_kfold = train.label.values # this is used down in the the kfold

In [4]:
train.tail()

Unnamed: 0,ID,text,label
611,BOHSNXCN,what should i do to stop alcoholism ?,Alcohol
612,GVDXRQPY,how to become my oneself again,Suicide
613,IO4JHIQS,how can someone stop it ?,Alcohol
614,1DS3P1XO,i feel unworthy,Depression
615,ORF71PVQ,i feel so discouraged with life,Depression


Let check the distribution of the data. 

In [5]:
test.tail()

Unnamed: 0,ID,text
304,Z9A6ACLK,yes
305,ZDUOIGKN,my girlfriend dumped me
306,ZHQ60CCH,how can i go back to being my old self ?
307,ZVIJMA4O,is it true hang is medicinal ?
308,ZYIFAY98,how can i overcome the problem ?


In [6]:
sample.tail()

Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
304,Z9A6ACLK,0,0,0,0
305,ZDUOIGKN,0,0,0,0
306,ZHQ60CCH,0,0,0,0
307,ZVIJMA4O,0,0,0,0
308,ZYIFAY98,0,0,0,0


In [7]:
train.label.value_counts()

Depression    352
Alcohol       140
Suicide        66
Drugs          58
Name: label, dtype: int64

In [8]:
list(np.unique(train.label))

['Alcohol', 'Depression', 'Drugs', 'Suicide']

In [9]:
# # This plot need to be checked 
# plt.pie(list(train.len.value_counts()), labels=list(np.unique(train.len)))
# plt.show()

## Preparing Data

One of the main concepts of TorchText is the `Field`. These define how your data should be processed. In our sentiment classification task the data consists of both the raw string of the review and the sentiment, either "pos" or "neg".

The parameters of a `Field` specify how the data should be processed. 

We use the `TEXT` field to define how the review should be processed, and the `LABEL` field to process the sentiment. 

Our `TEXT` field has `tokenize='spacy'` as an argument. This defines that the "tokenization" (the act of splitting the string into discrete "tokens") should be done using the [spaCy](https://spacy.io) tokenizer. If no `tokenize` argument is passed, the default is simply splitting the string on spaces.

`LABEL` is defined by a `LabelField`, a special subset of the `Field` class specifically used for handling labels. We will explain the `dtype` argument later.

For more on `Fields`, go [here](https://github.com/pytorch/text/blob/master/torchtext/data/field.py).

We also set the random seeds for reproducibility.

In [10]:
SEED = 1234

# torch.manual_seed(SEED)
# torch.backends.cudnn.deterministic = True

# # TEXT = data.Field(tokenize = 'spacy')
# LABELS = data.LabelField(dtype = torch.float)

# TEXT = data.Field(batch_first = True,
#                   use_vocab = False,
# #                   tokenize = tokenize_and_cut,
#                   preprocessing = tokenizer.convert_tokens_to_ids,
#                   init_token = init_token_idx,
#                   eos_token = eos_token_idx,
#                   pad_token = pad_token_idx,
#                   unk_token = unk_token_idx)

import torch
from torchtext import data
from torchtext import datasets
import random

SEED = 1234

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

TEXT = data.Field(tokenize = 'spacy')
LABELS = data.LabelField(dtype = torch.float)

In [11]:
train_data, test_data = data.TabularDataset.splits(
    path='./data', train='Train_process.csv',
    test='Test_process.csv', format='csv', skip_header=True,
    fields=[('ID', None),
            ('text', TEXT),
            ('label', LABELS)])


In [12]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 616
Number of testing examples: 309


Let check one example

In [13]:
print(vars(train_data.examples[0]))

{'text': ['i', 'feel', 'that', 'it', 'was', 'better', 'i', 'die', 'am', 'happy'], 'label': 'Depression'}


In [14]:
vars(train_data[-1])

{'label': 'Depression',
 'text': ['i', 'feel', 'so', 'discouraged', 'with', 'life']}

In [15]:
# MAX_VOCAB_SIZE = 25_000

# glove.6B.300d": 862 M

TEXT.build_vocab(train_data, 
#                  max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.300d",  #glove.twitter.27B.200d
                 unk_init = torch.Tensor.normal_)

LABELS.build_vocab(train_data)

## Split 

In [16]:
import random

train_data, valid_data = train_data.split(random_state = random.seed(SEED), 
                                          stratified=True, split_ratio=0.8)

In [17]:
print(f'Number of training examples: {len(train_data)}')
# print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 493
Number of testing examples: 309


In [18]:
# print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABELS.vocab)}")

Unique tokens in LABEL vocabulary: 4


In [19]:
print(LABELS.vocab.stoi)

defaultdict(<function _default_unk_index at 0x7f9a5cf4e2f0>, {'Depression': 0, 'Alcohol': 1, 'Suicide': 2, 'Drugs': 3})


In [20]:
print(vars(valid_data.examples[-1]))

{'text': ['what', 'are', 'the', 'effects', 'of', 'depression', '?'], 'label': 'Depression'}


The final step of preparing the data is creating the iterators. We iterate over these in the training/evaluation loop, and they return a batch of examples (indexed and converted into tensors) at each iteration.

We'll use a BucketIterator which is a special type of iterator that will return a batch of examples where each example is of a similar length, minimizing the amount of padding per example.

We also want to place the tensors returned by the iterator on the GPU (if you're using one). PyTorch handles this using torch.device, we then pass this device to the iterator.

In [21]:
BATCH_SIZE = 32

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    sort=False,
    device = device)

# train_iterator, test_iterator = data.BucketIterator.splits(
#     (train_data, test_data), 
#     batch_size = BATCH_SIZE,
#     sort=False,
#     device = device)

In [22]:
# next(test_iterator.__iter__())

# Build the Model

In [23]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, text):
        
        #text = [sent len, batch size]
        
        text = text.permute(1, 0)
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
        
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        
        out = self.fc(cat)
        
#         out = self.sigmoid(out)
            
        return out #self.fc(cat) #

In [24]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 300 #100
N_FILTERS = 100
FILTER_SIZES = [2,3,4]
OUTPUT_DIM = len(LABELS.vocab)
DROPOUT = 0.3
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

In [25]:
# # can view the summary if you like
# model.to(device)
# summary(model, (16, 32))

In [26]:
# MAX_VOCAB_SIZE = 25_000

# TEXT.build_vocab(train_data, 
#                  max_size = MAX_VOCAB_SIZE, 
#                  vectors = "glove.6B.100d", 
#                  unk_init = torch.Tensor.normal_)

# LABEL.build_vocab(train_data)

Let's also create a function that will tell us how many trainable parameters our model has so we can compare the number of parameters across different models.

In [27]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 512,104 trainable parameters


In [28]:
# for name, param in model.named_parameters():                
#     if name.startswith('bert'):
#         param.requires_grad = False

In [29]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 512,104 trainable parameters


In [30]:
for name, param in model.named_parameters():                
    if param.requires_grad:
        print(name)

embedding.weight
convs.0.weight
convs.0.bias
convs.1.weight
convs.1.bias
convs.2.weight
convs.2.bias
fc.weight
fc.bias


# Train the model

Next, we'll load our pre-trained embeddings.

In [31]:
pretrained_embeddings = TEXT.vocab.vectors

model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[-0.1117, -0.4966,  0.1631,  ..., -1.4447,  0.8402, -0.8668],
        [ 0.1032, -1.6268,  0.5729,  ...,  0.3180, -0.1626, -0.0417],
        [-0.1329,  0.1699, -0.1436,  ..., -0.2378,  0.1477,  0.6290],
        ...,
        [ 0.5390,  0.0090,  0.2787,  ..., -0.1150, -0.4234,  0.0318],
        [-0.3364, -0.1851, -0.1022,  ..., -0.1311, -0.0346,  0.3446],
        [-0.2117,  0.1462,  0.0605,  ..., -0.4895, -0.0707, -0.0814]])

Then zero the initial weights of the unknown and padding tokens.

In [32]:
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

Another different to the previous notebooks is our loss function (aka criterion). Before we used `BCEWithLogitsLoss`, however now we use `CrossEntropyLoss`. Without going into too much detail, `CrossEntropyLoss` performs a *softmax* function over our model outputs and the loss is given by the *cross entropy* between that and the label.

Generally:
- `CrossEntropyLoss` is used when our examples exclusively belong to one of $C$ classes
- `BCEWithLogitsLoss` is used when our examples exclusively belong to only 2 classes (0 and 1) and is also used in the case where our examples belong to between 0 and $C$ classes (aka multilabel classification).

In [33]:
from collections import Counter
class_distrbution = Counter()
for text in train["label"].values:
    class_distrbution[text] += 1
        
class_distrbution.most_common(10)

[('Depression', 352), ('Alcohol', 140), ('Suicide', 66), ('Drugs', 58)]

In [34]:
np.sum(list(class_distrbution.values()))

616

In [35]:
class_distrbution

Counter({'Alcohol': 140, 'Depression': 352, 'Drugs': 58, 'Suicide': 66})

In [36]:
LABELS.vocab.stoi.keys()

dict_keys(['Depression', 'Alcohol', 'Suicide', 'Drugs'])

In [37]:
# criterion = nn.CrossEntropyLoss(weight = x)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# train_loader_origin.dataset.classes,
class_weights = [class_distrbution[i] for i in LABELS.vocab.stoi.keys()]
class_weights_normalized = [max(class_weights)/i for i in class_weights]

class_weights_normalized ,torch.Tensor(class_weights_normalized)

w = torch.Tensor(class_weights_normalized)
w = w.to(device)
# x = x
w,class_distrbution

(tensor([1.0000, 2.5143, 5.3333, 6.0690], device='cuda:0'),
 Counter({'Alcohol': 140, 'Depression': 352, 'Drugs': 58, 'Suicide': 66}))

In [38]:
class_weights, w

([352, 140, 66, 58], tensor([1.0000, 2.5143, 5.3333, 6.0690], device='cuda:0'))

In [39]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

criterion = nn.CrossEntropyLoss()
# criterion = nn.CrossEntropyLoss(weight=w)



model = model.to(device)
criterion = criterion.to(device)

Before, we had a function that calculated accuracy in the binary label case, where we said if the value was over 0.5 then we would assume it is positive. In the case where we have more than 2 classes, our model outputs a $C$ dimensional vector, where the value of each element is the beleief that the example belongs to that class. 

For example, in our labels we have: 'HUM' = 0, 'ENTY' = 1, 'DESC' = 2, 'NUM' = 3, 'LOC' = 4 and 'ABBR' = 5. If the output of our model was something like: **[5.1, 0.3, 0.1, 2.1, 0.2, 0.6]** this means that the model strongly believes the example belongs to class 0, a question about a human, and slightly believes the example belongs to class 3, a numerical question.

We calculate the accuracy by performing an `argmax` to get the index of the maximum value in the prediction for each element in the batch, and then counting how many times this equals the actual label. We then average this across the batch.

In [40]:
from sklearn.metrics import accuracy_score, precision_score

def categorical_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    
    correct = max_preds.squeeze(1).eq(y)
#     ipdb.set_trace()
    pr = correct.sum().item() / torch.FloatTensor([y.shape[0]])
    
#     pr = precision_score(y.detach().numpy(), 
#                            max_preds.argmax(dim = 1, 
#                                         keepdim = True).detach().numpy(), average='macro')
#     ipdb.set_trace()
    return pr

The training loop is similar to before, without the need to `squeeze` the model predictions as `CrossEntropyLoss` expects the input to be **[batch size, n classes]** and the label to be **[batch size]**.

The label needs to be a `LongTensor`, which it is by default as we did not set the `dtype` to a `FloatTensor` as before.

In [41]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        
        predictions = model(batch.text)
#         ipdb.set_trace()
        
        loss = criterion(predictions, batch.label.long())
        
        acc = categorical_accuracy(predictions, batch.label.long())
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

The evaluation loop is, again, similar to before.

In [42]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch.text)
            
            loss = criterion(predictions, batch.label.long())
            
            acc = categorical_accuracy(predictions, batch.label.long())

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [43]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

Next, we train our model.

In [44]:
N_EPOCHS = 10

best_valid_loss = 0.28 #float('inf')
best_valid_acc = 0.88

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        print('Saving Model ...')
        torch.save(model.state_dict(), 'Best_Model_Bert_'+str(best_valid_loss)[:4]+'.pt')
        print('*****************************************************')
        print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, valid_loss, valid_acc))
        print('*****************************************************')

Epoch: 01 | Epoch Time: 0m 0s
	Train Loss: 0.904 | Train Acc: 67.11%
	 Val. Loss: 0.643 |  Val. Acc: 77.98%
Epoch: 02 | Epoch Time: 0m 0s
	Train Loss: 0.468 | Train Acc: 84.69%
	 Val. Loss: 0.466 |  Val. Acc: 83.45%
Epoch: 03 | Epoch Time: 0m 0s
	Train Loss: 0.289 | Train Acc: 91.32%
	 Val. Loss: 0.409 |  Val. Acc: 85.01%
Epoch: 04 | Epoch Time: 0m 0s
	Train Loss: 0.202 | Train Acc: 94.46%
	 Val. Loss: 0.399 |  Val. Acc: 84.23%
Epoch: 05 | Epoch Time: 0m 0s
	Train Loss: 0.138 | Train Acc: 96.78%
	 Val. Loss: 0.399 |  Val. Acc: 84.23%
Epoch: 06 | Epoch Time: 0m 0s
	Train Loss: 0.088 | Train Acc: 99.02%
	 Val. Loss: 0.395 |  Val. Acc: 85.79%
Epoch: 07 | Epoch Time: 0m 0s
	Train Loss: 0.062 | Train Acc: 99.41%
	 Val. Loss: 0.446 |  Val. Acc: 85.01%
Epoch: 08 | Epoch Time: 0m 0s
	Train Loss: 0.048 | Train Acc: 99.41%
	 Val. Loss: 0.413 |  Val. Acc: 84.87%
Epoch: 09 | Epoch Time: 0m 0s
	Train Loss: 0.034 | Train Acc: 99.61%
	 Val. Loss: 0.424 |  Val. Acc: 84.09%
Epoch: 10 | Epoch Time: 0m 0

# Cross validation

In [9]:
from sklearn.metrics import accuracy_score, precision_score

def categorical_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    
    correct = max_preds.squeeze(1).eq(y)
#     ipdb.set_trace()
    pr = correct.sum().item() / torch.FloatTensor([y.shape[0]])
    
#     pr = precision_score(y.detach().numpy(), 
#                            max_preds.argmax(dim = 1, 
#                                         keepdim = True).detach().numpy(), average='macro')
#     ipdb.set_trace()
    return pr

def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        
        predictions = model(batch.text)
#         ipdb.set_trace()
        
        loss = criterion(predictions, batch.label.long())
        
        acc = categorical_accuracy(predictions, batch.label.long())
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)


def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch.text)
            
            loss = criterion(predictions, batch.label.long())
            
            acc = categorical_accuracy(predictions, batch.label.long())

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)


import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [10]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, text):
        
        #text = [sent len, batch size]
        
        text = text.permute(1, 0)
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
        
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        
        out = self.fc(cat)
        
#         out = self.sigmoid(out)
            
        return out #self.fc(cat) #

In [11]:
class load_data(object):
    def __init__(self, SEED=1234):
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True

        TEXT = data.Field(tokenize = 'spacy')
        LABELS = data.LabelField(dtype = torch.float)


#         self.train_data, self.test_data = datasets.IMDB.splits(TEXT, LABEL)
        
        self.train_data, self.test_data = data.TabularDataset.splits(
            path='./data', train='Train_process.csv',
            test='Test_process.csv', format='csv', skip_header=True,
            fields=[('ID', None),
                    ('text', TEXT),
                    ('label', LABELS)])

        self.SEED = SEED


    def get_fold_data(self, num_folds=10):
        """
        More details about 'fields' are available at 
        https://github.com/pytorch/text/blob/master/torchtext/datasets/imdb.py
        """

        TEXT = data.Field(tokenize='spacy')
        LABELS = data.LabelField(dtype=torch.float)
        fields = [('text', TEXT), ('label', LABELS)]
        
#         kf = KFold(n_splits=num_folds, shuffle=True, random_state=self.SEED)
        kf = StratifiedKFold(n_splits=num_folds, random_state=self.SEED)
        
        train_data_arr = np.array(self.train_data.examples)

#         ipdb.set_trace()
        for train_index, val_index in kf.split(train_data_arr, y_kfold):
            yield(
                TEXT,
                LABELS,
                data.Dataset(train_data_arr[train_index], fields=fields),
                data.Dataset(train_data_arr[val_index], fields=fields),
            )
    
    def get_test_data(self):
        return self.test_data

In [12]:
# KFold?

In [13]:
data_generator = load_data()
_history = []
device = None
model = None
criterion = None
fold_index = 0
num_folds = 5
batch_size = 32
epochs = 5

import torch.optim as optim

# optimizer = optim.Adam(model.parameters())

best_valid_acc = 0.9
best_val_loss = 0.25
    
for TEXT, LABELS, train_data, val_data in data_generator.get_fold_data(num_folds=num_folds):
    print("***** Running Training *****")
    print(f"Now fold: {fold_index + 1} / {num_folds}")

    TEXT.build_vocab(train_data,
                     vectors = "glove.6B.300d",
                     unk_init = torch.Tensor.normal_)
    
    print(f'Embedding size: {TEXT.vocab.vectors.size()}.')
    LABELS.build_vocab(train_data) # For converting str into float labels.
    
#     Model(len(TEXT.vocab), embedding_dim, hidden_dim,
#         output_dim, num_layers, dropout, TEXT.vocab.vectors, embedding_trainable)
    
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 300
    N_FILTERS = 100
    FILTER_SIZES = [2,3,4]
    OUTPUT_DIM = len(LABELS.vocab)
    DROPOUT = 0.308
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
    
    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)
    
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    
    
    # Get w
    
    from collections import Counter
    class_distrbution = Counter()
    for text in y_kfold:
        class_distrbution[text] += 1


    class_weights = [class_distrbution[i] for i in LABELS.vocab.stoi.keys()]
    class_weights_normalized = [max(class_weights)/i for i in class_weights]


    w = torch.Tensor(class_weights_normalized)
    w = w.to(device)

    optimizer = optim.Adam(model.parameters())
#     criterion = nn.CrossEntropyLoss(weight=w)
    criterion = nn.CrossEntropyLoss()
        
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = criterion.to(device)

    train_iterator = data.Iterator(train_data, batch_size=batch_size, sort_key=lambda x: len(x.text), device=device)
    val_iterator = data.Iterator(val_data, batch_size=batch_size, sort_key=lambda x: len(x.text), device=device)

    for epoch in range(epochs):
        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        val_loss, val_acc = evaluate(model, val_iterator, criterion)
        print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'| Epoch: {epoch+1:02} | Valid Loss: {val_loss:.3f} | Valid Acc: {val_acc*100:.2f}%')
        print('################################################################################')
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            print('Saving Model ...')
            torch.save(model.state_dict(), 'Best_Model_'+str(best_val_loss)[:5]+'.pt')
            print('*****************************************************')
            print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
            print('*****************************************************')
    
    val_loss, val_acc = evaluate(model, val_iterator, criterion) 
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        print('Saving Model ...')
        torch.save(model.state_dict(), 'Best_Model_'+str(best_val_loss)[:5]+'.pt')
        print('*****************************************************')
        print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
        print('*****************************************************')
    
#     if val_acc > best_valid_acc:
#         best_valid_acc = val_acc
#         print('Saving Model ...')
#         torch.save(model.state_dict(), 'Best_Model_'+str(val_acc)+'.pt')
#         print('*****************************************************')
#         print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
#         print('*****************************************************')
        
    print(f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')

    _history.append([val_loss, val_acc])
    fold_index += 1

_history = np.asarray(_history)
loss = np.mean(_history[:, 0])
acc = np.mean(_history[:, 1])

print('***** Cross Validation Result *****')
print(f'LOSS: {loss}, ACC: {acc}')

.vector_cache/glove.6B.zip: 0.00B [00:00, ?B/s]

***** Running Training *****
Now fold: 1 / 5


.vector_cache/glove.6B.zip: 862MB [06:26, 2.23MB/s]                              
100%|█████████▉| 399555/400000 [00:37<00:00, 10831.18it/s]

Embedding size: torch.Size([717, 300]).
| Epoch: 01 | Train Loss: 0.924 | Train Acc: 64.13%
| Epoch: 01 | Valid Loss: 0.616 | Valid Acc: 80.58%
################################################################################
| Epoch: 02 | Train Loss: 0.485 | Train Acc: 83.98%
| Epoch: 02 | Valid Loss: 0.416 | Valid Acc: 85.94%
################################################################################
| Epoch: 03 | Train Loss: 0.320 | Train Acc: 89.13%
| Epoch: 03 | Valid Loss: 0.372 | Valid Acc: 86.94%
################################################################################


100%|█████████▉| 399555/400000 [00:50<00:00, 10831.18it/s]

| Epoch: 04 | Train Loss: 0.210 | Train Acc: 94.34%
| Epoch: 04 | Valid Loss: 0.325 | Valid Acc: 89.51%
################################################################################
| Epoch: 05 | Train Loss: 0.139 | Train Acc: 97.46%
| Epoch: 05 | Valid Loss: 0.305 | Valid Acc: 89.51%
################################################################################
Val. Loss: 0.298 | Val. Acc: 89.84% |
***** Running Training *****
Now fold: 2 / 5
Embedding size: torch.Size([708, 300]).
| Epoch: 01 | Train Loss: 0.908 | Train Acc: 67.79%
| Epoch: 01 | Valid Loss: 0.560 | Valid Acc: 84.14%
################################################################################
| Epoch: 02 | Train Loss: 0.463 | Train Acc: 84.98%
| Epoch: 02 | Valid Loss: 0.396 | Valid Acc: 86.92%
################################################################################
| Epoch: 03 | Train Loss: 0.306 | Train Acc: 90.34%
| Epoch: 03 | Valid Loss: 0.356 | Valid Acc: 86.00%
#################################

In [None]:
LOSS: 0.5449422478675843, ACC: 0.842294979095459
        
LOSS: 0.4586620181798935, ACC: 0.861218586564064

LOSS: 0.3850980430841446, ACC: 0.8524718940258026
        
LOSS: 0.3720459468662739, ACC: 0.8570932596921921

LOSS: 0.3683295398950577, ACC: 0.8664682626724243 # lower
        
LOSS: 0.3577434681355953, ACC: 0.8683201134204864 # spel checker contraction

LOSS: 0.35579548478126527, ACC: 0.8727182626724244


In [134]:
model.load_state_dict(torch.load('Best_Model_0.305.pt'))

val_loss, val_acc = evaluate(model, valid_iterator, criterion) 
val_loss

RuntimeError: Error(s) in loading state_dict for CNN:
	size mismatch for embedding.weight: copying a param with shape torch.Size([720, 300]) from checkpoint, the shape in current model is torch.Size([711, 300]).

In [169]:
# best record: [epoch 4], [val loss 0.60001], [val acc 0.83239]

Finally, let's run our model on the test set!

In [251]:
# model.load_state_dict(torch.load('tut5-model.pt'))

# test_loss, test_acc = evaluate(model, test_iterator, criterion)

# print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [480]:
# def predict_sentiment(model, tokenizer, sentence):
#     model.eval()
#     tokens = tokenizer.tokenize(sentence)
#     tokens = tokens#[:max_input_length-2]
#     indexed = [init_token_idx] + tokenizer.convert_tokens_to_ids(tokens) + [eos_token_idx]
#     tensor = torch.LongTensor(indexed).to(device)
#     tensor = tensor.unsqueeze(0)
#     prediction = torch.sigmoid(model(tensor))
#     return prediction

import spacy
nlp = spacy.load('en')

def predict_sentiment(model, sentence, min_len = 4):
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    if len(tokenized) < min_len:
        tokenized += ['<pad>'] * (min_len - len(tokenized))
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    preds = torch.softmax(model(tensor), dim=1)
#     preds = torch.sigmoid(model(tensor))
#     preds = model(tensor)
    return preds

In [481]:
print(LABELS.vocab.stoi)

defaultdict(<function _default_unk_index at 0x7f354edce2f0>, {'Depression': 0, 'Alcohol': 1, 'Suicide': 2, 'Drugs': 3})


In [482]:
# predict_class(model, "Who is Keyser Söze?")

In [483]:
predict_sentiment(model, "weed addiction")

tensor([[0.0117, 0.2914, 0.0116, 0.6853]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)

In [484]:
# predict(model, test_iterator, criterion)

In [485]:
test_data = pd.read_csv("./data/Test_process.csv")
# test_data = pd.read_csv("./data/Test.csv")

test_data.head()

Unnamed: 0,ID,text
0,02V56KMO,how to overcome bad feelings and emotions
1,03BMGTOK,feel like giving up in life
2,03LZVFM6,was so depressed feel like got no strength to ...
3,0EPULUM5,feel so low especially since had no one to tal...
4,0GM4C5GD,can be successful when am a drug addict


In [486]:
test_data.iloc[0]['text']

'how to overcome bad feelings and emotions'

In [487]:
len(test_data)

309

In [488]:
# Inference

# Load Best Model
# model.load_state_dict(torch.load('Best_Model_Bert_0.23.pt'))

y_pred = np.zeros((309, 4))
for i in range(len(test_data)):
    y_pred[i] = predict_sentiment(model, test_data.iloc[i]['text']).data.cpu()

In [489]:
y_pred.shape

(309, 4)

In [68]:
# y_pred

## Optimus

In [50]:
!pip install optuna



In [53]:
class load_data(object):
    def __init__(self, SEED=1234):
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True

        TEXT = data.Field(tokenize = 'spacy')
        LABELS = data.LabelField(dtype = torch.float)


#         self.train_data, self.test_data = datasets.IMDB.splits(TEXT, LABEL)
        
        self.train_data, self.test_data = data.TabularDataset.splits(
            path='./data', train='Train_process.csv',
            test='Test_process.csv', format='csv', skip_header=True,
            fields=[('ID', None),
                    ('text', TEXT),
                    ('label', LABELS)])

        self.SEED = SEED


    def get_fold_data(self, num_folds=10):
        """
        More details about 'fields' are available at 
        https://github.com/pytorch/text/blob/master/torchtext/datasets/imdb.py
        """

        TEXT = data.Field(tokenize='spacy')
        LABELS = data.LabelField(dtype=torch.float)
        fields = [('text', TEXT), ('label', LABELS)]
        
#         kf = KFold(n_splits=num_folds, shuffle=True, random_state=self.SEED)
        kf = StratifiedKFold(n_splits=num_folds, random_state=self.SEED)
        
        train_data_arr = np.array(self.train_data.examples)

#         ipdb.set_trace()
        for train_index, val_index in kf.split(train_data_arr, y_kfold):
            yield(
                TEXT,
                LABELS,
                data.Dataset(train_data_arr[train_index], fields=fields),
                data.Dataset(train_data_arr[val_index], fields=fields),
            )
    
    def get_test_data(self):
        return self.test_data

In [54]:
# best_loss_val = 0.8

def objective(trial):
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')    
    
    
    lr  = trial.suggest_loguniform('lr', 1e-4, 1e-2)
#     optim_ = trial.suggest_categorical('optim_',[optim.SGD, optim.RMSprop,optim.Adam])
    
    
    momentum = trial.suggest_uniform('momentum', 0.5, 0.9)
    
    
    
    data_generator = load_data()
    _history = []
    device = None
    model = None
    criterion = None
    fold_index = 0
    num_folds = 5
    batch_size = 32
    epochs = 20

    import torch.optim as optim

    # optimizer = optim.Adam(model.parameters())

    best_valid_acc = 0.9
    best_val_loss = 0.2

    for TEXT, LABELS, train_data, val_data in data_generator.get_fold_data(num_folds=num_folds):
#         print("***** Running Training *****")
#         print(f"Now fold: {fold_index + 1} / {num_folds}")

        TEXT.build_vocab(train_data,
                         vectors = "glove.6B.300d",
                         unk_init = torch.Tensor.normal_)

#         print(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABELS.build_vocab(train_data) # For converting str into float labels.

    #     Model(len(TEXT.vocab), embedding_dim, hidden_dim,
    #         output_dim, num_layers, dropout, TEXT.vocab.vectors, embedding_trainable)

        INPUT_DIM = len(TEXT.vocab)
        EMBEDDING_DIM = 300
        N_FILTERS = 100
        FILTER_SIZES = [2,3,4]
        OUTPUT_DIM = len(LABELS.vocab)
        DROPOUT = 0.5
        PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

        model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)

        UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

        model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
        model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)


        # Get w
        from collections import Counter
        class_distrbution = Counter()
        for text in y_kfold:
            class_distrbution[text] += 1


        class_weights = [class_distrbution[i] for i in LABELS.vocab.stoi.keys()]
        class_weights_normalized = [max(class_weights)/i for i in class_weights]


        w = torch.Tensor(class_weights_normalized)
        w = w.to(device)

#         optimizer = optim.Adam(model.parameters())
#         criterion = nn.CrossEntropyLoss(weight=w)
        
        optimizer = optim.SGD(model.parameters(), lr = lr, momentum=momentum, 
                      weight_decay=0, dampening=0, nesterov=False)
        
#         optimizer = optim.Adam(model.parameters(), lr = 1e-4, momentum=0.9, 
#                       weight_decay=0, dampening=0, nesterov=True)
    
        criterion = nn.CrossEntropyLoss(weight = w)
        
        

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        criterion = criterion.to(device)

        train_iterator = data.Iterator(train_data, batch_size=batch_size, sort_key=lambda x: len(x.text), device=device)
        val_iterator = data.Iterator(val_data, batch_size=batch_size, sort_key=lambda x: len(x.text), device=device)

        for epoch in range(epochs):
            train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
            val_loss, val_acc = evaluate(model, val_iterator, criterion)
#             print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
#             print(f'| Epoch: {epoch+1:02} | Valid Loss: {val_loss:.3f} | Valid Acc: {val_acc*100:.2f}%')
#             print('################################################################################')
#             if val_loss < best_val_loss:
#                 best_val_loss = val_loss
#                 print('Saving Model ...')
#                 torch.save(model.state_dict(), 'Best_Model_'+str(best_val_loss)+'.pt')
#                 print('*****************************************************')
#                 print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
#                 print('*****************************************************')

        val_loss, val_acc = evaluate(model, val_iterator, criterion) 

#         if val_loss < best_val_loss:
#                 best_val_loss = val_loss
#                 print('Saving Model ...')
#                 torch.save(model.state_dict(), 'Best_Model_'+str(best_val_loss)+'.pt')
#                 print('*****************************************************')
#                 print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
#                 print('*****************************************************')

    #     if val_acc > best_valid_acc:
    #         best_valid_acc = val_acc
    #         print('Saving Model ...')
    #         torch.save(model.state_dict(), 'Best_Model_'+str(val_acc)+'.pt')
    #         print('*****************************************************')
    #         print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
    #         print('*****************************************************')

#         print(f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')

        _history.append([val_loss, val_acc])
        fold_index += 1

    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])

    print('***** Cross Validation Result *****')
    print(f'LOSS: {loss}, ACC: {acc}')
    
    # Handle pruning based on the intermediate value.
    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()
            
            
    return loss

In [55]:
import optuna

sampler = optuna.samplers.TPESampler()
study = optuna.create_study(sampler=sampler, direction='minimize')
study.optimize(func=objective, n_trials=100)


Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.327958458662033, ACC: 0.6552496731281281


[I 2020-07-03 20:37:05,156] Finished trial#0 with value: 1.327958458662033 with parameters: {'lr': 0.0003017745339013328, 'momentum': 0.6526010976015626}. Best is trial#0 with value: 1.327958458662033.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.9759525686502457, ACC: 0.8164930582046509


[I 2020-07-03 20:37:57,548] Finished trial#1 with value: 0.9759525686502457 with parameters: {'lr': 0.0010456463513600518, 'momentum': 0.822185665013464}. Best is trial#1 with value: 0.9759525686502457.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.2384560048580169, ACC: 0.7838376343250275


[I 2020-07-03 20:38:29,012] Finished trial#2 with value: 1.2384560048580169 with parameters: {'lr': 0.0010636778719478645, 'momentum': 0.5259356573424152}. Best is trial#1 with value: 0.9759525686502457.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.9574449002742768, ACC: 0.8234788358211518


[I 2020-07-03 20:39:03,721] Finished trial#3 with value: 0.9574449002742768 with parameters: {'lr': 0.002299344228500365, 'momentum': 0.6203303100320009}. Best is trial#3 with value: 0.9574449002742768.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.343417513370514, ACC: 0.5591435208916664


[I 2020-07-03 20:39:46,386] Finished trial#4 with value: 1.343417513370514 with parameters: {'lr': 0.0003074002482770984, 'momentum': 0.5102302083946814}. Best is trial#3 with value: 0.9574449002742768.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5771822988986969, ACC: 0.8426752686500549


[I 2020-07-03 20:40:26,041] Finished trial#5 with value: 0.5771822988986969 with parameters: {'lr': 0.006813470144576557, 'momentum': 0.6136871553855281}. Best is trial#5 with value: 0.5771822988986969.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.254415547847748, ACC: 0.7763144910335541


[I 2020-07-03 20:41:09,892] Finished trial#6 with value: 1.254415547847748 with parameters: {'lr': 0.0008173743804225808, 'momentum': 0.5931760006045274}. Best is trial#5 with value: 0.5771822988986969.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.8541928082704544, ACC: 0.8344824761152267


[I 2020-07-03 20:42:01,228] Finished trial#7 with value: 0.8541928082704544 with parameters: {'lr': 0.0027036542125089534, 'momentum': 0.6481203743133906}. Best is trial#5 with value: 0.5771822988986969.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.7429178118705749, ACC: 0.8310681253671646


[I 2020-07-03 20:42:51,933] Finished trial#8 with value: 0.7429178118705749 with parameters: {'lr': 0.0031461854417959936, 'momentum': 0.6878953743839797}. Best is trial#5 with value: 0.5771822988986969.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.3194498419761658, ACC: 0.6813409417867661


[I 2020-07-03 20:43:40,976] Finished trial#9 with value: 1.3194498419761658 with parameters: {'lr': 0.00026471941458783835, 'momentum': 0.7366998336728974}. Best is trial#5 with value: 0.5771822988986969.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5657602921128273, ACC: 0.8691220283508301


[I 2020-07-03 20:44:32,934] Finished trial#10 with value: 0.5657602921128273 with parameters: {'lr': 0.009556410480067438, 'momentum': 0.898475684579351}. Best is trial#10 with value: 0.5657602921128273.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5728199563920497, ACC: 0.864145177602768


[I 2020-07-03 20:45:22,624] Finished trial#11 with value: 0.5728199563920497 with parameters: {'lr': 0.009862062666237929, 'momentum': 0.8993882852502081}. Best is trial#10 with value: 0.5657602921128273.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5629334472119808, ACC: 0.864145177602768


[I 2020-07-03 20:46:14,359] Finished trial#12 with value: 0.5629334472119808 with parameters: {'lr': 0.009441720893355788, 'momentum': 0.8981196270204903}. Best is trial#12 with value: 0.5629334472119808.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5075690373778343, ACC: 0.8529844641685486


[I 2020-07-03 20:47:03,633] Finished trial#13 with value: 0.5075690373778343 with parameters: {'lr': 0.006284692590279114, 'momentum': 0.8893101866289932}. Best is trial#13 with value: 0.5075690373778343.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5104975253343582, ACC: 0.8448164731264114


[I 2020-07-03 20:47:49,734] Finished trial#14 with value: 0.5104975253343582 with parameters: {'lr': 0.005063145350314804, 'momentum': 0.8197161960858727}. Best is trial#13 with value: 0.5075690373778343.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.517751906812191, ACC: 0.8432539731264115


[I 2020-07-03 20:48:19,402] Finished trial#15 with value: 0.517751906812191 with parameters: {'lr': 0.004754352005186824, 'momentum': 0.8166970100187024}. Best is trial#13 with value: 0.5075690373778343.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5097382679581642, ACC: 0.8432539731264115


[I 2020-07-03 20:48:49,102] Finished trial#16 with value: 0.5097382679581642 with parameters: {'lr': 0.004610301102350988, 'momentum': 0.8360673015569071}. Best is trial#13 with value: 0.5075690373778343.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.8418743252754212, ACC: 0.8313574761152267


[I 2020-07-03 20:49:18,860] Finished trial#17 with value: 0.8418743252754212 with parameters: {'lr': 0.0018537581543728256, 'momentum': 0.7670527672049179}. Best is trial#13 with value: 0.5075690373778343.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.498178668320179, ACC: 0.8529183268547058


[I 2020-07-03 20:49:48,570] Finished trial#18 with value: 0.498178668320179 with parameters: {'lr': 0.004812406405444112, 'momentum': 0.8583275347264456}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.331704217195511, ACC: 0.6256779134273529


[I 2020-07-03 20:50:18,324] Finished trial#19 with value: 1.331704217195511 with parameters: {'lr': 0.00010663868987473341, 'momentum': 0.8704580913416442}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.8373440325260162, ACC: 0.8313574761152267


[I 2020-07-03 20:50:47,979] Finished trial#20 with value: 0.8373440325260162 with parameters: {'lr': 0.0018793991785664612, 'momentum': 0.7663002701292841}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5073713183403015, ACC: 0.8463789731264114


[I 2020-07-03 20:51:17,758] Finished trial#21 with value: 0.5073713183403015 with parameters: {'lr': 0.004427093288335344, 'momentum': 0.8455298795520827}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5028461828827858, ACC: 0.8578951776027679


[I 2020-07-03 20:51:47,429] Finished trial#22 with value: 0.5028461828827858 with parameters: {'lr': 0.006704858222478878, 'momentum': 0.8598288069408165}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5023185387253761, ACC: 0.8479414731264114


[I 2020-07-03 20:52:17,086] Finished trial#23 with value: 0.5023185387253761 with parameters: {'lr': 0.00427960425242461, 'momentum': 0.8590210374247593}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.6010732769966125, ACC: 0.8349289059638977


[I 2020-07-03 20:52:46,841] Finished trial#24 with value: 0.6010732769966125 with parameters: {'lr': 0.003489408070340888, 'momentum': 0.7795236638473473}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5051060721278191, ACC: 0.85818452835083


[I 2020-07-03 20:53:16,596] Finished trial#25 with value: 0.5051060721278191 with parameters: {'lr': 0.007012997459985982, 'momentum': 0.8586563719296875}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.9403471916913986, ACC: 0.8214037716388702


[I 2020-07-03 20:53:46,334] Finished trial#26 with value: 0.9403471916913986 with parameters: {'lr': 0.0013035748335349932, 'momentum': 0.7954272348274283}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5110106810927391, ACC: 0.8458002716302871


[I 2020-07-03 20:54:16,017] Finished trial#27 with value: 0.5110106810927391 with parameters: {'lr': 0.0077133022346861525, 'momentum': 0.7326134020687094}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 1.1124606639146806, ACC: 0.8027199119329452


[I 2020-07-03 20:54:46,044] Finished trial#28 with value: 1.1124606639146806 with parameters: {'lr': 0.0005250800672394833, 'momentum': 0.8713776467968388}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5659467592835427, ACC: 0.8364914059638977


[I 2020-07-03 20:55:15,881] Finished trial#29 with value: 0.5659467592835427 with parameters: {'lr': 0.00372528128488455, 'momentum': 0.7996418417067669}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.9158313989639282, ACC: 0.8245287716388703


[I 2020-07-03 20:55:46,089] Finished trial#30 with value: 0.9158313989639282 with parameters: {'lr': 0.0018748628447404743, 'momentum': 0.7200422405594902}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5006795361638069, ACC: 0.856332677602768


[I 2020-07-03 20:56:15,855] Finished trial#31 with value: 0.5006795361638069 with parameters: {'lr': 0.006338951549525465, 'momentum': 0.8605992823796142}. Best is trial#18 with value: 0.498178668320179.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.4973932966589928, ACC: 0.8532076776027679


[I 2020-07-03 20:56:46,126] Finished trial#32 with value: 0.4973932966589928 with parameters: {'lr': 0.005638888881777268, 'momentum': 0.8497150508132949}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5013158261775971, ACC: 0.8476521223783493


[I 2020-07-03 20:57:16,373] Finished trial#33 with value: 0.5013158261775971 with parameters: {'lr': 0.005449740712645151, 'momentum': 0.8344842118093602}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.6036181226372719, ACC: 0.8349950432777404


[I 2020-07-03 20:57:47,109] Finished trial#34 with value: 0.6036181226372719 with parameters: {'lr': 0.0026452946118709093, 'momentum': 0.8315738301457176}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5009583294391632, ACC: 0.8561094641685486


[I 2020-07-03 20:58:17,888] Finished trial#35 with value: 0.5009583294391632 with parameters: {'lr': 0.00587480554121094, 'momentum': 0.8828883535034651}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5377031370997429, ACC: 0.8607308268547058


[I 2020-07-03 20:58:48,722] Finished trial#36 with value: 0.5377031370997429 with parameters: {'lr': 0.008926924490083754, 'momentum': 0.8809399494092747}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5071277871727944, ACC: 0.8460896223783493


[I 2020-07-03 20:59:18,906] Finished trial#37 with value: 0.5071277871727944 with parameters: {'lr': 0.00612479457389616, 'momentum': 0.7988145819652185}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.8984489351511001, ACC: 0.8328538358211517


[I 2020-07-03 20:59:48,709] Finished trial#38 with value: 0.8984489351511001 with parameters: {'lr': 0.003075573880291357, 'momentum': 0.5558547824370474}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.6978113040328026, ACC: 0.8282324761152268


[I 2020-07-03 21:00:18,415] Finished trial#39 with value: 0.6978113040328026 with parameters: {'lr': 0.0013713014341580375, 'momentum': 0.8821073846862513}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.501986576616764, ACC: 0.856332677602768


[I 2020-07-03 21:00:48,280] Finished trial#40 with value: 0.501986576616764 with parameters: {'lr': 0.00806591542228223, 'momentum': 0.8141330145525059}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.49940393716096876, ACC: 0.8510664761066437


[I 2020-07-03 21:01:17,928] Finished trial#41 with value: 0.49940393716096876 with parameters: {'lr': 0.005389076195181052, 'momentum': 0.8468873018957996}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.49941366314888, ACC: 0.8529183268547058


[I 2020-07-03 21:01:47,623] Finished trial#42 with value: 0.49941366314888 with parameters: {'lr': 0.005556703602478259, 'momentum': 0.8475175336328599}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5234861850738526, ACC: 0.8380539059638977


[I 2020-07-03 21:02:17,231] Finished trial#43 with value: 0.5234861850738526 with parameters: {'lr': 0.0036521502139770863, 'momentum': 0.8493354002445885}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5031858935952187, ACC: 0.8578951776027679


[I 2020-07-03 21:02:46,877] Finished trial#44 with value: 0.5031858935952187 with parameters: {'lr': 0.00980640324782613, 'momentum': 0.7811570318721086}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5857230976223946, ACC: 0.8349950432777404


[I 2020-07-03 21:03:16,594] Finished trial#45 with value: 0.5857230976223946 with parameters: {'lr': 0.0026184862847948927, 'momentum': 0.845233670313885}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.6648435816168785, ACC: 0.8330770552158355


[I 2020-07-03 21:03:46,290] Finished trial#46 with value: 0.6648435816168785 with parameters: {'lr': 0.00406716924320123, 'momentum': 0.6771108282689534}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5096331372857094, ACC: 0.8479414731264114


[I 2020-07-03 21:04:24,018] Finished trial#47 with value: 0.5096331372857094 with parameters: {'lr': 0.005373769076267617, 'momentum': 0.8102574148667122}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5055279046297073, ACC: 0.8510664731264115


[I 2020-07-03 21:05:06,164] Finished trial#48 with value: 0.5055279046297073 with parameters: {'lr': 0.007725464720398875, 'momentum': 0.757959353011282}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5025134295225143, ACC: 0.8448826134204864


[I 2020-07-03 21:05:36,000] Finished trial#49 with value: 0.5025134295225143 with parameters: {'lr': 0.0029770168145560627, 'momentum': 0.8996217709986357}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5133072577416897, ACC: 0.8613095283508301


[I 2020-07-03 21:06:05,737] Finished trial#50 with value: 0.5133072577416897 with parameters: {'lr': 0.009891185843061624, 'momentum': 0.8243339548178882}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5001281499862671, ACC: 0.8578951776027679


[I 2020-07-03 21:06:35,449] Finished trial#51 with value: 0.5001281499862671 with parameters: {'lr': 0.005940307518252711, 'momentum': 0.8749812937600042}. Best is trial#32 with value: 0.4973932966589928.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.4962854325771332, ACC: 0.8497933268547058


[I 2020-07-03 21:07:05,317] Finished trial#52 with value: 0.4962854325771332 with parameters: {'lr': 0.004961516392725852, 'momentum': 0.8677498031158593}. Best is trial#52 with value: 0.4962854325771332.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.4956252224743366, ACC: 0.8561094641685486


[I 2020-07-03 21:07:34,984] Finished trial#53 with value: 0.4956252224743366 with parameters: {'lr': 0.0049311387000938005, 'momentum': 0.8949810015323556}. Best is trial#53 with value: 0.4956252224743366.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.4926346495747566, ACC: 0.8480076134204865


[I 2020-07-03 21:08:04,712] Finished trial#54 with value: 0.4926346495747566 with parameters: {'lr': 0.004144441719651159, 'momentum': 0.8985078355329079}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.494085419178009, ACC: 0.8563988149166107


[I 2020-07-03 21:08:34,402] Finished trial#55 with value: 0.494085419178009 with parameters: {'lr': 0.004385925588751196, 'momentum': 0.8999122337365262}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5465383842587471, ACC: 0.8300181895494461


[I 2020-07-03 21:09:04,100] Finished trial#56 with value: 0.5465383842587471 with parameters: {'lr': 0.0020967496872158265, 'momentum': 0.8983698328229224}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.49381727874279024, ACC: 0.8576719641685486


[I 2020-07-03 21:09:33,828] Finished trial#57 with value: 0.49381727874279024 with parameters: {'lr': 0.00438624264644518, 'momentum': 0.8990023145864727}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5428416922688484, ACC: 0.8284556895494462


[I 2020-07-03 21:10:03,516] Finished trial#58 with value: 0.5428416922688484 with parameters: {'lr': 0.00224767554914959, 'momentum': 0.8933020574803477}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.4931644096970558, ACC: 0.8495701134204865


[I 2020-07-03 21:10:33,192] Finished trial#59 with value: 0.4931644096970558 with parameters: {'lr': 0.003606950640278496, 'momentum': 0.8987150631418933}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.49742158204317094, ACC: 0.8464451134204865


[I 2020-07-03 21:11:02,940] Finished trial#60 with value: 0.49742158204317094 with parameters: {'lr': 0.0033238253680310612, 'momentum': 0.8962541508432517}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.5002163961529732, ACC: 0.8510664731264115


[I 2020-07-03 21:11:32,609] Finished trial#61 with value: 0.5002163961529732 with parameters: {'lr': 0.004091949155944398, 'momentum': 0.8702658044156668}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.49456333070993425, ACC: 0.8545469641685486


[I 2020-07-03 21:12:02,331] Finished trial#62 with value: 0.49456333070993425 with parameters: {'lr': 0.00474262609991237, 'momentum': 0.8863940479394523}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



***** Cross Validation Result *****
LOSS: 0.49470569267868997, ACC: 0.8579613149166108


[I 2020-07-03 21:12:31,992] Finished trial#63 with value: 0.49470569267868997 with parameters: {'lr': 0.004486400306359615, 'momentum': 0.8998348902346536}. Best is trial#54 with value: 0.4926346495747566.

Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.



KeyboardInterrupt: 

In [None]:
# import optuna

# sampler = optuna.samplers.TPESampler()
# study = optuna.create_study(sampler=sampler, direction='maximize')
# study.optimize(func=objective, n_trials=500)

# Submission

In [490]:
import pandas as pd

In [491]:
sample.tail()

Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
304,Z9A6ACLK,0,0,0,0
305,ZDUOIGKN,0,0,0,0
306,ZHQ60CCH,0,0,0,0
307,ZVIJMA4O,0,0,0,0
308,ZYIFAY98,0,0,0,0


In [492]:
(test.ID == sample.ID).all()

True

In [493]:
LABELS.vocab.stoi.keys()

dict_keys(['Depression', 'Alcohol', 'Suicide', 'Drugs'])

In [494]:
predictions = pd.DataFrame(y_pred, columns=LABELS.vocab.stoi.keys())
predictions.head()

Unnamed: 0,Depression,Alcohol,Suicide,Drugs
0,0.766105,0.024983,0.197407,0.011505
1,0.970994,0.002207,0.023719,0.003081
2,0.992472,0.000565,0.006251,0.000712
3,0.982532,0.001679,0.01348,0.002308
4,0.558778,0.141814,0.108502,0.190906


In [495]:
result = pd.concat([sample[['ID']], predictions], axis=1)

In [496]:
result.head()

Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
0,02V56KMO,0.766105,0.024983,0.197407,0.011505
1,03BMGTOK,0.970994,0.002207,0.023719,0.003081
2,03LZVFM6,0.992472,0.000565,0.006251,0.000712
3,0EPULUM5,0.982532,0.001679,0.01348,0.002308
4,0GM4C5GD,0.558778,0.141814,0.108502,0.190906


In [497]:
result.to_csv('./Pretrained_glove_300_text_contraction_all_0.28_softmax.csv', 
              index=False)

In [231]:
# np.sum([0.921355,0.010272,0.058423,0.009950]) # import spacy
nlp = spacy.load('en')

def predict_sentiment(model, sentence, min_len = 5):
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    if len(tokenized) < min_len:
        tokenized += ['<pad>'] * (min_len - len(tokenized))
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(0)
    prediction = torch.sigmoid(model(tensor))
    return prediction.item()

1.0

In [None]:
class load_data(object):
    def __init__(self, SEED=1234):
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        
        init_token = tokenizer.cls_token
        eos_token = tokenizer.sep_token
        pad_token = tokenizer.pad_token
        unk_token = tokenizer.unk_token
        
        init_token_idx = tokenizer.convert_tokens_to_ids(init_token)
        eos_token_idx = tokenizer.convert_tokens_to_ids(eos_token)
        pad_token_idx = tokenizer.convert_tokens_to_ids(pad_token)
        unk_token_idx = tokenizer.convert_tokens_to_ids(unk_token)
        
        LABELS = data.LabelField(dtype = torch.float)

        TEXT = data.Field(batch_first = True,
                          use_vocab = False,
        #                   tokenize = tokenize_and_cut,
                          preprocessing = tokenizer.convert_tokens_to_ids,
                          init_token = init_token_idx,
                          eos_token = eos_token_idx,
                          pad_token = pad_token_idx,
                          unk_token = unk_token_idx)


#         self.train_data, self.test_data = datasets.IMDB.splits(TEXT, LABEL)
        
        self.train_data, self.test_data = data.TabularDataset.splits(
            path='./data', train='Train_process.csv',
            test='Test_process.csv', format='csv', skip_header=True,
            fields=[('ID', None),
                    ('text', TEXT),
                    ('label', LABELS)])

        self.SEED = SEED


    def get_fold_data(self, num_folds=10):
        """
        More details about 'fields' are available at 
        https://github.com/pytorch/text/blob/master/torchtext/datasets/imdb.py
        """

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        
        init_token = tokenizer.cls_token
        eos_token = tokenizer.sep_token
        pad_token = tokenizer.pad_token
        unk_token = tokenizer.unk_token
        
        init_token_idx = tokenizer.convert_tokens_to_ids(init_token)
        eos_token_idx = tokenizer.convert_tokens_to_ids(eos_token)
        pad_token_idx = tokenizer.convert_tokens_to_ids(pad_token)
        unk_token_idx = tokenizer.convert_tokens_to_ids(unk_token)
        
        LABELS = data.LabelField(dtype = torch.float)

        TEXT = data.Field(batch_first = True,
                          use_vocab = False,
        #                   tokenize = tokenize_and_cut,
                          preprocessing = tokenizer.convert_tokens_to_ids,
                          init_token = init_token_idx,
                          eos_token = eos_token_idx,
                          pad_token = pad_token_idx,
                          unk_token = unk_token_idx)

        fields = [('text', TEXT), ('label', LABELS)]
        
#         kf = KFold(n_splits=num_folds, shuffle=True, random_state=self.SEED)
        kf = StratifiedKFold(n_splits=num_folds, random_state=self.SEED)
        
        train_data_arr = np.array(self.train_data.examples)

#         ipdb.set_trace()
        for train_index, val_index in kf.split(train_data_arr, y_kfold):
            yield(
                TEXT,
                LABELS,
                data.Dataset(train_data_arr[train_index], fields=fields),
                data.Dataset(train_data_arr[val_index], fields=fields),
            )
    
    def get_test_data(self):
        return self.test_data

In [None]:
# best_loss_val = 0.8

def objective(trial):
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')    
    
    
    lr  = trial.suggest_loguniform('lr', 1e-5, 1e-1)
#     optim_ = trial.suggest_categorical('optim_',[optim.SGD, optim.RMSprop,optim.Adam])
    
    
    momentum = trial.suggest_uniform('momentum', 0, 0.9)
    
    
    
#     data_generator = load_data()
#     _history = []
#     device = None
#     model = None
#     criterion = None
#     fold_index = 0
#     num_folds = 5
#     batch_size = 32
#     epochs = 20
    
    HIDDEN_DIM = 500
    OUTPUT_DIM = len(LABELS.vocab)
    N_LAYERS = 2
    BIDIRECTIONAL = True
    DROPOUT = 0.5

    import torch.optim as optim

    # optimizer = optim.Adam(model.parameters())

    best_valid_acc = 0.9
    best_val_loss = 0.2

    for TEXT, LABELS, train_data, val_data in data_generator.get_fold_data(num_folds=num_folds):
#         print("***** Running Training *****")
#         print(f"Now fold: {fold_index + 1} / {num_folds}")

        TEXT.build_vocab(train_data,
                         vectors = "glove.6B.300d",
                         unk_init = torch.Tensor.normal_)

#         print(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABELS.build_vocab(train_data) # For converting str into float labels.

    #     Model(len(TEXT.vocab), embedding_dim, hidden_dim,
    #         output_dim, num_layers, dropout, TEXT.vocab.vectors, embedding_trainable)

        HIDDEN_DIM = 500
        OUTPUT_DIM = len(LABELS.vocab)
        N_LAYERS = 2
        BIDIRECTIONAL = True
        DROPOUT = 0.5

        model = BERTGRUSentiment(model,
                         HIDDEN_DIM,
                         OUTPUT_DIM,
                         N_LAYERS,
                         BIDIRECTIONAL,
                         DROPOUT)


        # Get w
        from collections import Counter
        class_distrbution = Counter()
        for text in y_kfold:
            class_distrbution[text] += 1


        class_weights = [class_distrbution[i] for i in LABELS.vocab.stoi.keys()]
        class_weights_normalized = [max(class_weights)/i for i in class_weights]


        w = torch.Tensor(class_weights_normalized)
        w = w.to(device)

#         optimizer = optim.Adam(model.parameters())
#         criterion = nn.CrossEntropyLoss(weight=w)
        
        optimizer = optim.SGD(model.parameters(), lr = lr, momentum=momentum, 
                      weight_decay=0, dampening=0, nesterov=False)
        
#         optimizer = optim.Adam(model.parameters(), lr = 1e-4, betas=(0.9, 0.999), 
#                       weight_decay=0.0, amsgrad=False)
    
        criterion = nn.CrossEntropyLoss(weight = w)
        
        

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        criterion = criterion.to(device)

        train_iterator = data.Iterator(train_data, batch_size=batch_size, sort_key=lambda x: len(x.text), device=device)
        val_iterator = data.Iterator(val_data, batch_size=batch_size, sort_key=lambda x: len(x.text), device=device)

        for epoch in range(epochs):
            train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
            val_loss, val_acc = evaluate(model, val_iterator, criterion)
#             print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
#             print(f'| Epoch: {epoch+1:02} | Valid Loss: {val_loss:.3f} | Valid Acc: {val_acc*100:.2f}%')
#             print('################################################################################')
#             if val_loss < best_val_loss:
#                 best_val_loss = val_loss
#                 print('Saving Model ...')
#                 torch.save(model.state_dict(), 'Best_Model_'+str(best_val_loss)+'.pt')
#                 print('*****************************************************')
#                 print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
#                 print('*****************************************************')

        val_loss, val_acc = evaluate(model, val_iterator, criterion) 

#         if val_loss < best_val_loss:
#                 best_val_loss = val_loss
#                 print('Saving Model ...')
#                 torch.save(model.state_dict(), 'Best_Model_'+str(best_val_loss)+'.pt')
#                 print('*****************************************************')
#                 print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
#                 print('*****************************************************')

    #     if val_acc > best_valid_acc:
    #         best_valid_acc = val_acc
    #         print('Saving Model ...')
    #         torch.save(model.state_dict(), 'Best_Model_'+str(val_acc)+'.pt')
    #         print('*****************************************************')
    #         print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss, val_acc))
    #         print('*****************************************************')

#         print(f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')

        _history.append([val_loss, val_acc])
        fold_index += 1

    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])

    print('***** Cross Validation Result *****')
    print(f'LOSS: {loss}, ACC: {acc}')
    
    # Handle pruning based on the intermediate value.
    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()
            
            
    return loss