In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizer, RobertaModel

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, precision_score, recall_score
from tqdm.notebook import trange, tqdm
import numpy as np

In [4]:
# Load the dataset from a CSV file
df = pd.read_csv('outdata_M.csv')

In [5]:
df

Unnamed: 0,NOTES,EVENT_TYPE
0,5 January: Beheading of 5 citizens in Douaouda...,2
1,Two citizens were beheaded in Hassasna.,2
2,Two citizens were killed in a raid on the vill...,2
3,4 January: 16 citizens were murdered in the vi...,2
4,5 January: Killing of 18 citizens in the Olivi...,2
...,...,...
65528,OLF and Borana ethnic militia attack civilians...,2
65529,Ethnic raiders and OLF attack Degodia Somali e...,2
65530,OLF and Borana ethnic militia attack civilians...,2
65531,University students protest over killing in Wa...,3


In [6]:
df.shape

(65533, 2)

# **Text Preprocessing: **
Clean the text data by removing stop words, special characters, punctuation, etc.

In [7]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer



# Lowercase text
df["NOTES"] = df["NOTES"].str.lower()

# Remove punctuation
df["NOTES"] = df["NOTES"].apply(lambda x: re.sub(r'[^\w\s]', '', str(x)) if isinstance(x, str) else '')


# Remove stop words
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))
df["NOTES"] = df["NOTES"].apply(lambda x: " ".join([word for word in x.split() if word not in stop_words]))

# Remove numbers
df["NOTES"] = df["NOTES"].apply(lambda x: re.sub(r'\d', '', str(x)))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [8]:
df

Unnamed: 0,NOTES,EVENT_TYPE
0,january beheading citizens douaouda tipaza,2
1,two citizens beheaded hassasna,2
2,two citizens killed raid village hassi el abd ...,2
3,january citizens murdered village benachour ...,2
4,january killing citizens oliviers district d...,2
...,...,...
65528,olf borana ethnic militia attack civilians mas...,2
65529,ethnic raiders olf attack degodia somali ethni...,2
65530,olf borana ethnic militia attack civilians mas...,2
65531,university students protest killing wajir dist...,3


In [9]:
df.isnull().sum()

NOTES         0
EVENT_TYPE    0
dtype: int64

In [10]:
df.dropna(axis=0, how="any", thresh=None, subset=None, inplace=False).shape

(65533, 2)

In [11]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

In [12]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe['NOTES']
        self.targets = dataframe['EVENT_TYPE']
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.long)
        }

In [13]:
# Sections of config

# Defining some key variables that will be used later on in the training
MAX_LEN = 128
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 8
EPOCHS = 10
LEARNING_RATE = 0.01

In [14]:
# # Setting up the device for GPU usage
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [15]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=False)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [16]:
training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

In [17]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

In [18]:
training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [19]:
class RobertaClass(torch.nn.Module):
    def __init__(self):
        super(RobertaClass, self).__init__()
        self.l1 = RobertaModel.from_pretrained('roberta-base')
        self.l2 = torch.nn.Dropout(0.5)
        self.l3 = torch.nn.Linear(768, 6)
    
    def forward(self, ids, mask, token_type_ids):
        # breakpoint()
        # _, output_1= self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1.pooler_output)
        output = self.l3(output_2)
        return output

model = RobertaClass()
model.to(device)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RobertaClass(
  (l1): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), e

In [20]:
def loss_fn(outputs, targets):
    return torch.nn.CrossEntropyLoss()(outputs, targets)

In [21]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [22]:
def train(epoch):
    num_of_batches = int(len(train_df)/training_loader.batch_size)
    model.train()
    running_loss = 0.0    
    counter = 0

    for data in tqdm(training_loader, total=num_of_batches, desc="Epoch No: " + str(epoch+1), colour="blue"):
        counter += 1

        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets-1)
        tqdm.write(f"Training Iteration {counter}, Loss: {loss.item()}")
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    return running_loss/counter

In [23]:
def get_class(outputs):
    scores = torch.tensor(outputs)
    probs = torch.nn.functional.softmax(scores, dim=1)
    print(probs)

    _, predictions = torch.max(probs, dim=1)
    predictions += 1
    return predictions.tolist()

In [24]:
def validation(loader):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    running_loss = 0.0    
    counter = 0
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            counter += 1
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(outputs.cpu().detach().numpy().tolist())
            loss = loss_fn(outputs, targets-1)
            running_loss += loss.item()
    loss = running_loss/counter
    fin_outputs = get_class(fin_outputs)
    return fin_outputs, fin_targets, loss

In [25]:
for epoch in trange(EPOCHS, desc='Total Epochs: ', colour="green"):
    train_epoch_loss = train(epoch)
    #print(f"Test Average Recall: {test_average_recall:.4f}")
    outputs_train, targets_train, train_epoch_loss = validation(training_loader)

    #  Testing started from here
    #  evaluate on training data
    train_accuracy = accuracy_score(targets_train, outputs_train)
    print(f"Training loss for epcoh {epoch+1}: {train_epoch_loss}")
    print(f"Training accuracy for epoch {epoch+1}: {train_accuracy}")


    # evaluate on testing data
    outputs_test, targets_test, valid_epoch_loss = validation(testing_loader)
    test_accuracy = accuracy_score(targets_test, outputs_test)
    print(f"Validation loss for epcoh {epoch+1}: {valid_epoch_loss}")
    print(f"Test accuracy for epoch {epoch+1}: {test_accuracy}")

Total Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch No: 1:   0%|          | 0/6553 [00:00<?, ?it/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 5.476740837097168
Training Iteration 1565, Loss: 4.142246246337891
Training Iteration 1566, Loss: 4.291141510009766
Training Iteration 1567, Loss: 8.08358383178711
Training Iteration 1568, Loss: 2.3080925941467285
Training Iteration 1569, Loss: 6.173572540283203
Training Iteration 1570, Loss: 2.9670050144195557
Training Iteration 1571, Loss: 3.9319076538085938
Training Iteration 1572, Loss: 7.4030561447143555
Training Iteration 1573, Loss: 3.511410713195801
Training Iteration 1574, Loss: 7.062469959259033
Training Iteration 1575, Loss: 2.5509140491485596
Training Iteration 1576, Loss: 3.1586897373199463
Training Iteration 1577, Loss: 8.117525100708008
Training Iteration 1578, Loss: 6.536108016967773
Training Iteration 1579, Loss: 4.543871879577637
Training Iteration 1580, Loss: 3.7554030418395996
Training Iteration 1581, Loss: 4.950090408325195
Training Iteration 1582, Loss: 2.18680763244628



tensor([[9.6013e-01, 2.4537e-02, 1.3370e-02, 4.9185e-04, 1.4719e-03, 3.2627e-07],
        [9.6013e-01, 2.4537e-02, 1.3370e-02, 4.9185e-04, 1.4719e-03, 3.2627e-07],
        [9.6013e-01, 2.4537e-02, 1.3370e-02, 4.9185e-04, 1.4719e-03, 3.2627e-07],
        ...,
        [9.6013e-01, 2.4537e-02, 1.3370e-02, 4.9185e-04, 1.4719e-03, 3.2627e-07],
        [9.6013e-01, 2.4537e-02, 1.3370e-02, 4.9185e-04, 1.4719e-03, 3.2627e-07],
        [9.6013e-01, 2.4537e-02, 1.3370e-02, 4.9185e-04, 1.4719e-03, 3.2627e-07]])
Validation loss for epcoh 1: 3.6647503227086387
Test accuracy for epoch 1: 0.3103685053788052


Epoch No: 2:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 5.577312469482422




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 5.436380863189697
Training Iteration 1565, Loss: 6.543172359466553
Training Iteration 1566, Loss: 4.09991455078125
Training Iteration 1567, Loss: 4.605649948120117
Training Iteration 1568, Loss: 2.8721776008605957
Training Iteration 1569, Loss: 4.214337348937988
Training Iteration 1570, Loss: 5.164185523986816
Training Iteration 1571, Loss: 5.787470817565918
Training Iteration 1572, Loss: 7.026432037353516
Training Iteration 1573, Loss: 5.173123359680176
Training Iteration 1574, Loss: 4.802278518676758
Training Iteration 1575, Loss: 8.005227088928223
Training Iteration 1576, Loss: 6.445741653442383
Training Iteration 1577, Loss: 2.499340057373047
Training Iteration 1578, Loss: 2.9497318267822266
Training Iteration 1579, Loss: 4.792680740356445
Training Iteration 1580, Loss: 5.042891502380371
Training Iteration 1581, Loss: 4.956450939178467
Training Iteration 1582, Loss: 3.364658832550049
Tra



tensor([[2.6672e-03, 9.6276e-01, 9.5251e-04, 2.5824e-03, 3.1040e-02, 2.7052e-09],
        [2.6672e-03, 9.6276e-01, 9.5251e-04, 2.5824e-03, 3.1040e-02, 2.7052e-09],
        [2.6672e-03, 9.6276e-01, 9.5251e-04, 2.5824e-03, 3.1040e-02, 2.7052e-09],
        ...,
        [2.6672e-03, 9.6276e-01, 9.5251e-04, 2.5824e-03, 3.1040e-02, 2.7052e-09],
        [2.6672e-03, 9.6276e-01, 9.5251e-04, 2.5824e-03, 3.1040e-02, 2.7052e-09],
        [2.6672e-03, 9.6276e-01, 9.5251e-04, 2.5824e-03, 3.1040e-02, 2.7052e-09]])
Validation loss for epcoh 2: 4.789524911681805
Test accuracy for epoch 2: 0.2780956740672923


Epoch No: 3:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 3.1005685329437256




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 5.157634258270264
Training Iteration 1565, Loss: 2.6242334842681885
Training Iteration 1566, Loss: 2.732616901397705
Training Iteration 1567, Loss: 3.546945095062256
Training Iteration 1568, Loss: 2.845391273498535
Training Iteration 1569, Loss: 8.103182792663574
Training Iteration 1570, Loss: 4.622302532196045
Training Iteration 1571, Loss: 3.2648797035217285
Training Iteration 1572, Loss: 5.331798076629639
Training Iteration 1573, Loss: 4.076218128204346
Training Iteration 1574, Loss: 2.3684897422790527
Training Iteration 1575, Loss: 2.8101160526275635
Training Iteration 1576, Loss: 8.587151527404785
Training Iteration 1577, Loss: 7.538923740386963
Training Iteration 1578, Loss: 2.804828405380249
Training Iteration 1579, Loss: 4.743838787078857
Training Iteration 1580, Loss: 2.4783055782318115
Training Iteration 1581, Loss: 3.9186031818389893
Training Iteration 1582, Loss: 1.64118516445159



tensor([[8.2702e-01, 1.6714e-02, 4.2490e-02, 1.6483e-03, 1.1201e-01, 1.2178e-04],
        [8.2702e-01, 1.6714e-02, 4.2490e-02, 1.6483e-03, 1.1201e-01, 1.2178e-04],
        [8.2702e-01, 1.6714e-02, 4.2490e-02, 1.6483e-03, 1.1201e-01, 1.2178e-04],
        ...,
        [8.2702e-01, 1.6714e-02, 4.2490e-02, 1.6483e-03, 1.1201e-01, 1.2178e-04],
        [8.2702e-01, 1.6714e-02, 4.2490e-02, 1.6483e-03, 1.1201e-01, 1.2178e-04],
        [8.2702e-01, 1.6714e-02, 4.2490e-02, 1.6483e-03, 1.1201e-01, 1.2178e-04]])
Validation loss for epcoh 3: 2.912689926840315
Test accuracy for epoch 3: 0.3103685053788052


Epoch No: 4:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 2.736755847930908




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 4.325657367706299
Training Iteration 1565, Loss: 3.7345287799835205
Training Iteration 1566, Loss: 3.738718271255493
Training Iteration 1567, Loss: 4.82472562789917
Training Iteration 1568, Loss: 5.795790672302246
Training Iteration 1569, Loss: 6.878973007202148
Training Iteration 1570, Loss: 3.9092838764190674
Training Iteration 1571, Loss: 3.250180244445801
Training Iteration 1572, Loss: 3.8108410835266113
Training Iteration 1573, Loss: 4.229984283447266
Training Iteration 1574, Loss: 7.807801246643066
Training Iteration 1575, Loss: 2.821890354156494
Training Iteration 1576, Loss: 4.468331336975098
Training Iteration 1577, Loss: 5.404160499572754
Training Iteration 1578, Loss: 8.247955322265625
Training Iteration 1579, Loss: 6.38871431350708
Training Iteration 1580, Loss: 1.7977235317230225
Training Iteration 1581, Loss: 5.338288307189941
Training Iteration 1582, Loss: 5.2716522216796875
T



tensor([[1.4190e-02, 9.7346e-01, 4.2595e-04, 3.2995e-03, 8.4423e-03, 1.8424e-04],
        [1.4190e-02, 9.7346e-01, 4.2595e-04, 3.2995e-03, 8.4423e-03, 1.8424e-04],
        [1.4190e-02, 9.7346e-01, 4.2595e-04, 3.2995e-03, 8.4423e-03, 1.8424e-04],
        ...,
        [1.4190e-02, 9.7346e-01, 4.2595e-04, 3.2995e-03, 8.4423e-03, 1.8424e-04],
        [1.4190e-02, 9.7346e-01, 4.2595e-04, 3.2995e-03, 8.4423e-03, 1.8424e-04],
        [1.4190e-02, 9.7346e-01, 4.2595e-04, 3.2995e-03, 8.4423e-03, 1.8424e-04]])
Validation loss for epcoh 4: 4.127111217755963
Test accuracy for epoch 4: 0.2780956740672923


Epoch No: 5:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 4.475252151489258




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 4.206798553466797
Training Iteration 1565, Loss: 1.4697126150131226
Training Iteration 1566, Loss: 6.507330894470215
Training Iteration 1567, Loss: 4.1826982498168945
Training Iteration 1568, Loss: 5.538692474365234
Training Iteration 1569, Loss: 2.920654296875
Training Iteration 1570, Loss: 2.9083752632141113
Training Iteration 1571, Loss: 4.548991680145264
Training Iteration 1572, Loss: 2.486471176147461
Training Iteration 1573, Loss: 3.2735376358032227
Training Iteration 1574, Loss: 4.0081610679626465
Training Iteration 1575, Loss: 2.6354143619537354
Training Iteration 1576, Loss: 4.078603744506836
Training Iteration 1577, Loss: 2.397029399871826
Training Iteration 1578, Loss: 4.25202751159668
Training Iteration 1579, Loss: 6.203421592712402
Training Iteration 1580, Loss: 3.5837185382843018
Training Iteration 1581, Loss: 4.956767559051514
Training Iteration 1582, Loss: 4.3175458908081055




tensor([[3.1731e-01, 1.0614e-01, 5.1673e-01, 4.7347e-02, 1.2469e-02, 4.5414e-06],
        [3.1731e-01, 1.0614e-01, 5.1673e-01, 4.7347e-02, 1.2469e-02, 4.5414e-06],
        [3.1731e-01, 1.0614e-01, 5.1673e-01, 4.7347e-02, 1.2469e-02, 4.5414e-06],
        ...,
        [3.1731e-01, 1.0614e-01, 5.1673e-01, 4.7347e-02, 1.2469e-02, 4.5414e-06],
        [3.1731e-01, 1.0614e-01, 5.1673e-01, 4.7347e-02, 1.2469e-02, 4.5414e-06],
        [3.1731e-01, 1.0614e-01, 5.1673e-01, 4.7347e-02, 1.2469e-02, 4.5414e-06]])
Validation loss for epcoh 5: 2.173406802349661
Test accuracy for epoch 5: 0.20653086137178606


Epoch No: 6:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 4.894985675811768




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 5.628978252410889
Training Iteration 1565, Loss: 2.753277540206909
Training Iteration 1566, Loss: 4.089610576629639
Training Iteration 1567, Loss: 6.974461555480957
Training Iteration 1568, Loss: 4.780025959014893
Training Iteration 1569, Loss: 5.031686782836914
Training Iteration 1570, Loss: 3.016155481338501
Training Iteration 1571, Loss: 4.529686450958252
Training Iteration 1572, Loss: 4.785595893859863
Training Iteration 1573, Loss: 2.253812789916992
Training Iteration 1574, Loss: 3.1708197593688965
Training Iteration 1575, Loss: 5.106964111328125
Training Iteration 1576, Loss: 5.183438301086426
Training Iteration 1577, Loss: 2.1626696586608887
Training Iteration 1578, Loss: 4.560771942138672
Training Iteration 1579, Loss: 3.7334413528442383
Training Iteration 1580, Loss: 4.377841472625732
Training Iteration 1581, Loss: 4.026440143585205
Training Iteration 1582, Loss: 3.5200607776641846




tensor([[2.8719e-02, 9.4393e-01, 2.4238e-02, 3.1146e-03, 2.6758e-08, 2.5591e-06],
        [2.8719e-02, 9.4393e-01, 2.4238e-02, 3.1146e-03, 2.6758e-08, 2.5591e-06],
        [2.8719e-02, 9.4393e-01, 2.4238e-02, 3.1146e-03, 2.6758e-08, 2.5591e-06],
        ...,
        [2.8719e-02, 9.4393e-01, 2.4238e-02, 3.1146e-03, 2.6758e-08, 2.5591e-06],
        [2.8719e-02, 9.4393e-01, 2.4238e-02, 3.1146e-03, 2.6758e-08, 2.5591e-06],
        [2.8719e-02, 9.4393e-01, 2.4238e-02, 3.1146e-03, 2.6758e-08, 2.5591e-06]])
Validation loss for epcoh 6: 4.262442031599095
Test accuracy for epoch 6: 0.2780956740672923


Epoch No: 7:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 7.641659736633301




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 4.699281215667725
Training Iteration 1565, Loss: 5.223748683929443
Training Iteration 1566, Loss: 3.5904479026794434
Training Iteration 1567, Loss: 4.5503973960876465
Training Iteration 1568, Loss: 3.158687114715576
Training Iteration 1569, Loss: 4.512761116027832
Training Iteration 1570, Loss: 4.567755222320557
Training Iteration 1571, Loss: 3.6679131984710693
Training Iteration 1572, Loss: 5.622552394866943
Training Iteration 1573, Loss: 4.033694267272949
Training Iteration 1574, Loss: 5.170106410980225
Training Iteration 1575, Loss: 5.346173286437988
Training Iteration 1576, Loss: 2.3116207122802734
Training Iteration 1577, Loss: 3.5026817321777344
Training Iteration 1578, Loss: 3.7856438159942627
Training Iteration 1579, Loss: 6.107600212097168
Training Iteration 1580, Loss: 5.451439380645752
Training Iteration 1581, Loss: 5.024309158325195
Training Iteration 1582, Loss: 5.36896705627441



tensor([[1.3819e-01, 1.1361e-01, 1.1656e-03, 7.1553e-01, 3.1337e-02, 1.6853e-04],
        [1.3819e-01, 1.1361e-01, 1.1656e-03, 7.1553e-01, 3.1337e-02, 1.6853e-04],
        [1.3819e-01, 1.1361e-01, 1.1656e-03, 7.1553e-01, 3.1337e-02, 1.6853e-04],
        ...,
        [1.3819e-01, 1.1361e-01, 1.1656e-03, 7.1553e-01, 3.1337e-02, 1.6853e-04],
        [1.3819e-01, 1.1361e-01, 1.1656e-03, 7.1553e-01, 3.1337e-02, 1.6853e-04],
        [1.3819e-01, 1.1361e-01, 1.1656e-03, 7.1553e-01, 3.1337e-02, 1.6853e-04]])
Validation loss for epcoh 7: 3.22711774428637
Test accuracy for epoch 7: 0.08926527809567407


Epoch No: 8:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 4.998380661010742




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 4.4460225105285645
Training Iteration 1565, Loss: 3.1758296489715576
Training Iteration 1566, Loss: 3.1885476112365723
Training Iteration 1567, Loss: 2.1888742446899414
Training Iteration 1568, Loss: 3.4059739112854004
Training Iteration 1569, Loss: 4.923968315124512
Training Iteration 1570, Loss: 4.779064178466797
Training Iteration 1571, Loss: 4.601027488708496
Training Iteration 1572, Loss: 6.075123310089111
Training Iteration 1573, Loss: 6.600707054138184
Training Iteration 1574, Loss: 3.8128550052642822
Training Iteration 1575, Loss: 5.33490514755249
Training Iteration 1576, Loss: 1.8330273628234863
Training Iteration 1577, Loss: 6.573373794555664
Training Iteration 1578, Loss: 4.4501261711120605
Training Iteration 1579, Loss: 4.333555221557617
Training Iteration 1580, Loss: 2.3023526668548584
Training Iteration 1581, Loss: 3.786206007003784
Training Iteration 1582, Loss: 2.094978332519



tensor([[4.5346e-02, 2.0875e-01, 5.2027e-04, 7.1009e-01, 2.8726e-02, 6.5668e-03],
        [4.5346e-02, 2.0875e-01, 5.2027e-04, 7.1009e-01, 2.8726e-02, 6.5668e-03],
        [4.5346e-02, 2.0875e-01, 5.2027e-04, 7.1009e-01, 2.8726e-02, 6.5668e-03],
        ...,
        [4.5346e-02, 2.0875e-01, 5.2027e-04, 7.1009e-01, 2.8726e-02, 6.5668e-03],
        [4.5346e-02, 2.0875e-01, 5.2027e-04, 7.1009e-01, 2.8726e-02, 6.5668e-03],
        [4.5346e-02, 2.0875e-01, 5.2027e-04, 7.1009e-01, 2.8726e-02, 6.5668e-03]])
Validation loss for epcoh 8: 3.450575482197983
Test accuracy for epoch 8: 0.08926527809567407


Epoch No: 9:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 5.747020244598389




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 2.6631147861480713
Training Iteration 1565, Loss: 3.7439329624176025
Training Iteration 1566, Loss: 4.349673748016357
Training Iteration 1567, Loss: 6.606026649475098
Training Iteration 1568, Loss: 3.561584949493408
Training Iteration 1569, Loss: 3.2483580112457275
Training Iteration 1570, Loss: 2.9761509895324707
Training Iteration 1571, Loss: 7.219094276428223
Training Iteration 1572, Loss: 3.3908884525299072
Training Iteration 1573, Loss: 5.715595722198486
Training Iteration 1574, Loss: 3.1278510093688965
Training Iteration 1575, Loss: 3.1547305583953857
Training Iteration 1576, Loss: 3.160489320755005
Training Iteration 1577, Loss: 4.125607967376709
Training Iteration 1578, Loss: 3.7197885513305664
Training Iteration 1579, Loss: 4.261526107788086
Training Iteration 1580, Loss: 3.3946499824523926
Training Iteration 1581, Loss: 3.576620101928711
Training Iteration 1582, Loss: 4.22991037368



tensor([[9.2335e-01, 5.0644e-02, 2.7527e-03, 2.1914e-02, 1.3184e-03, 2.0004e-05],
        [9.2335e-01, 5.0644e-02, 2.7527e-03, 2.1914e-02, 1.3184e-03, 2.0004e-05],
        [9.2335e-01, 5.0644e-02, 2.7527e-03, 2.1914e-02, 1.3184e-03, 2.0004e-05],
        ...,
        [9.2335e-01, 5.0644e-02, 2.7527e-03, 2.1914e-02, 1.3184e-03, 2.0004e-05],
        [9.2335e-01, 5.0644e-02, 2.7527e-03, 2.1914e-02, 1.3184e-03, 2.0004e-05],
        [9.2335e-01, 5.0644e-02, 2.7527e-03, 2.1914e-02, 1.3184e-03, 2.0004e-05]])
Validation loss for epcoh 9: 3.32775485051922
Test accuracy for epoch 9: 0.3103685053788052


Epoch No: 10:   0%|          | 0/6553 [00:00<?, ?it/s]

Training Iteration 1, Loss: 4.770269393920898




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training Iteration 1564, Loss: 3.7350282669067383
Training Iteration 1565, Loss: 4.820809841156006
Training Iteration 1566, Loss: 3.1551878452301025
Training Iteration 1567, Loss: 3.234713315963745
Training Iteration 1568, Loss: 4.789275169372559
Training Iteration 1569, Loss: 4.314996242523193
Training Iteration 1570, Loss: 7.6658616065979
Training Iteration 1571, Loss: 3.4667515754699707
Training Iteration 1572, Loss: 5.563244819641113
Training Iteration 1573, Loss: 2.4524025917053223
Training Iteration 1574, Loss: 8.049346923828125
Training Iteration 1575, Loss: 3.8093323707580566
Training Iteration 1576, Loss: 6.28852653503418
Training Iteration 1577, Loss: 3.010226249694824
Training Iteration 1578, Loss: 3.834747552871704
Training Iteration 1579, Loss: 2.357412815093994
Training Iteration 1580, Loss: 6.966704845428467
Training Iteration 1581, Loss: 5.9711809158325195
Training Iteration 1582, Loss: 2.700460910797119
T



tensor([[1.9933e-01, 7.7769e-01, 7.6592e-03, 1.5229e-02, 9.7458e-05, 2.1226e-12],
        [1.9933e-01, 7.7769e-01, 7.6592e-03, 1.5229e-02, 9.7458e-05, 2.1226e-12],
        [1.9933e-01, 7.7769e-01, 7.6592e-03, 1.5229e-02, 9.7458e-05, 2.1226e-12],
        ...,
        [1.9933e-01, 7.7769e-01, 7.6592e-03, 1.5229e-02, 9.7458e-05, 2.1226e-12],
        [1.9933e-01, 7.7769e-01, 7.6592e-03, 1.5229e-02, 9.7458e-05, 2.1226e-12],
        [1.9933e-01, 7.7769e-01, 7.6592e-03, 1.5229e-02, 9.7458e-05, 2.1226e-12]])
Validation loss for epcoh 10: 3.6397840332301237
Test accuracy for epoch 10: 0.2780956740672923
