# Solution 2

### Using xlm-roberta for vectorization and then a simple neural net for label classification

#### Let's load the data.

In [1]:
from pathlib import Path
wd = Path.cwd()
wd = wd.parent.parent
wd = wd / 'merged_data' 
sub1 = str(wd) + '/subtask1.parquet'
print(sub1)

/home/matijak/Documents/programiranje/projects/semeval/merged_data/subtask1.parquet


In [2]:
import pandas as pd
df = pd.read_parquet(sub1)
df.head()

Unnamed: 0,lang,art_name,entity,start,end,class1,classes2,text
0,BG,BG_670.txt,Запад,152,156,Antagonist,"[Conspirator, Instigator, Foreign Adversary]",Опитът на колективния Запад да „обезкърви Руси...
1,BG,BG_670.txt,САЩ,530,532,Antagonist,[Instigator],Опитът на колективния Запад да „обезкърви Руси...
2,BG,BG_670.txt,НАТО,535,538,Antagonist,[Instigator],Опитът на колективния Запад да „обезкърви Руси...
3,BG,BG_670.txt,Украйна,578,584,Antagonist,[Foreign Adversary],Опитът на колективния Запад да „обезкърви Руси...
4,BG,BG_670.txt,украински войници,633,649,Innocent,[Victim],Опитът на колективния Запад да „обезкърви Руси...


#### Now lets clean article text

In [3]:
import re
def labelNum(row):
    if row['class1'] == 'Antagonist':
        return int(0)
    if row['class1'] == 'Innocent':
        return int(1)
    if row['class1'] == 'Protagonist':
        return int(2)
def cleanText(row):
    text = str(row['text'])
    #text = re.sub(r'[^\w\s]', ' ', text)
    text = text.replace('\n',' ').replace('  ', ' ')
    return text
df['label'] = df.apply(labelNum,axis=1)
df['input'] = df.apply(cleanText,axis=1)
df.loc[448]

lang                                                       EN
art_name                                     EN_UA_103861.txt
entity                                                Chinese
start                                                     791
end                                                       797
class1                                             Antagonist
classes2                                                [Spy]
text        The World Needs Peacemaker Trump Again \n\n by...
label                                                       0
input       The World Needs Peacemaker Trump Again  by Jef...
Name: 448, dtype: object

In [4]:
def find_all_substring_start_end(text, substring):
    # Use re.finditer to find all occurrences of the substring in the text
    matches = re.finditer(re.escape(substring), text)
    
    # Collect the start and end indices of all matches
    positions = [(match.start(), match.end()) for match in matches]
    
    return positions
def adjust_start_end(row):
    org_text,cl_text,start,end,entity = str(row['text']),str(row['input']),int(row['start']),int(row['end']),str(row['entity'])
    ss1 = find_all_substring_start_end(org_text,entity)
    ss2 = find_all_substring_start_end(cl_text,entity)
    #print(ss1,ss2)
    #print(row['text'][start:end])
    a = 0
    for i in range(len(ss1)):
        if abs((ss1[i][0] - start) + (ss1[i][1] - end) ) <= 2:
            a = i
            break
    if org_text[ss1[a][0]:ss1[a][1]] != cl_text[ss2[a][0]:ss2[a][1]]:
        print("ERROR!")
    return ss2[a][0],ss2[a][1]
print(df.loc[0])
df['new_start_end'] = df.apply(adjust_start_end,axis=1)

lang                                                       BG
art_name                                           BG_670.txt
entity                                                  Запад
start                                                     152
end                                                       156
class1                                             Antagonist
classes2         [Conspirator, Instigator, Foreign Adversary]
text        Опитът на колективния Запад да „обезкърви Руси...
label                                                       0
input       Опитът на колективния Запад да „обезкърви Руси...
Name: 0, dtype: object


In [5]:
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizerFast

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3).to(device)
tokenizer = XLMRobertaTokenizerFast.from_pretrained("xlm-roberta-base")

def preprocess_function(examples):
    return tokenizer(examples['input'], padding=True, truncation=True,max_length=8192,return_offsets_mapping=True)

data = df.loc[ : , ['input','label','new_start_end','entity']]
data['tokenized']=data.apply(preprocess_function,axis=1)

Using device: cuda


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
print(data.iloc[0]['tokenized'])

{'input_ids': [0, 1089, 22617, 1669, 29, 47829, 2097, 32275, 69, 137, 197, 35359, 53335, 2827, 40053, 155, 135, 128601, 29, 12747, 226, 49, 94511, 137, 2687, 591, 7533, 135, 10099, 54293, 35, 25977, 245, 131732, 155, 35, 18777, 183, 159814, 153, 1089, 22617, 1669, 29, 47829, 2097, 32275, 69, 137, 197, 35359, 53335, 2827, 40053, 155, 135, 128601, 29, 12747, 226, 49, 94511, 137, 2687, 591, 7533, 135, 10099, 54293, 35, 25977, 245, 131732, 155, 35, 18777, 183, 159814, 4629, 69, 62086, 16846, 33318, 4, 3756, 77, 63084, 15258, 1669, 29, 92173, 59, 6208, 29, 6047, 39540, 197, 14114, 16641, 44267, 5, 61216, 193342, 43219, 84535, 2262, 36690, 45961, 213358, 222, 31458, 2549, 29, 45775, 59, 29, 103285, 245, 34078, 29, 40108, 47239, 303, 3512, 105, 22192, 4, 12434, 47853, 19737, 245, 6, 163308, 183, 109560, 205, 29, 40108, 135694, 25223, 650, 447, 3873, 8458, 63522, 5, 44, 123209, 24724, 2374, 205, 29, 40108, 4, 20292, 35, 4907, 155386, 74300, 4301, 61, 51192, 205, 49, 159814, 19173, 40053, 218, 

In [7]:
#print(data.loc[0]['tokenized'])
def indexes(row):
    off_mask = row['tokenized']['offset_mapping']
    start,end = row['new_start_end'][0],row['new_start_end'][1]
    inds = list()
    for p in range(len(off_mask)):
        if off_mask[p][0] >= start and off_mask[p][1] <= end:
            if p != len(off_mask)-1:
                inds.append(p)
    #if len(inds) > 1:
        #print("GREATER THAN 1")
    if len(inds) == 0:
        print(start,end)
    return inds
data['indexes'] = data.apply(indexes,axis=1)

In [8]:
print(data.loc[448],data.loc[1000])

input            The World Needs Peacemaker Trump Again  by Jef...
label                                                            0
new_start_end                                           (785, 792)
entity                                                     Chinese
tokenized              [input_ids, attention_mask, offset_mapping]
indexes                                                      [180]
Name: 448, dtype: object input            जयपुर में जलवायु परिवर्तन को लेकर स्टेट लेवल ट...
label                                                            2
new_start_end                                           (623, 636)
entity                                               केन्द्र सरकार
tokenized              [input_ids, attention_mask, offset_mapping]
indexes                                                 [162, 163]
Name: 1000, dtype: object


In [9]:
data['list'] = data['tokenized'].apply(lambda x: x['input_ids'])
data['attention'] = data['tokenized'].apply(lambda x: x['attention_mask'])
ids = data['list']
att = data['attention']
indexes = data['indexes']
tids = list()
tatt = list()
print(len(ids),len(att),len(indexes))
for i in range(len(ids)):
    tids.append(torch.tensor(ids[i]).to(device))
    tatt.append(torch.tensor(att[i]).to(device))
print(tids[0],tatt[0])
print(indexes[448])

2535 2535 2535
tensor([     0,   1089,  22617,   1669,     29,  47829,   2097,  32275,     69,
           137,    197,  35359,  53335,   2827,  40053,    155,    135, 128601,
            29,  12747,    226,     49,  94511,    137,   2687,    591,   7533,
           135,  10099,  54293,     35,  25977,    245, 131732,    155,     35,
         18777,    183, 159814,    153,   1089,  22617,   1669,     29,  47829,
          2097,  32275,     69,    137,    197,  35359,  53335,   2827,  40053,
           155,    135, 128601,     29,  12747,    226,     49,  94511,    137,
          2687,    591,   7533,    135,  10099,  54293,     35,  25977,    245,
        131732,    155,     35,  18777,    183, 159814,   4629,     69,  62086,
         16846,  33318,      4,   3756,     77,  63084,  15258,   1669,     29,
         92173,     59,   6208,     29,   6047,  39540,    197,  14114,  16641,
         44267,      5,  61216, 193342,  43219,  84535,   2262,  36690,  45961,
        213358,    222,  

In [25]:
sliced_ids = list()
sliced_ntids = list()
sliced_att = list()
key_inds = list()
key_ids = list()

def slices(index,size,context_size):
    if (size<context_size):
        return 0,size
    lower_c = int(context_size/2-1)
    upper_c = int(context_size/2)
    #print(lower_c,upper_c)
    if index < lower_c:
        return 0,context_size
    elif index >= lower_c:
        if index + upper_c > size:
            return index-(context_size-(size-index)), size
        else:
            return index-lower_c,index+upper_c+1  


for i in range(len(tids)):
    slower,supper = slices(indexes[i][0],len(tids[i]),510)
    #key_tid = tids[i][indexes[i][0]]
    pid = ids[i][slower:supper]
    key_inds.append([])
    for j in indexes[i]: 
        key_id = ids[i][j]
        if key_id not in pid:
           print(len(ids[i]),key_id,slower,supper,indexes[i])
        key_inds[i].append(pid.index(key_id))
    apid = tids[i][slower:supper]
    apatt = tatt[i][slower:supper]
    if 0 not in pid:
        apid = torch.cat((torch.tensor([0]).to(device),apid),dim=0)
        apatt = torch.cat((torch.tensor([1]).to(device),apatt),dim=0)
        for j in range(len(key_inds[i])):
            key_inds[i][j] += 1
    if 2 not in pid:
        apid = torch.cat((apid,torch.tensor([2]).to(device)),dim=0)
        apatt = torch.cat((apatt,torch.tensor([1]).to(device)),dim=0)
    sliced_ids.append(apid)
    sliced_att.append(apatt)

Min = 10000
Max = 0
ind2 = 0
for i in range(len(indexes)):
    if len(sliced_ids[i]) < Min:
        Min = len(sliced_ids[i])
        ind2 = i
        
    if len(sliced_ids[i]) > Max:
        Max = len(sliced_ids[i])
print(len(sliced_ids[500]),len(tids[500]),Min,Max,len(tids[ind2]),ind2,key_inds[500])

511 586 65 512 65 956 [1, 2]


In [28]:
a = list()
for i in key_inds[512]:
    a.append(sliced_ids[512][i])
print(a)
print(tokenizer.batch_decode(a),df['entity'].loc[512])

[tensor(884, device='cuda:0'), tensor(59223, device='cuda:0')]
['Al', 'Gore'] Al Gore


In [29]:
input_ids = list()
att_mask = list()
for ten,att in zip(sliced_ids,sliced_att):
    if len(ten) < 512:
        padding_length = 512 - len(ten)
        padding_tensor = torch.full((padding_length,), tokenizer.pad_token_id, dtype=ten.dtype).to(device)
        padding_tensor2 = torch.full((padding_length,), 0, dtype=att.dtype).to(device)
        ten = torch.cat((ten,padding_tensor),dim=0)
        att = torch.cat((att,padding_tensor2),dim=0)
    input_ids.append(ten)
    att_mask.append(att)
inputIds = torch.stack(input_ids)
attMask = torch.stack(att_mask)
#print(input_ids[300],attMask[300],inputIds.shape,attMask.shape)
print(inputIds)

tensor([[    0,  1089, 22617,  ...,     1,     1,     1],
        [    0,  1089, 22617,  ...,     1,     1,     1],
        [    0,  1089, 22617,  ...,     1,     1,     1],
        ...,
        [    0, 19559,   164,  ...,     5,     2,     1],
        [    0, 19559,   164,  ...,     5,     2,     1],
        [    0, 19559,   164,  ...,     5,     2,     1]], device='cuda:0')


In [30]:
a = list()
for i in key_inds[500]:
    a.append(inputIds[500][i])
print(a)
print(tokenizer.batch_decode(a),df['entity'].loc[500])

[tensor(67151, device='cuda:0'), tensor(59520, device='cuda:0')]
['Boris', 'Johnson'] Boris Johnson


In [31]:
from torch.utils.data import DataLoader, TensorDataset


dataset = TensorDataset(inputIds, attMask)

dataloader = DataLoader(dataset, batch_size=1,shuffle=False)

ind = 0

vectors = []

#print(input_ids_batch)
for batch in dataloader:
    #print(len(batch[0]),batch)
    input_for_model = {
        "input_ids": batch[0],
        "attention_mask" : batch[1]
    }

    with torch.no_grad():  # Disable gradients for inference
        outputs = model(**input_for_model,output_hidden_states=True)
    hidden_states = outputs.hidden_states
    last_hs = hidden_states[-1]
    #print(last_hs.shape)
    for i in range(len(last_hs)):
        vectors.append([])
        for j in range(len(key_inds[ind])):
            vectors[len(vectors)-1].append(last_hs[i][key_inds[ind][j]])
        ind+=1        

In [32]:
print(vectors[500])
print(vectors[448],key_inds[448])

[tensor([ 1.3687e-01,  6.0286e-02, -3.2900e-03,  1.4017e-02,  8.8945e-02,
        -2.8998e-01, -7.5474e-02,  3.1370e-01,  7.2622e-02,  7.4450e-02,
        -1.2063e-01,  7.7085e-03,  5.2945e-01, -2.2475e-01,  7.1912e-04,
         8.5789e-02,  2.2363e-03, -2.1583e-02,  1.8685e-01, -7.6887e-02,
        -1.8216e-01, -1.5473e-03, -3.6373e-02, -5.0588e-04,  1.9936e-01,
        -6.4968e-02,  8.5755e-02, -2.8144e-02,  1.8071e-01, -1.0272e-01,
         6.0687e-02,  1.2576e-02,  7.2960e-03,  9.9497e-02,  3.2894e-02,
         1.6656e-01,  5.5868e-02,  1.4055e-02,  1.2616e-01,  2.1519e-03,
         5.0026e-02, -5.1597e-02,  3.9367e-02,  4.6329e-02,  5.6186e-02,
        -3.7047e-02,  9.8740e-03,  2.9654e-02,  6.1344e-02, -5.9128e-02,
         5.2124e-02, -4.5550e-02,  8.4431e-03,  1.3068e-01,  2.5632e-02,
        -1.0084e-01,  8.5307e-02, -5.6493e-02,  1.2085e-02,  1.7036e-01,
         5.5647e-02,  3.5237e-02,  3.9696e-02, -1.4993e-01,  1.5731e-01,
        -3.3519e-02, -1.3597e-01,  2.8188e-02, -7.

In [34]:
X = list()
for i in range(len(vectors)):
    Sum = 0
    for j in range(len(vectors[i])):
        Sum += vectors[i][j]
    X.append(Sum)
print(len(X))
print(type(X))
X = torch.stack(X)
print(type(X))
print(X.shape)

2535
<class 'list'>
<class 'torch.Tensor'>
torch.Size([2535, 768])


In [35]:
y = data['label']
y = torch.tensor(y).to(device)
print(y[1000],X[1000])

IndexError: too many indices for tensor of dimension 2

In [36]:
from sklearn.model_selection import train_test_split
#print(len(X))
#print(X.shape)  # Should print: torch.Size([length_of_sequence])
#print(type(X))#X_Stack = torch.stack(X)
#print(X_Stack.shape)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=32
)
print(X[0],y[0])
#print(X_train.shape,X_test.shape,)

tensor([ 9.6003e-02,  6.2943e-02, -2.2935e-03,  2.5985e-03,  3.4805e-02,
        -2.1132e-01, -5.6319e-02,  4.2129e-01,  9.8496e-03,  1.2241e-01,
         1.6987e-01, -6.3021e-02,  3.1096e-01,  1.2543e-01,  7.4294e-02,
         7.3950e-02,  5.0596e-02,  5.2429e-03,  1.2735e-01,  7.1985e-02,
         1.1167e-01,  1.3440e-01, -9.7626e-03,  1.7524e-01,  8.2241e-02,
        -7.4658e-02, -2.0493e-02, -1.0414e-01,  1.7133e-01,  2.6297e-02,
         6.4875e-02, -5.9976e-02,  1.0653e-02, -1.0010e-01, -1.9897e-02,
         1.2702e-01,  3.1713e-02, -5.0852e-02, -2.1265e-01,  5.4170e-02,
         3.9952e-02, -3.7870e-02,  6.7580e-02,  3.5146e-02,  6.4087e-02,
         7.5251e-03, -2.8026e-02,  1.3864e-02, -1.9209e-02, -9.9757e-02,
         2.3889e-02, -2.3436e-01,  7.5889e-02,  1.5338e-01, -1.0456e-01,
        -4.8758e-02,  6.8041e-02, -3.7255e-02, -1.9131e-01,  1.7387e-01,
         1.1611e-01,  3.2410e-02,  1.4867e-01,  1.4538e-02,  1.4758e-03,
         3.4975e-02, -7.1496e-02, -3.0925e-03, -2.0

In [40]:
from torch.optim import AdamW
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test,y_test)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64)


In [41]:
import torch.nn as nn
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        # Input to hidden layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # Second hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        # Output layer
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

model2 = SimpleNN(input_size=768, hidden_size=128, output_size=3)
model2.to(device)

SimpleNN(
  (fc1): Linear(in_features=768, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=3, bias=True)
)

In [43]:
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import torch.optim as optim



num_epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model2.parameters(), lr=2e-3)
for epoch in range(num_epochs):
    # Training phase
    model2.train()
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0
    
    # Initialize tqdm progress bar for training
    train_progress_bar = tqdm(train_dataloader, desc=f"Training Epoch {epoch + 1}/{num_epochs}")
    
    for batch in train_progress_bar:
        optimizer.zero_grad()
        
        # Move data and labels to device (CPU or GPU)
        data, labels = batch
        data = data.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model2(data)  # Shape will be (batch_size, 3)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Calculate accuracy
        _, preds = torch.max(outputs, 1)  # Correct way to get predicted class
        #print(preds)
        correct_predictions += (preds == labels).sum().item()
        total_predictions += labels.size(0)
        
        # Update tqdm description with current loss
        train_progress_bar.set_postfix({'loss': loss.item()})
    
    avg_train_loss = total_loss / len(train_dataloader)
    train_accuracy = correct_predictions / total_predictions
    
    print(f"Epoch {epoch + 1}/{num_epochs}")
    print(f"Training loss: {avg_train_loss:.4f}, Training accuracy: {train_accuracy:.4f}")
    
    # Test phase
    model2.eval()
    test_loss = 0
    correct_test_predictions = 0
    total_test_predictions = 0
    
    test_progress_bar = tqdm(test_dataloader, desc=f"Test Epoch {epoch + 1}/{num_epochs}")
    
    with torch.no_grad():
        for batch in test_progress_bar:
            data, labels = batch
            data = data.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model2(data)
            loss = criterion(outputs, labels)
            
            test_loss += loss.item()
            
            # Calculate test accuracy
            preds = torch.argmax(outputs, dim=-1)  # Correct way to get predicted class
            #print(preds)
            correct_test_predictions += (preds == labels).sum().item()
            total_test_predictions += labels.size(0)
            
            test_progress_bar.set_postfix({'loss': loss.item()})
    
    avg_test_loss = test_loss / len(test_dataloader)
    test_accuracy = correct_test_predictions / total_test_predictions
    
    print(f"Test loss: {avg_test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")


Training Epoch 1/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 268.48it/s, loss=0.734]


Epoch 1/10
Training loss: 0.9336, Training accuracy: 0.5608


Test Epoch 1/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 435.51it/s, loss=0.803]


Test loss: 0.8362, Test accuracy: 0.6299


Training Epoch 2/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 251.57it/s, loss=1.2]


Epoch 2/10
Training loss: 0.7432, Training accuracy: 0.6894


Test Epoch 2/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 372.58it/s, loss=0.684]


Test loss: 0.7761, Test accuracy: 0.6798


Training Epoch 3/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 284.96it/s, loss=0.509]


Epoch 3/10
Training loss: 0.6548, Training accuracy: 0.7266


Test Epoch 3/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 461.75it/s, loss=0.667]


Test loss: 0.7434, Test accuracy: 0.7008


Training Epoch 4/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 293.65it/s, loss=0.591]


Epoch 4/10
Training loss: 0.6026, Training accuracy: 0.7535


Test Epoch 4/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 458.22it/s, loss=0.845]


Test loss: 0.7668, Test accuracy: 0.7008


Training Epoch 5/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 279.84it/s, loss=0.936]


Epoch 5/10
Training loss: 0.5588, Training accuracy: 0.7758


Test Epoch 5/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 452.83it/s, loss=0.907]


Test loss: 0.7724, Test accuracy: 0.6719


Training Epoch 6/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 297.13it/s, loss=0.41]


Epoch 6/10
Training loss: 0.5137, Training accuracy: 0.7916


Test Epoch 6/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 496.41it/s, loss=0.788]


Test loss: 0.7745, Test accuracy: 0.7113


Training Epoch 7/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 300.70it/s, loss=0.635]


Epoch 7/10
Training loss: 0.4992, Training accuracy: 0.7823


Test Epoch 7/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 422.99it/s, loss=0.757]


Test loss: 0.7911, Test accuracy: 0.7113


Training Epoch 8/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 288.48it/s, loss=0.436]


Epoch 8/10
Training loss: 0.4723, Training accuracy: 0.8055


Test Epoch 8/10: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 534.44it/s, loss=1.11]


Test loss: 0.8052, Test accuracy: 0.7139


Training Epoch 9/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 352.97it/s, loss=0.401]


Epoch 9/10
Training loss: 0.4201, Training accuracy: 0.8268


Test Epoch 9/10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 678.96it/s, loss=0.971]


Test loss: 0.8416, Test accuracy: 0.6929


Training Epoch 10/10: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 350.29it/s, loss=0.242]


Epoch 10/10
Training loss: 0.4044, Training accuracy: 0.8366


Test Epoch 10/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 658.79it/s, loss=0.866]

Test loss: 0.8364, Test accuracy: 0.7244



