# NDSSL Node Classification - Zipcode

**To Do**:
- Wy is the GCN/graphSAGE approach so fast/why is the MLP approach so slow?
- The MPNN approaches beat the MLP approach, but neither are as good as I expected
- I tried using class weights, and results aren't great
- does it learn that everyone in a household has the same income?
- add more descriptive text

In [1]:
import torch
from torch_geometric.data import Data, InMemoryDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv
from torch_geometric.data import GraphSAINTRandomWalkSampler
from torch_geometric.utils import get_laplacian, degree
import pandas as pd
import numpy as np
import itertools
from tqdm.auto import tqdm, trange

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.style as style 
style.use('seaborn-paper')

fontsize = 12
plt.rcParams.update({
    'font.size': fontsize, 
    'axes.labelsize': fontsize, 
    'legend.fontsize': fontsize,
    'xtick.labelsize': fontsize,
    'ytick.labelsize': fontsize,
    'axes.titlesize': fontsize
                    })

In [2]:
#from imports import *
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#device = 'cpu'
print(device)

cuda:0


## Data processing

Convert the node attribute data into an (x,y) format

In [3]:
node_attributes = pd.read_csv('../../data/NDSSL data/raw/node_attributes.csv')

## one-hot encode gender
gender_index = torch.LongTensor(node_attributes['gender'].values - 1).type(torch.int64).reshape((len(node_attributes), 1))
gender_onehot = torch.LongTensor(len(node_attributes), 2)
gender_onehot.zero_()
gender_onehot = gender_onehot.scatter_(1, gender_index, 1).type(torch.float32);

## one-hot encode worker
worker_index = torch.LongTensor(node_attributes['worker'].values - 1).type(torch.int64).reshape((len(node_attributes), 1))
worker_onehot = torch.LongTensor(len(node_attributes), 2)
worker_onehot.zero_()
worker_onehot = worker_onehot.scatter_(1, worker_index, 1).type(torch.float32);

## map the 117 distinct zipcodes to the integers 0, ..., 116
zipcode_original = node_attributes['zipcode'].values
zipcode_dict = {i: j for j, i in enumerate(set(zipcode_original))} 
zipcode_index = torch.LongTensor(np.asarray([zipcode_dict[i] for i in zipcode_original])).type(torch.int64).reshape((len(node_attributes), 1))

## one-hot encode zipcode
zipcode_onehot = torch.LongTensor(len(node_attributes), len(zipcode_dict))
zipcode_onehot.zero_()
zipcode_onehot = zipcode_onehot.scatter_(1, zipcode_index, 1).type(torch.float32);

## one-hot encode household income
household_income_index = torch.LongTensor(node_attributes['household_income'].values - 1).type(torch.int64).reshape((len(node_attributes), 1))
household_income_onehot = torch.LongTensor(len(node_attributes), 14)
household_income_onehot.zero_()
household_income_onehot = household_income_onehot.scatter_(1, household_income_index, 1).type(torch.float32);

## one-hot encode relationship
relationship_index = torch.LongTensor(node_attributes['relationship'].values - 1).type(torch.int64).reshape((len(node_attributes), 1))
relationship_onehot = torch.LongTensor(len(node_attributes), 4)
relationship_onehot.zero_()
relationship_onehot = relationship_onehot.scatter_(1, relationship_index, 1).type(torch.float32);

age = torch.FloatTensor(node_attributes['age'].values).reshape(len(node_attributes), 1).type(torch.float32)
household_size = torch.FloatTensor(node_attributes['household_size'].values).reshape(len(node_attributes), 1).type(torch.float32)
household_workers = torch.FloatTensor(node_attributes['household_workers'].values).reshape(len(node_attributes), 1).type(torch.float32)
household_vehicles = torch.FloatTensor(node_attributes['household_vehicles'].values).reshape(len(node_attributes), 1).type(torch.float32)

In [4]:
x = torch.cat((gender_onehot, age, relationship_onehot, worker_onehot, household_income_onehot, household_size, household_workers, household_vehicles), dim=1)
y = zipcode_index[:,0]

print(x.shape, x.dtype)
print(y.shape, y.dtype)

torch.Size([1601330, 26]) torch.float32
torch.Size([1601330]) torch.int64


train/test split

In [5]:
## create a train/test split by household
household_ids = list(set(list(node_attributes['household_id'])))
np.random.shuffle(household_ids)

ntrain_households = int(0.8 * len(household_ids))
households_train = household_ids[:ntrain_households]
households_test = household_ids[ntrain_households:]
print('number of distinct households: %i' %len(household_ids))
print('number of training households: %i' %len(households_train))
print('number of testing households: %i' %len(households_test))

train_mask = torch.IntTensor(node_attributes['household_id'].isin(households_train)).type(torch.int64)
test_mask = torch.IntTensor(node_attributes['household_id'].isin(households_test)).type(torch.int64)
train_idx = np.arange(len(node_attributes))[train_mask == 1]
test_idx = np.arange(len(node_attributes))[test_mask == 1]

print('size of training set: %i' %torch.sum(train_mask).item())
print('size of testing set: %i' %torch.sum(test_mask).item())
print('percent train: %.2f' %(torch.sum(train_mask).item()/len(train_mask)))

number of distinct households: 632626
number of training households: 506100
number of testing households: 126526
size of training set: 1281023
size of testing set: 320307
percent train: 0.80


save

In [6]:
torch.save(x, '../../data/NDSSL data/raw/x.pt')
torch.save(y, '../../data/NDSSL data/raw/y.pt')
torch.save(train_mask, '../../data/NDSSL data/raw/train_mask.pt')
torch.save(test_mask, '../../data/NDSSL data/raw/test_mask.pt')
torch.save(train_idx, '../../data/NDSSL data/raw/train_idx.pt')
torch.save(test_idx, '../../data/NDSSL data/raw/test_idx.pt')

## Node classification: MLP 

In [7]:
full_set = torch.utils.data.TensorDataset(x, y)
train_set = torch.utils.data.Subset(full_set, train_idx)
test_set = torch.utils.data.Subset(full_set, test_idx)

In [8]:
class mlp(torch.nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(mlp, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
        self.fc2 = torch.nn.Linear(self.hidden_size, self.hidden_size)
        self.fc3 = torch.nn.Linear(self.hidden_size, self.num_classes)
        self.relu = torch.nn.ReLU()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = self.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.fc3(x)    
        x = x.log_softmax(dim=-1)
        return x

In [9]:
def train():
    model.train()

    total_loss = total_examples = 0
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        out = model(x_batch)
        loss = F.nll_loss(out, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * x_batch.shape[0]
        total_examples += x_batch.shape[0]
    
    return total_loss/total_examples


@torch.no_grad()
def test():
    model.eval()
    
    ## train accuracy
    total_examples = correct = 0.0
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        out = model(x_batch)
        total_examples += out.shape[0]
        correct += torch.sum(torch.argmax(out, axis=1) == y_batch).cpu().item()
    train_accuracy = correct/total_examples
    
    ## test accuracy
    total_examples = correct = 0.0
    for x_batch, y_batch in test_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        out = model(x_batch)
        total_examples += x_batch.shape[0]
        correct += torch.sum(torch.argmax(out, axis=1) == y_batch).cpu().item()
    test_accuracy = correct/total_examples
    
    return train_accuracy, test_accuracy


**Question: why is this so slow, even with a GPU?**

In [None]:
model = mlp(x.shape[1], 1000, 117)
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)
print('number of trainable parameters %i' %sum(p.numel() for p in model.parameters() if p.requires_grad))

epochs = 50
batch_size = 1000
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

for epoch in range(0, epochs):
    loss = train()
    accs = test()
    print(f'Epoch: {epoch+1:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, 'f'Test: {accs[1]:.4f}')

number of trainable parameters 1145117
Epoch: 01, Loss: 4.7883, Train: 0.0266, Test: 0.0260
Epoch: 02, Loss: 4.5907, Train: 0.0257, Test: 0.0255
Epoch: 03, Loss: 4.5413, Train: 0.0264, Test: 0.0262
Epoch: 04, Loss: 4.5127, Train: 0.0267, Test: 0.0263
Epoch: 05, Loss: 4.4949, Train: 0.0266, Test: 0.0262
Epoch: 06, Loss: 4.4833, Train: 0.0270, Test: 0.0265
Epoch: 07, Loss: 4.4746, Train: 0.0271, Test: 0.0267
Epoch: 08, Loss: 4.4684, Train: 0.0283, Test: 0.0277
Epoch: 09, Loss: 4.4641, Train: 0.0290, Test: 0.0285
Epoch: 10, Loss: 4.4610, Train: 0.0299, Test: 0.0297
Epoch: 11, Loss: 4.4578, Train: 0.0296, Test: 0.0295
Epoch: 12, Loss: 4.4554, Train: 0.0273, Test: 0.0266
Epoch: 13, Loss: 4.4536, Train: 0.0290, Test: 0.0283
Epoch: 14, Loss: 4.4518, Train: 0.0285, Test: 0.0279
Epoch: 15, Loss: 4.4502, Train: 0.0299, Test: 0.0295
Epoch: 16, Loss: 4.4488, Train: 0.0300, Test: 0.0295
Epoch: 17, Loss: 4.4473, Train: 0.0293, Test: 0.0291
Epoch: 18, Loss: 4.4466, Train: 0.0292, Test: 0.0291
Epoch: 

## Node classification: Message Passing NNs

In [7]:
class NDSSLDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(NDSSLDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
        
    @property
    def raw_file_names(self):
        return ['edge_list.csv', 'x_zipcode.pt', 'y_zipcode.pt', 'train_mask.pt', 'test_mask.pt', 'edge_attributes.csv']

    @property
    def processed_file_names(self):
        return ['NDSSL_graph_full_worker.pt']

    def process(self):
        data_list = []
        
        ## load the edge list
        edge_list = pd.read_csv(self.raw_paths[0], dtype=int) - 2000000 #the node id's start at 2000000, shift these to start at 0         
        
        ## format the edge list
        target_nodes = edge_list.iloc[:,0].values
        source_nodes = edge_list.iloc[:,1].values
        edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.int64)

        ## load the (x,y) formatted data
        x = torch.load(self.raw_paths[1], map_location=torch.device('cpu'))
        y = torch.load(self.raw_paths[2], map_location=torch.device('cpu'))
        train_mask = torch.load(self.raw_paths[3], map_location=torch.device('cpu')) == 1 
        test_mask = torch.load(self.raw_paths[4], map_location=torch.device('cpu')) == 1 

        ## set the edge weights to be the duration (in hours)
        edge_attributes = pd.read_csv(self.raw_paths[5])['duration'].values/3600
        duration =  torch.FloatTensor(edge_attributes)
        ## previous approaches used the degree:
        #row, col = data.edge_index
        #data.edge_attr = (1. / degree(col, data.num_nodes)[col]).double()
        
        ## build the data
        data = Data(edge_index=edge_index, x=x, y=y, train_mask=train_mask, test_mask=test_mask)
        data.edge_weight = duration
        data.train_mask = train_mask
        data.test_mask = test_mask
        #data.train_mask = torch.cat((torch.ones(n_train, dtype=torch.bool), torch.zeros(n_val, dtype=torch.bool), torch.zeros(n_test, dtype=torch.bool)), dim=0)
        #data.test_mask = torch.cat((torch.zeros(n_train, dtype=torch.bool), torch.zeros(n_val, dtype=torch.bool), torch.ones(n_test, dtype=torch.bool)), dim=0)

        print(data.__dict__)
        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])

In [8]:
## remove old processed files
import shutil
shutil.rmtree('../../data/NDSSL data/processed')

## shuffle the masks
dataset = NDSSLDataset('../../data/NDSSL data/')
dataset.process()
data = dataset[0]

Processing...
{'x': tensor([[ 1.,  0., 42.,  ...,  3.,  2.,  3.],
        [ 0.,  1., 43.,  ...,  3.,  2.,  3.],
        [ 1.,  0., 17.,  ...,  3.,  2.,  3.],
        ...,
        [ 1.,  0.,  0.,  ...,  8.,  1.,  2.],
        [ 0.,  1., 60.,  ...,  1.,  1.,  1.],
        [ 0.,  1., 79.,  ...,  1.,  0.,  1.]]), 'edge_index': tensor([[      0,       0,       1,  ..., 1486224, 1378614, 1556530],
        [      1,       2,       2,  ..., 1601329, 1601329, 1601329]]), 'edge_attr': None, 'y': tensor([79, 79, 79,  ..., 36, 45, 36]), 'pos': None, 'norm': None, 'face': None, 'train_mask': tensor([True, True, True,  ..., True, True, True]), 'test_mask': tensor([False, False, False,  ..., False, False, False]), 'edge_weight': tensor([10.9161, 12.7494, 12.5828,  ...,  0.0497,  0.1667,  0.1667])}
Done!
{'x': tensor([[ 1.,  0., 42.,  ...,  3.,  2.,  3.],
        [ 0.,  1., 43.,  ...,  3.,  2.,  3.],
        [ 1.,  0., 17.,  ...,  3.,  2.,  3.],
        ...,
        [ 1.,  0.,  0.,  ...,  8.,  1.,  2.

In [9]:
dataset.__dict__

{'root': '../../data/NDSSL data',
 'transform': None,
 'pre_transform': None,
 'pre_filter': None,
 '__indices__': None,
 'data': Data(edge_index=[2, 19681821], edge_weight=[19681821], test_mask=[1601330], train_mask=[1601330], x=[1601330, 26], y=[1601330]),
 'slices': {'x': tensor([      0, 1601330]),
  'edge_index': tensor([       0, 19681821]),
  'y': tensor([      0, 1601330]),
  'train_mask': tensor([      0, 1601330]),
  'test_mask': tensor([      0, 1601330]),
  'edge_weight': tensor([       0, 19681821])}}

Note: I was initially having a hard time with GraphSAINT, and even for a simple example using the CORA dataset. My computer was crashing after pytorch ate up all the RAM. I believe this is realted to [this issue](https://github.com/rusty1s/pytorch_geometric/issues/1331). The problem was fixed when I set `num_workers=0`.

In [10]:
loader = GraphSAINTRandomWalkSampler(data, batch_size=6000, walk_length=2,
                                     num_steps=5, sample_coverage=10,
                                     save_dir=dataset.processed_dir,
                                     num_workers=0)

Compute GraphSAINT normalization: : 17831555it [00:12, 1384473.80it/s]                            


### GCN

documentation: https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html?highlight=GCNConv#torch_geometric.nn.conv.GCNConv

In [11]:
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        in_channels = dataset.num_node_features
        out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels) 
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, out_channels)
        
    def forward(self, x, edge_index, edge_weight=None):
        x = F.relu(self.conv1(x, edge_index, edge_weight))
        x = F.dropout(x, p=0.2, training=self.training)
        x = F.relu(self.conv2(x, edge_index, edge_weight))
        x = F.dropout(x, p=0.2, training=self.training)
        #x = F.relu(self.conv3(x, edge_index, edge_weight))
        #x = F.dropout(x, p=0.2, training=self.training)
        x = self.lin(x)
        x = x.log_softmax(dim=-1)
        return x

In [12]:
model = GCN(1000).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
print('number of trainable parameters %i' %sum(p.numel() for p in model.parameters() if p.requires_grad))

number of trainable parameters 2146117


In [13]:
def train():
    model.train()

    total_loss = total_examples = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_weight)
        batch_size = out[data.train_mask].shape[0]
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_size
        total_examples += batch_size
    return total_loss / total_examples

@torch.no_grad()
def test():
    model.eval()
    
    total_examples_train = correct_train = 0.0
    total_examples_test = correct_test = 0.0    
    for data in loader:
        data = data.to(device)
        out = model(data.x, data.edge_index)#, data.edge_weight)
        
        ## evaluate the train/test accuracies
        train_mask = data.train_mask
        test_mask = data.test_mask
        total_examples_train += torch.sum(train_mask).item()
        total_examples_test += torch.sum(test_mask).item()
        correct = (torch.argmax(out, axis=1) == data.y)
        
        correct_train += torch.sum(correct * train_mask).cpu().item()        
        correct_test += torch.sum(correct * test_mask).cpu().item()        
    
    overall_accuracy_train = correct_train/total_examples_train    
    overall_accuracy_test = correct_test/total_examples_test    
    
    return overall_accuracy_train, overall_accuracy_test

In [14]:
for epoch in range(50):
    loss = train()
    accs = test()
    print(f'Epoch: {epoch+1:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, 'f'Test: {accs[1]:.4f}')

Epoch: 01, Loss: 4.9592, Train: 0.0254, Test: 0.0271
Epoch: 02, Loss: 4.7648, Train: 0.0197, Test: 0.0186
Epoch: 03, Loss: 4.7176, Train: 0.0205, Test: 0.0206
Epoch: 04, Loss: 4.6975, Train: 0.0269, Test: 0.0242
Epoch: 05, Loss: 4.6705, Train: 0.0249, Test: 0.0252
Epoch: 06, Loss: 4.6605, Train: 0.0267, Test: 0.0294
Epoch: 07, Loss: 4.6447, Train: 0.0273, Test: 0.0247
Epoch: 08, Loss: 4.6374, Train: 0.0279, Test: 0.0278
Epoch: 09, Loss: 4.6297, Train: 0.0285, Test: 0.0285
Epoch: 10, Loss: 4.6212, Train: 0.0283, Test: 0.0243
Epoch: 11, Loss: 4.6172, Train: 0.0288, Test: 0.0281
Epoch: 12, Loss: 4.6097, Train: 0.0302, Test: 0.0281
Epoch: 13, Loss: 4.6046, Train: 0.0290, Test: 0.0278
Epoch: 14, Loss: 4.5974, Train: 0.0290, Test: 0.0272
Epoch: 15, Loss: 4.5952, Train: 0.0293, Test: 0.0280
Epoch: 16, Loss: 4.5881, Train: 0.0290, Test: 0.0257
Epoch: 17, Loss: 4.5864, Train: 0.0285, Test: 0.0302
Epoch: 18, Loss: 4.5815, Train: 0.0286, Test: 0.0279
Epoch: 19, Loss: 4.5785, Train: 0.0287, Test: 

### GraphSAGE

In [15]:
class SAGE(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(SAGE, self).__init__()
        in_channels = dataset.num_node_features
        out_channels = dataset.num_classes
        self.conv1 = SAGEConv(in_channels, hidden_channels) 
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(2 * hidden_channels, out_channels)
        
    def set_aggr(self, aggr):
        self.conv1.aggr = aggr
        self.conv2.aggr = aggr
        self.conv3.aggr = aggr

    def forward(self, x0, edge_index, edge_weight=None):
        x1 = F.relu(self.conv1(x0, edge_index, edge_weight))
        x1 = F.dropout(x1, p=0.2, training=self.training)
        
        x2 = F.relu(self.conv2(x1, edge_index, edge_weight))
        x2 = F.dropout(x2, p=0.2, training=self.training)
        
        #x3 = F.relu(self.conv3(x2, edge_index, edge_weight))
        #x3 = F.dropout(x3, p=0.2, training=self.training)
        
        x = torch.cat([x1, x2], dim=-1)
        x = self.lin(x)
        
        return x.log_softmax(dim=-1)

In [16]:
model = SAGE(1000).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
print('number of trainable parameters %i' %sum(p.numel() for p in model.parameters() if p.requires_grad))

number of trainable parameters 4289117


In [17]:
def train():
    model.train()

    total_loss = total_examples = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_weight)
        batch_size = out[data.train_mask].shape[0]
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_size
        total_examples += batch_size
    return total_loss / total_examples


@torch.no_grad()
def test():
    model.eval()
    model.set_aggr('mean')
    
    total_examples_train = correct_train = 0.0
    total_examples_test = correct_test = 0.0    
    for data in loader:
        data = data.to(device)
        out = model(data.x, data.edge_index, data.edge_weight)
        
        ## evaluate the train/test accuracies
        train_mask = data.train_mask
        test_mask = data.test_mask
        total_examples_train += torch.sum(train_mask).item()
        total_examples_test += torch.sum(test_mask).item()
        correct = (torch.argmax(out, axis=1) == data.y)
        
        correct_train += torch.sum(correct * train_mask).cpu().item()        
        correct_test += torch.sum(correct * test_mask).cpu().item()        
    
    overall_accuracy_train = correct_train/total_examples_train    
    overall_accuracy_test = correct_test/total_examples_test    
    
    return overall_accuracy_train, overall_accuracy_test

In [18]:
for epoch in range(50):
    loss = train()
    accs = test()
    print(f'Epoch: {epoch+1:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, 'f'Test: {accs[1]:.4f}')

Epoch: 01, Loss: 12.3795, Train: 0.0194, Test: 0.0212
Epoch: 02, Loss: 9.7682, Train: 0.0187, Test: 0.0193
Epoch: 03, Loss: 9.1737, Train: 0.0203, Test: 0.0203
Epoch: 04, Loss: 8.7102, Train: 0.0197, Test: 0.0198
Epoch: 05, Loss: 8.1069, Train: 0.0185, Test: 0.0190
Epoch: 06, Loss: 7.7543, Train: 0.0225, Test: 0.0214
Epoch: 07, Loss: 7.4228, Train: 0.0220, Test: 0.0211
Epoch: 08, Loss: 7.1407, Train: 0.0219, Test: 0.0225
Epoch: 09, Loss: 6.8851, Train: 0.0214, Test: 0.0223
Epoch: 10, Loss: 6.7615, Train: 0.0213, Test: 0.0221
Epoch: 11, Loss: 6.6129, Train: 0.0224, Test: 0.0226
Epoch: 12, Loss: 6.5228, Train: 0.0224, Test: 0.0241
Epoch: 13, Loss: 6.4420, Train: 0.0233, Test: 0.0222
Epoch: 14, Loss: 6.3383, Train: 0.0251, Test: 0.0253
Epoch: 15, Loss: 6.3408, Train: 0.0260, Test: 0.0251
Epoch: 16, Loss: 6.2682, Train: 0.0257, Test: 0.0259
Epoch: 17, Loss: 6.2273, Train: 0.0240, Test: 0.0263
Epoch: 18, Loss: 6.1893, Train: 0.0274, Test: 0.0282
Epoch: 19, Loss: 6.1466, Train: 0.0248, Test:

In [None]:
## confusion matrix

In [None]:
1/117