In [1]:
from segment_dataloader import Segment_dataloader
from torch.utils.data import DataLoader
import numpy as np

In [2]:
# Mini-batch size shared by both loaders.
batch_size = 3

# One dataset object per split; the split is selected through `mode`.
trainset, valset = Segment_dataloader(mode="train"), Segment_dataloader(mode="val")

# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=valset, batch_size=batch_size, shuffle=False)

In [3]:
import torch
from model.cnn_lstm import CNN_LSTM

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA. `cuda` is read again by later
# cells (loss construction and the training/validation loops), so it must stay
# consistent with `device`.
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

# NOTE(review): the meaning of the four constructor arguments is defined in
# model/cnn_lstm.py, which is not visible here. The trailing 13 matches
# Evaluator(13) in a later cell, so it is presumably the number of classes —
# confirm.
model = CNN_LSTM(100, 25, 300, 13)
model = model.to(device)

# weight_path = "/home2/kataoka/echigo_hudagami/iwaka/operation/model_weight/lstm/operation_weight_exp1.pth"
# checkpoint = torch.load(weight_path)
# model.load_state_dict(checkpoint)

In [4]:
import torch.nn as nn
import torch
import os
from tqdm import tqdm
from utils.loss import SegmentationLosses
from utils.lr_scheduler import LR_Scheduler
from utils.saver import Saver
from utils.metrics import Evaluator
# Metric accumulator. 13 is the same value as the last CNN_LSTM argument —
# presumably the class count, TODO confirm.
evaluator = Evaluator(13)

# Adam over every model parameter; no explicit learning rate is given here.
# The scheduler below is called with this optimizer on every training iteration.
optimizer = torch.optim.Adam(params=model.parameters(), weight_decay=4e-5)

# Focal-loss criterion produced by the project's loss factory.
criterion = SegmentationLosses(cuda=cuda).build_loss(mode="focal")

# NOTE(review): the third argument is 100 while the training loop iterates
# range(10). If that argument is the total number of epochs, the poly schedule
# will not run to completion — confirm against utils/lr_scheduler.py.
scheduler = LR_Scheduler("poly", 0.001, 100, len(train_loader))

Using poly LR Scheduler!


In [5]:
def training(epoch, best_pred):
    """Run one training epoch over ``train_loader`` and return the summed loss.

    Relies on notebook-level globals: ``model``, ``train_loader``,
    ``optimizer``, ``criterion``, ``scheduler``, ``cuda`` and ``tqdm``.

    Args:
        epoch: Epoch index; forwarded to the LR scheduler and printed in the
            summary line.
        best_pred: Best validation score so far; only forwarded to the
            scheduler.

    Returns:
        float: Sum (not mean) of the per-batch loss values of this epoch,
        ``0.0`` when the loader yields no batches.
    """
    train_loss = 0.0
    num_images = 0
    model.train()
    tbar = tqdm(train_loader)
    for i, sample in enumerate(tbar):
        image, target = sample['image'], sample['label']
        if cuda:
            image, target = image.cuda(), target.cuda()

        # The scheduler is invoked once per iteration, before the update step.
        scheduler(optimizer, i, epoch, best_pred)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Count samples as they arrive so the summary stays correct for a
        # short final batch and well-defined for an empty loader.
        num_images += image.shape[0]
        # The progress bar shows the running mean loss per batch.
        tbar.set_description('Train loss: %.7f' % (train_loss / (i + 1)))

    # The former `i % (num_img_tr // 10)` check was removed: it only assigned an
    # unused variable and divided by zero for loaders with fewer than 10 batches.
    print('[Epoch: %d, numImages: %5d]' % (epoch, num_images))
    print('Loss: %.7f' % train_loss)
    return train_loss

def validation(epoch, best_pred, best_loss):
    """Evaluate the model on ``val_loader`` and checkpoint it when the loss improves.

    Relies on notebook-level globals: ``model``, ``val_loader``, ``evaluator``,
    ``criterion``, ``cuda``, ``tqdm`` and ``exp`` (experiment id used in the
    checkpoint file name; it must be defined before this function is called).

    Args:
        epoch: Not used inside the function; kept so the signature mirrors
            ``training``.
        best_pred: Best mIoU seen so far.
        best_loss: Lowest summed validation loss seen so far.

    Returns:
        tuple: ``(best_pred, best_loss)`` updated with this epoch's mIoU and
        summed validation loss whenever they improve on the previous values.
    """
    model.eval()
    evaluator.reset()
    tbar = tqdm(val_loader, desc='\r')
    test_loss = 0.0
    num_images = 0
    for i, sample in enumerate(tbar):
        image, target = sample['image'], sample['label']
        if cuda:
            image, target = image.cuda(), target.cuda()
        with torch.no_grad():
            output = model(image)
            loss = criterion(output, target)

        test_loss += loss.item()
        num_images += image.shape[0]
        tbar.set_description('Test loss: %.7f' % (test_loss / (i + 1)))

        pred = output.detach().cpu().numpy()
        target = target.cpu().numpy()
        # Class index per position for the whole batch in one call. This
        # replaces a per-sample loop that wrote into a fixed (2, 100) buffer,
        # which failed for batch_size > 2 and for a smaller final batch.
        # axis=2 equals the old per-sample `argmax(pred[index], axis=1)`.
        reshape_pred = np.argmax(pred, axis=2)
        # Add batch sample into evaluator
        evaluator.add_batch(target, reshape_pred)

    # Fast test during the training
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print('Validation:')
    print('[numImages: %5d]' % num_images)
    print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
    print('Loss: %.7f' % test_loss)

    # Track the best mIoU; the checkpoint decision below is driven by the loss.
    if mIoU > best_pred:
        best_pred = mIoU
    if test_loss < best_loss:
        print("improve {0} to {1}. save checkpoint.".format(best_loss, test_loss))
        # NOTE(review): the directory is spelled "model_weigth"; kept unchanged
        # because existing checkpoints may already live under that name.
        weight_path = "./model_weigth/cnn_lstm/weight_exp{}.pth".format(exp)
        # Create the target directory first so a missing folder cannot discard
        # the result of a long training epoch.
        os.makedirs(os.path.dirname(weight_path), exist_ok=True)
        torch.save(model.state_dict(), weight_path)
        best_loss = test_loss
    return best_pred, best_loss

In [None]:
# Training run
exp = 1  # experiment id; validation() puts it into the checkpoint file name

print("start learning.")
best_pred, best_loss = 0, 1000
t_losses, v_losses = [], []
for epoch in range(10):
    t_loss = training(epoch, best_pred)
    best_pred, best_loss = validation(epoch, best_pred, best_loss)
    t_losses.append(t_loss)
    # NOTE(review): this stores the running best validation loss, not the loss
    # of the current epoch, because validation() only returns the best value.
    v_losses.append(best_loss)

  0%|          | 0/18194 [00:00<?, ?it/s]

start learning.

=>Epoches 0, learning rate = 0.0010,                 previous best = 0.0000


Train loss: 0.0030327:  13%|█▎        | 2333/18194 [2:46:23<19:22:27,  4.40s/it]

In [None]:
import matplotlib.pyplot as plt
# Draw each loss history on its own figure and save it as a PNG. The first
# curve goes onto the current (implicit) figure; each later one opens a new
# figure before plotting.
loss_curves = ((t_losses, './train_loss.png'), (v_losses, './v_loss.png'))
for curve_idx, (losses, png_path) in enumerate(loss_curves):
    if curve_idx:
        plt.figure()
    plt.plot(losses)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.grid()
    plt.savefig(png_path)

In [7]:
# Show the per-epoch training losses collected in `t_losses`.
t_losses

[]

In [8]:
# Inspect `target`.
# NOTE(review): the only top-level assignment to `target` visible in this
# notebook sits in a later cell, so this cell depends on out-of-order execution.
target

tensor([1, 0, 0])

In [7]:
# Scratch tensors, presumably for trying out a loss function by hand:
# `input` holds random values of shape (3, 5) and `target` holds 3 random
# integer indices in the range 0..4.
# NOTE(review): `input` shadows the Python builtin of the same name, and
# `target` replaces any earlier top-level `target`.
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)

In [8]:
# Check the shape of the scratch `input` tensor.
input.shape

torch.Size([3, 5])

In [9]:
# Check the shape of the scratch `target` tensor.
target.shape

torch.Size([3])

In [1]:
# Shell escape: report the installed GPUs with their memory usage and utilisation.
!nvidia-smi

Sat Jan 18 13:13:24 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.48                 Driver Version: 410.48                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  Off  | 00000000:65:00.0  On |                  N/A |
| 44%   45C    P8     9W / 250W |     45MiB / 10988MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  Off  | 00000000:B3:00.0 Off |                  N/A |
| 29%   43C    P8    12W / 250W |     11MiB / 10989MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------