## p1. 拆解"缓存准备.py"文件

In [3]:
import sys

sys.path.append("../../src/")

In [4]:
import argparse

import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.optim import SGD
from torch.utils.data import DataLoader

from util.util import enumerateWithEstimate
from util.logconf import logging

from p2ch11.dsets import LunaDataset
from p2ch11.model import LunaModel

In [5]:
# Notebook-level logger. INFO is the active verbosity; use logging.WARN for
# quieter output or logging.DEBUG for more detail.
log = logging.getLogger(__name__)
log.setLevel(level=logging.INFO)

注意：运行前需要先修改 `p2ch11/dsets.py` 中的数据路径，使其与本地环境一致。

In [9]:
# Loader settings for the cache-preparation pass.
batch_size = 128
num_workers = 8

# DataLoader over a LunaDataset ordered by series UID (sortby_str="series_uid").
# NOTE(review): presumably ordering by series keeps samples from the same scan
# adjacent while the cache is filled -- confirm against p2ch11/dsets.py.
prep_dl = DataLoader(
    dataset=LunaDataset(sortby_str="series_uid"),
    batch_size=batch_size,
    num_workers=num_workers,
)

2024-06-28 10:02:09,508 INFO     pid:29469 p2ch11.dsets:198:__init__ <p2ch11.dsets.LunaDataset object at 0x7fad5f193250>: 110143 training samples


In [10]:
import datetime

In [11]:
# Make one full pass over prep_dl and throw every batch away: the loop body is
# intentionally empty, only the iteration itself matters.
# NOTE(review): going by the "Stuffing cache" label, the point is to populate
# the dataset's cache as a side effect of loading each sample -- confirm.
print(f"Started at {datetime.datetime.now()}")
batch_iter = enumerateWithEstimate(prep_dl, "Stuffing cache", start_ndx=prep_dl.num_workers)
for _ in batch_iter:
    pass
print(f"Finished at {datetime.datetime.now()}")



Started at 2024-06-28 10:02:14.998712


2024-06-28 10:05:35,428 INFO     pid:29469 util.util:236:enumerateWithEstimate Stuffing cache   64/861, done at 2024-06-28 10:49:42, 0:47:16
2024-06-28 11:36:48,904 INFO     pid:29469 util.util:236:enumerateWithEstimate Stuffing cache  256/861, done at 2024-06-28 15:25:45, 5:23:19


Finished at 2024-06-28 14:08:21.038947


## p2. 拆解train.py文件

In [None]:
# Training configuration for the train.py walkthrough.
num_workers = 8  # forwarded as DataLoader(num_workers=...) by the loader helpers
batch_size = 32
epochs = 1
tb_prefix = "p2ch11"
comment = "dlwpt"

# Timestamp identifying this run. The time fields are separated with "."
# rather than ":" because ":" is not a legal character in Windows file names
# and is awkward in path specifications on other platforms.
# NOTE(review): presumably this string ends up in a log/run directory name
# further down the notebook -- confirm.
time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
trn_writer = None  # no writer created yet
val_writer = None  # no writer created yet
total_training_samples = 0

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

device

: 

### 1. 创建模型

In [None]:
def initModel():
    """Create a LunaModel and, when CUDA is in use, move it to the GPU.

    Reads the notebook-level ``use_cuda`` and ``device`` settings.

    Returns:
        The bare ``LunaModel`` when ``use_cuda`` is false. Otherwise the model
        moved to ``device``, wrapped in ``nn.DataParallel`` first if more than
        one CUDA device is reported.
    """
    net = LunaModel()
    if not use_cuda:
        return net

    log.info(f"Using CUDA; {torch.cuda.device_count()} devices.")
    # Only wrap for multi-GPU execution when there really are several GPUs.
    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
    return net.to(device)

: 

In [None]:
def initOptimizer():
    """Build the SGD optimizer for the notebook-level ``model``.

    Relies on a global ``model`` having been created before this is called.

    Returns:
        ``SGD`` over ``model.parameters()`` with lr=0.001 and momentum=0.99.
    """
    # Alternative noted in the original code: Adam(model.parameters())
    optimizer = SGD(
        model.parameters(),
        lr=0.001,
        momentum=0.99,
    )
    return optimizer

In [None]:
def initTrainDl(batch_size=64):
    """Build the DataLoader for the training split.

    Args:
        batch_size: Batch size per device. When ``use_cuda`` is true it is
            multiplied by ``torch.cuda.device_count()``.

    Returns:
        A ``DataLoader`` over ``LunaDataset(val_stride=10, isValSet_bool=False)``
        using the notebook-level ``num_workers``, with pinned memory when
        ``use_cuda`` is true.
    """
    # NOTE(review): presumably val_stride=10 with isValSet_bool=False keeps the
    # samples that are *not* held out for validation -- confirm in dsets.py.
    train_ds = LunaDataset(
        val_stride=10,
        isValSet_bool=False,
    )

    # This is a free function, not a method: read the notebook-level
    # ``use_cuda`` flag (there is no ``self`` here).
    if use_cuda:
        batch_size *= torch.cuda.device_count()

    train_dl = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=use_cuda,
    )

    return train_dl

In [None]:
def initValDl(batch_size=64):
    """Build the DataLoader for the validation split.

    Args:
        batch_size: Batch size per device. When ``use_cuda`` is true it is
            multiplied by ``torch.cuda.device_count()``.

    Returns:
        A ``DataLoader`` over ``LunaDataset(val_stride=10, isValSet_bool=True)``
        using the notebook-level ``num_workers``, with pinned memory when
        ``use_cuda`` is true.
    """
    # NOTE(review): presumably val_stride=10 with isValSet_bool=True selects the
    # held-out validation samples -- confirm in dsets.py.
    dataset = LunaDataset(val_stride=10, isValSet_bool=True)

    # One multiplier for the batch size: GPU count under CUDA, otherwise 1.
    scale = torch.cuda.device_count() if use_cuda else 1

    return DataLoader(
        dataset,
        batch_size=batch_size * scale,
        num_workers=num_workers,
        pin_memory=use_cuda,
    )