# Train

データセットを結合して、学習します。

Jetson Orin Nanoでは、2000枚のデータセットでも1時間程度で学習が終わります。Jetson Nanoでは、2000枚のデータセットの学習に10時間程度かかります。Jetson Nanoユーザは、300枚を超えるデータセットを学習する場合、下記URLから起動できるColabを試してください。

https://colab.research.google.com/drive/1GbDrNiosTKSJNOJiCiVgv6V8X-0GDBfW?usp=sharing

In [1]:
import Jetson.GPIO as GPIO

# Board identifier taken from the first element returned by Jetson.GPIO's
# get_data(); it is used as the lookup key for the three tables below.
BOARD_NAME = GPIO.gpio_pin_data.get_data()[0]

# Power-mode labels per board. The list index is the mode number: it is
# used to index this list below and is passed to `nvpmodel -m` in a later cell.
mode_descriptions = {
    "JETSON_NX": ["15W_2CORE", "15W_4CORE", "15W_6CORE", "10W_2CORE", "10W_4CORE"],
    "JETSON_XAVIER": ["MAXN", "MODE_10W", "MODE_15W", "MODE_30W"],
    "JETSON_NANO": ["MAXN", "5W"],
    "JETSON_ORIN": ["MAXN", "MODE_15W", "MODE_30W", "MODE_40W"],
    "JETSON_ORIN_NANO": ["MODE_15W", "MODE_7W"]
}

# Human-readable product name per board identifier.
product_names = {
    "JETSON_NX": "Jetson Xavier NX",
    "JETSON_XAVIER": "Jetson AGX Xavier",
    "JETSON_NANO": "Jetson Nano",
    "JETSON_ORIN": "Jetson AGX Orin",
    "JETSON_ORIN_NANO": "Jetson Orin Nano"
}

# Per-board (I2C bus number, initial power mode) pairs.
board_settings = {
    "JETSON_NX": (8, 3),
    "JETSON_XAVIER": (8, 2),
    "JETSON_NANO": (1, 0),
    "JETSON_ORIN": (7, 0),
    "JETSON_ORIN_NANO": (7, 1)
}

# Unknown boards fall back to (None, None), an empty label list and a
# placeholder product name, which routes them to the error message below.
i2c_busnum, power_mode = board_settings.get(BOARD_NAME, (None, None))
mode_description = mode_descriptions.get(BOARD_NAME, [])
product_name = product_names.get(BOARD_NAME, "未知のボード")

# Only report the settings when the power mode is a valid index into the label list.
if power_mode is not None and power_mode < len(mode_description):
    mode_str = mode_description[power_mode]
    print("------------------------------------------------------------")
    print(f"{product_name}を認識: I2Cバス番号: {i2c_busnum}, Powerモード: {mode_str}({power_mode})に設定します。")
    print("------------------------------------------------------------")
else:
    print("未知のボードまたは不正なモードです。")

------------------------------------------------------------
Jetson Orin Nanoを認識: I2Cバス番号: 7, Powerモード: MODE_7W(1)に設定します。
------------------------------------------------------------


In [2]:
# Apply the power mode chosen in the first cell. On the two Orin products the
# cell only prints a notice (the message says the power mode cannot be changed
# because the notebook runs under Docker).
# NOTE(review): the sudo password "jetson" is hard-coded and piped to `sudo -S`;
# confirm this is acceptable for the target image.
if (product_name == "Jetson Orin Nano") or (product_name == "Jetson AGX Orin"):
    print("Docker起動のため電力モードは変更できません。")
else:
    !echo "jetson" | sudo -S nvpmodel -m $power_mode

Docker起動のため電力モードは変更できません。


In [3]:
# Query the current power mode with `nvpmodel -q` (sudo password is piped in via `sudo -S`).
!echo "jetson" | sudo -S nvpmodel -q

NVPM WARN: power mode is not set!


In [4]:
# Run jetson_clocks on non-Orin boards. On the two Orin products the cell only
# prints a notice (the message says jetson_clocks cannot be started under Docker).
# NOTE(review): the sudo password "jetson" is hard-coded and piped to `sudo -S`.
if (product_name == "Jetson Orin Nano") or (product_name == "Jetson AGX Orin"):
    print("Docker起動のためjetson_clocksは起動できません。")
else:
    !echo "jetson" | sudo -S jetson_clocks

Docker起動のためjetson_clocksは起動できません。


## Datasetを指定

DATA_SETSの配列は、自分で作成したデータセット名に修正します。

In [5]:
# Dataset directories to merge for training, one entry per recording session.
DATA_SETS = [
    "dataset/tokyo_1121_data_001",
    "dataset/tokyo_1121_data_002",
    "dataset/tokyo_1121_data_003",
    "dataset/tokyo_1121_data_004",
    "dataset/tokyo_1121_data_005",
]

In [6]:
import torch
import torchvision
import time
from xy_dataset import XYDataset
import torchvision.transforms as transforms

def load_data(path=''):
    """Load one dataset directory as an ``XYDataset`` and report its size.

    The loaded dataset is also stored in the module-level ``dataset`` global,
    which ``load_model`` reads to size the network output.

    Args:
        path: Directory of the dataset to load.

    Returns:
        The constructed ``XYDataset``.
    """
    global dataset

    category_names = ['xy', 'speed']
    # Colour jitter -> resize to 224x224 -> tensor -> per-channel mean/std normalisation.
    pipeline_steps = [
        transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    image_transforms = transforms.Compose(pipeline_steps)

    dataset = XYDataset(path, category_names, image_transforms, random_hflip=True)
    print(f'データを{len(dataset)} 件読み込みました')
    return dataset

In [7]:
from torch.utils.data import ConcatDataset

# Load every configured dataset directory (load_data prints the size of each
# one and keeps the most recently loaded dataset in the global `dataset`).
all_datasets = [load_data(dataset_path) for dataset_path in DATA_SETS]

# Merge the individual datasets into a single one used for training.
full_dataset = ConcatDataset(all_datasets)
print(f'全データセットを結合しました。合計 {len(full_dataset)} 件のデータがあります。')

データを157 件読み込みました
データを111 件読み込みました
データを189 件読み込みました
データを274 件読み込みました
データを50 件読み込みました
全データセットを結合しました。合計 781 件のデータがあります。


In [8]:
import re
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on a runtime without CUDA (e.g. a CPU-only Colab session).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def pretrained_model():
    """Build the network using torchvision's legacy ``pretrained=True`` API.

    The final layer is replaced so the network emits ``output_dim`` values;
    ``output_dim`` is a module-level global assigned by ``load_model``.

    Returns:
        A ResNet-18 with pretrained weights and a fresh final linear layer.
    """
    # Active backbone: ResNet-18.
    net = torchvision.models.resnet18(pretrained=True)
    net.fc = torch.nn.Linear(512, output_dim)

    # Alternative backbones -- replace the two lines above with one of these:
    #
    #   AlexNet
    #     net = torchvision.models.alexnet(pretrained=True)
    #     net.classifier[-1] = torch.nn.Linear(4096, output_dim)
    #
    #   SqueezeNet
    #     net = torchvision.models.squeezenet1_1(pretrained=True)
    #     net.classifier[1] = torch.nn.Conv2d(512, output_dim, kernel_size=1)
    #     net.num_classes = len(dataset.categories)
    #
    #   ResNet-34
    #     net = torchvision.models.resnet34(pretrained=True)
    #     net.fc = torch.nn.Linear(512, output_dim)
    #
    #   DenseNet-121
    #     net = torchvision.models.densenet121(pretrained=True)
    #     net.classifier = torch.nn.Linear(net.classifier.in_features, output_dim)

    return net

def weights_model():
    """Build the network using torchvision's ``weights=`` API.

    The final layer is replaced so the network emits ``output_dim`` values;
    ``output_dim`` is a module-level global assigned by ``load_model``.

    Returns:
        A ResNet-18 with the default pretrained weights and a fresh final
        linear layer.
    """
    # Active backbone: ResNet-18.
    default_weights = torchvision.models.ResNet18_Weights.DEFAULT
    net = torchvision.models.resnet18(weights=default_weights)
    net.fc = torch.nn.Linear(512, output_dim)

    # Alternative backbones -- replace the lines above with one of these:
    #
    #   AlexNet
    #     net = torchvision.models.alexnet(weights=torchvision.models.AlexNet_Weights.DEFAULT)
    #     net.classifier[-1] = torch.nn.Linear(4096, output_dim)
    #
    #   SqueezeNet
    #     net = torchvision.models.squeezenet1_1(weights=torchvision.models.SqueezeNet1_1_Weights.DEFAULT)
    #     net.classifier[1] = torch.nn.Conv2d(512, output_dim, kernel_size=1)
    #     net.num_classes = len(dataset.categories)
    #
    #   ResNet-34
    #     net = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.DEFAULT)
    #     net.fc = torch.nn.Linear(512, output_dim)
    #
    #   DenseNet-121
    #     net = torchvision.models.densenet121(weights=torchvision.models.DenseNet121_Weights.DEFAULT)
    #     net.classifier = torch.nn.Linear(net.classifier.in_features, output_dim)

    return net

def _supports_weights_api():
    """Return True when the installed torchvision provides the ``weights=`` API (0.13+).

    Only the major and minor components of ``torchvision.__version__`` are
    needed. If the version string cannot be parsed, a message is printed and
    the decision falls back to checking whether ``ResNet18_Weights`` exists,
    so callers always get a usable answer instead of silently building nothing.
    """
    match = re.match(r'(\d+)\.(\d+)', torchvision.__version__)
    if match is None:
        print("Unable to parse torchvision version")
        return hasattr(torchvision.models, "ResNet18_Weights")
    major, minor = (int(part) for part in match.groups())
    return (major, minor) >= (0, 13)


def load_trained_mode():
    """Create an uninitialised ResNet-18 to receive saved weights.

    Stores the network in the global ``model``. The final layer is sized to
    the global ``output_dim``; no pretrained weights are loaded because the
    caller loads a saved state dict afterwards.
    """
    global model

    if _supports_weights_api():
        # `weights=None` replaces the deprecated `pretrained=False`.
        model = torchvision.models.resnet18(weights=None)
    else:
        model = torchvision.models.resnet18(pretrained=False)
    model.fc = torch.nn.Linear(model.fc.in_features, output_dim)


def load_pretrained_model():
    """Create the pretrained network and store it in the global ``model``.

    Uses ``weights_model()`` on torchvision 0.13+ (where ``pretrained`` is
    deprecated) and ``pretrained_model()`` on older releases.
    See https://pytorch.org/blog/introducing-torchvision-new-multi-weight-support-api/
    """
    global model
    print('Pre-trainedモデルを読み込みます。')

    if _supports_weights_api():
        model = weights_model()
    else:
        model = pretrained_model()

def load_model(model_file):
    """Prepare the global ``model`` and ``optimizer``.

    Precondition: a dataset has already been loaded into the global
    ``dataset`` (its ``categories`` determine the output size).

    If ``model_file`` exists, an uninitialised network is created and the
    saved weights are loaded into it; otherwise the pretrained network is
    used. The model is moved to ``device``, switched to eval mode, and a new
    Adam optimizer is created for it.

    Args:
        model_file: Path to a saved state dict (``.pth``). A non-existent
            path (including ``""``) selects the pretrained network.
    """
    global model, optimizer, output_dim
    # Local import: the notebook only imports `os` in a later cell.
    import os

    # Two outputs (x, y) per category.
    output_dim = 2 * len(dataset.categories)

    if os.path.exists(model_file):
        print(f'重み情報{model_file}を読み込みます。')
        load_trained_mode()
        model = model.to(device)
        # map_location makes weights saved on another device load onto `device`.
        model.load_state_dict(torch.load(model_file, map_location=device))
    else:
        load_pretrained_model()
        model = model.to(device)

    model = model.eval()

    optimizer = torch.optim.Adam(model.parameters())

def save_model(model_file):
    """Save the global ``model``'s weights (state dict only, no architecture).

    The parent directory of ``model_file`` is created when missing, so saving
    to e.g. ``./model/train_best_model.pth`` works on a fresh checkout.

    Args:
        model_file: Destination path of the ``.pth`` file.
    """
    # Local import: the notebook only imports `os` in a later cell.
    import os

    parent_dir = os.path.dirname(model_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    torch.save(model.state_dict(), model_file)
    print("学習結果を" + model_file + "に保存しました。")

In [9]:
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython.display import clear_output
import torch.utils.data as data
import time
import os

In [10]:
# Best losses seen so far start at infinity so the first epoch always improves on them.
best_test_loss = best_train_loss = float('inf')
# Per-epoch loss histories and the matching epoch numbers (used for plotting).
train_losses, test_losses, epochs = [], [], []

def filter_none(data):
    """Return the samples of ``data`` in which no field is ``None``.

    Args:
        data: Iterable of ``(images, category_idx, xy)`` triples.

    Returns:
        A list of ``(images, category_idx, xy)`` tuples, in the original
        order, excluding every triple with a ``None`` element.
    """
    kept = []
    for images, category_idx, xy in data:
        if images is None or xy is None or category_idx is None:
            continue
        kept.append((images, category_idx, xy))
    return kept

def _batch_loss(outputs, category_idx, xy):
    """Return the batch-averaged squared error for one batch.

    For each sample, the two outputs belonging to its category (positions
    ``2 * cat_idx`` and ``2 * cat_idx + 1``) are compared with its target.
    """
    loss = 0.0
    for batch_idx, cat_idx in enumerate(list(category_idx.flatten())):
        loss += torch.mean((outputs[batch_idx][2 * cat_idx:2 * cat_idx + 2] - xy[batch_idx]) ** 2)
    return loss / len(category_idx)


def train_eval(is_training=True, batch_size=8, epoch=20, stop_count=10):
    """Train the global ``model`` on ``full_dataset`` and evaluate it every epoch.

    The global ``full_dataset`` is first materialised into a list of valid
    samples (samples that raise ``AttributeError`` or contain ``None`` are
    dropped). The first 10% of that list is used as test data, the rest as
    training data. After every epoch the loss curves are re-plotted, a line
    is appended to ``./log.txt``, and the weights are saved to
    ``./model/train_best_model.pth`` whenever the monitored loss improves.
    The test loss is monitored when there are more than 100 test samples,
    otherwise the training loss is.

    Args:
        is_training: Unused; kept for backward compatibility.
        batch_size: Batch size for both data loaders.
        epoch: Maximum number of epochs to run.
        stop_count: Stop early after this many consecutive epochs without
            improvement of the monitored loss.

    Any exception raised inside the epoch loop is caught and printed.
    """
    global model, full_dataset, optimizer, best_test_loss, best_train_loss

    # Drop corrupted samples. Each sample is fetched exactly once.
    valid_data = []
    for i in range(len(full_dataset)):
        try:
            valid_data.append(full_dataset[i])
        except AttributeError as e:
            print(f"無効なデータが検出されました（インデックス：{i}）: {e}")

    # Drop samples that contain None.
    full_dataset = filter_none(valid_data)

    total_size = len(full_dataset)
    if total_size == 0:
        # Nothing to train on; avoid a division by zero further down.
        print("学習可能なデータがありません。")
        return

    # Use 10% of the data as test data.
    split_size = total_size * 10 // 100

    # Split into test data (first part) and training data (remainder).
    indices = list(range(total_size))
    train_indices, test_indices = indices[split_size:], indices[:split_size]
    train_dataset = data.Subset(full_dataset, train_indices)
    test_dataset = data.Subset(full_dataset, test_indices)

    train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    model = model.train()
    non_improving_epochs = 0
    epoch_count = 0
    total_elapsed = 0.0

    try:
        # Make sure the directory for the best-model checkpoint exists.
        os.makedirs("./model", exist_ok=True)

        while epoch > 0:
            sum_train_loss = 0.0

            # Redraw the loss curves collected so far.
            clear_output(wait=True)
            plt.plot(epochs, train_losses, label='Train Loss')
            plt.plot(epochs, test_losses, label='Test Loss')
            plt.title(f"Train Loss ({total_size - split_size} datas) vs Test Loss({split_size} datas)")
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            plt.grid(True)
            plt.show()

            start_time = time.time()

            # Progress bar over the training batches.
            progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch_count + 1}")

            for i, (images, category_idx, xy) in progress_bar:
                if images is None or xy is None:
                    print("Warning: None type data found at index", i)
                    continue

                # Move the batch to the training device.
                images = images.to(device)
                xy = xy.to(device)

                # Clear gradients left over from the previous step.
                optimizer.zero_grad()

                outputs = model(images)
                loss = _batch_loss(outputs, category_idx, xy)
                loss.backward()
                optimizer.step()
                sum_train_loss += float(loss)

                # Show the running average loss on the progress bar.
                average_loss = sum_train_loss / (i + 1)
                progress_bar.set_description(f"Epoch {epoch_count + 1} Loss: {average_loss:.5f}")

            # Average training loss of this epoch.
            train_loss = sum_train_loss / len(train_loader)
            train_losses.append(train_loss)

            # Evaluate on the test data.
            model = model.eval()
            sum_test_loss = 0.0
            with torch.no_grad():
                for test_idx, (images, category_idx, xy) in enumerate(test_loader):
                    if images is None or xy is None:
                        print("Error: None type data found at index", test_idx)
                        continue

                    images = images.to(device)
                    xy = xy.to(device)
                    outputs = model(images)
                    sum_test_loss += float(_batch_loss(outputs, category_idx, xy))

            # Average test loss; NaN when the test split is empty (fewer than 10 samples).
            if len(test_loader) > 0:
                test_loss = sum_test_loss / len(test_loader)
            else:
                test_loss = float('nan')
            test_losses.append(test_loss)

            # Monitor the train loss until there are more than 100 test samples.
            best_model = False
            if split_size > 100:
                if test_loss < best_test_loss:
                    best_test_loss = test_loss
                    non_improving_epochs = 0

                    save_model("./model/train_best_model.pth")
                    print("Saved best model with test loss:", best_test_loss)
                    best_model = True
                else:
                    non_improving_epochs += 1
            else:
                if train_loss < best_train_loss:
                    best_train_loss = train_loss
                    non_improving_epochs = 0

                    save_model("./model/train_best_model.pth")
                    print("Saved best model with train loss:", best_train_loss)
                    best_model = True
                else:
                    non_improving_epochs += 1

            # One epoch fewer remaining; one more epoch on the plot axis.
            epoch -= 1
            epoch_count += 1
            epochs.append(epoch_count)

            # Switch back from eval to train mode for the next epoch.
            model = model.train()

            # Append this epoch's summary to log.txt.
            epoch_duration = time.time() - start_time
            total_elapsed += epoch_duration
            with open("./log.txt", "a", encoding="utf-8") as file:
                file.write(f"Epoch {epoch_count}: Train Loss: {train_loss:.5f}, Test Loss: {test_loss:.5f}, Best Model: {best_model}, Time: {epoch_duration/60:.4f} 分, Total time: {total_elapsed/60:.4f} 分\n")

            if non_improving_epochs >= stop_count:
                print("Loss hasn't improved for {} consecutive epochs. Stopping training.".format(stop_count))
                break

    except Exception as e:
        print(f"Error: {e}")

In [None]:
MAX_NON_IMPROVING_EPOCHS = 30
EPOCHS = 100
BATCH_SIZE = 8
# Reset the best-loss trackers that train_eval() actually reads; without this,
# re-running the cell keeps the previous run's best values and a fresh model
# would never be saved as "best".
best_test_loss = float('inf')
best_train_loss = float('inf')
# Kept for backward compatibility; train_eval() does not read this name.
best_loss = float('inf')
# Loss histories for training and evaluation.
train_losses = []
test_losses = []
# Epoch history.
epochs = []

# An empty path does not exist, so this starts from the pretrained model.
load_model("")
# To continue training from the saved best weights instead:
#load_model("./model/train_best_model.pth")
train_eval(batch_size=BATCH_SIZE, epoch=EPOCHS, stop_count=MAX_NON_IMPROVING_EPOCHS)

Pre-trainedモデルを読み込みます。


## 動画の作成

In [None]:
import ipywidgets
from ipywidgets import Button, Layout, Textarea, HBox, VBox, Label
import os
import glob

# Fixed-height text area that shows log messages, newest first.
l = Layout(
    flex='0 1 auto',
    height='100px',
    min_height='100px',
    width='auto',
)
process_widget = ipywidgets.Textarea(description='ログ', value='', layout=l)

# Running counter used to number the log entries.
process_no = 0


def write_log(msg):
    """Prepend ``msg`` to the log widget, prefixed with an incrementing number.

    Args:
        msg: Message text to log.
    """
    global process_no
    process_no += 1
    numbered_entry = str(process_no) + ": " + msg
    process_widget.value = "\n".join((numbered_entry, process_widget.value))

In [None]:
# Task folders offered in the task dropdown.
LOAD_TASK = ['camera']
IMG_WIDTH = 224
IMG_HEIGHT = 224
current_path = os.getcwd()
# Selectable frame-skip values for the movie.
SKIP = [1,2,3,4,5]

load_datasets_widget = ipywidgets.Dropdown(options=[], description='dataset')
load_task_widget = ipywidgets.Dropdown(options=LOAD_TASK, description='task')
movie_button = ipywidgets.Button(description='動画の作成')
movie_name_widget = ipywidgets.Text(description='動画名',value="run_video_train")
movie_skip_dropdown = ipywidgets.Dropdown(options=SKIP, description='skip(枚)', index=1)


def change_load_task(change):
    """Refresh the dataset dropdown with the sub-directories of the selected task folder.

    Only directories are listed, and ``.ipynb_checkpoints`` is excluded. If
    the task folder cannot be listed, a message is logged and the dropdown is
    emptied.

    Args:
        change: Change notification from the widget (unused).
    """
    path = os.path.join(current_path, load_task_widget.value)
    try:
        entries = os.listdir(path)
    except OSError:
        write_log(path + "が存在していません。")
        load_datasets_widget.options = []
        return

    load_datasets_widget.options = sorted(
        name for name in entries
        if name != ".ipynb_checkpoints" and os.path.isdir(os.path.join(path, name))
    )


# React only to changes of the selected value, not to every widget trait.
load_task_widget.observe(change_load_task, names='value')
change_load_task(LOAD_TASK[0])

In [None]:
import cv2
import glob
from utils import preprocess
import re
import torch.nn.functional as F
import time

# Load the best weights saved during training for use in the movie below.
# Note: load_model() falls back to the pretrained model when this file does not exist.
load_model("./model/train_best_model.pth")

def extract_numbers(filename):
    """Return the sort key for an image file name.

    Args:
        filename: File name to inspect.

    Returns:
        The last run of digits in ``filename`` as an ``int`` when the name
        contains at least three digit runs; otherwise ``float('inf')`` so
        such names sort last.
    """
    digit_runs = re.findall(r'(\d+)', filename)
    if len(digit_runs) < 3:
        return float('inf')
    return int(digit_runs[-1])

def get_file_names(path):
    """Return the ``.jpg`` files directly inside ``path``, ordered by ``extract_numbers``.

    Args:
        path: Directory to list.

    Returns:
        Paths (``path`` joined with each file name) whose extension is
        ``.jpg`` in any letter case, sorted by the key that
        ``extract_numbers`` derives from the base name.
    """
    candidates = [os.path.join(path, entry) for entry in os.listdir(path)]
    candidates.sort(key=lambda full_path: extract_numbers(os.path.basename(full_path)))
    return [
        full_path
        for full_path in candidates
        if os.path.splitext(full_path)[1].lower() == ".jpg"
    ]

def make_movie(change):
    """Render the selected dataset's images, annotated with model predictions, to an MP4.

    Every ``skip``-th image of ``<task>/<dataset>/xy`` is run through the
    global ``model``; the predicted point and a speed bar are drawn on the
    image, and the frame is appended to ``video/<name>.mp4`` under
    ``current_path``. Progress is reported through ``write_log``.

    Args:
        change: Click notification from the button (unused).
    """
    global model, current_path

    if not movie_name_widget.value.strip():
        write_log("ファイル名を指定してください。")
        return
    write_log("動画を作成します。")

    video_dir = os.path.join(current_path, "video/")
    os.makedirs(video_dir, exist_ok=True)
    video_path = video_dir + movie_name_widget.value + ".mp4"

    skip_movie = movie_skip_dropdown.value
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = int(30 / skip_movie)
    # NOTE(review): frames are written as read from disk; this assumes the
    # dataset images are already 224x224 -- confirm.
    outfh = cv2.VideoWriter(video_path, fourcc, fps, (224, 224))

    try:
        xy_path = os.path.join(current_path, load_task_widget.value, load_datasets_widget.value, "xy")
        file_list = sorted(
            (os.path.join(xy_path, name) for name in os.listdir(xy_path) if name.endswith('.jpg')),
            key=lambda f: extract_numbers(os.path.basename(f)),
        )
        res_num = len(file_list)

        # Video time represented by one written frame.
        terminal_time = 1 / (30 / skip_movie)
        current_time = 0
        total_process_time = 0
        frames_since_log = 0

        for i, file_name in enumerate(file_list):
            if i % skip_movie != 0:
                continue

            current_time += terminal_time
            img = cv2.imread(file_name)
            if img is None:
                # cv2.imread returns None for unreadable files; skip them.
                write_log(file_name + "を読み込めませんでした。")
                continue

            process_time = time.time()
            with torch.no_grad():
                prediction = model(preprocess(img)).detach().cpu().numpy().flatten()

            # Map the predicted x/y from [-1, 1] to pixel coordinates.
            result_x = int(IMG_WIDTH * (float(prediction[0]) / 2.0 + 0.5))
            result_y = int(IMG_HEIGHT * (float(prediction[1]) / 2.0 + 0.5))
            img = cv2.circle(img, (result_x, result_y), 8, (255, 0, 0), 3)

            # Speed bar. Index 3 is presumably the second value of the 'speed'
            # category (two outputs per category) -- TODO confirm.
            result_speed = int(IMG_WIDTH * (prediction[3] / 2.0 + 0.5))
            # Clamp to the drawable range of the 224-pixel-high bar.
            result_speed = min(max(result_speed, 0), 224)
            img = cv2.line(img, (218, 0), (218, 224), (0, 0, 0), 5)
            img = cv2.line(img, (219, 224 - result_speed), (219, 224), (0, 140, 255), 3)
            img = cv2.putText(img, "speed:" + str(result_speed), (160, 215), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255))
            total_process_time += time.time() - process_time
            frames_since_log += 1

            if i % (skip_movie * 10) == 0:
                # Average over the frames actually processed since the last report.
                write_log(f"{current_time:.1f}秒まで完了, 推論処理平均: {total_process_time/frames_since_log*1000:.1f}ms, {int(i/skip_movie)}枚目/{int(res_num/skip_movie)}枚中を処理中")
                total_process_time = 0
                frames_since_log = 0
            outfh.write(img)
    except Exception as e:
        write_log(f"動画の作成に失敗しました: {e}")
        raise
    finally:
        # Always release the writer, even when an error occurred.
        outfh.release()

    write_log("動画の出力が完了しました。")


movie_button.on_click(make_movie)

In [None]:
# Header: a horizontal rule followed by the section title.
separator = ipywidgets.HTML('<hr style="border-color:gray;margin:10px 0"/>')
title1 = ipywidgets.HTML('<b>【走行動画の作成】</b> 動画ファイル名を指定してください。')

# Row 1: dataset/task selection. Row 2: movie name, frame skip and the create button.
selection_row = ipywidgets.HBox([load_datasets_widget,load_task_widget])
movie_row = ipywidgets.HBox([movie_name_widget,movie_skip_dropdown,movie_button])

# Stack header, both rows and the log area vertically and show the panel.
convert_widget = ipywidgets.VBox([separator, title1, selection_row, movie_row, process_widget])
display(convert_widget)