Demonstrating how to get DonkeyCar Tub files into a PyTorch/fastai DataBlock

In [None]:
from fastai.data.all import *
from fastai.vision.all import *
from fastai.data.transforms import ColReader, Normalize, RandomSplitter
import torch

In [None]:
from donkeycar.parts.tub_v2 import Tub
import pandas as pd
from pathlib import Path

In [None]:
from malpi.dk.train import preprocessFileList

In [None]:
def tubs_from_filelist(file_list, verbose=False):
    """Load all tubs listed in all files in file_list.

    Args:
        file_list: Passed unchanged to ``preprocessFileList``; the result is
            iterated as a sequence of candidate tub directory paths.
        verbose: When true, print the total number of records loaded.

    Returns:
        A list of read-only ``Tub`` objects, in the order the paths were
        returned by ``preprocessFileList``. Paths that are not existing
        directories, and tubs that fail to open, are skipped.
    """
    tubs = []
    count = 0
    for item in preprocessFileList(file_list):
        if not Path(item).is_dir():
            continue
        try:
            tub = Tub(str(item), read_only=True)
        except (FileNotFoundError, ValueError):
            # ValueError is raised e.g. when the catalog file is empty;
            # an unreadable tub is skipped rather than aborting the load.
            continue
        tubs.append(tub)
        count += len(tub)
    if verbose:
        print( f"Loaded {count} records." )

    return tubs
        
def tubs_from_directory(tub_dir, verbose=False):
    """Load all tubs in the given directory.

    Args:
        tub_dir: Directory (str or Path) whose immediate sub-directories are
            each opened as a tub.
        verbose: When true, print the total number of records loaded.

    Returns:
        A list of read-only ``Tub`` objects ordered by sub-directory path.
        Plain files are ignored, and sub-directories that cannot be opened
        as a tub are skipped.
    """
    tubs = []
    count = 0
    # iterdir() yields entries in arbitrary order; sort so the resulting tub
    # (and therefore record) order is reproducible across runs and platforms.
    for item in sorted(Path(tub_dir).iterdir()):
        if not item.is_dir():
            continue
        try:
            tub = Tub(str(item), read_only=True)
            n_records = len(tub)
        except (FileNotFoundError, ValueError):
            # ValueError is raised e.g. when the catalog file is empty.
            continue
        tubs.append(tub)
        count += n_records
    if verbose:
        print( f"Loaded {count} records." )

    return tubs
        
def dataframe_from_tubs(tubs):
    """Combine the records of several tubs into a single DataFrame.

    Each tub is converted with ``pd.DataFrame(tub)``, and its
    "cam/image_array" column is prefixed with the tub's image directory
    (``tub.base_path`` joined with ``Tub.images()``) so that every row
    carries a path that includes its tub.

    Args:
        tubs: Iterable of tub objects.

    Returns:
        The concatenation of all per-tub frames (each frame keeps its own
        index). An empty DataFrame is returned when there are no tubs or
        none of them contains any records.
    """
    frames = []
    for tub in tubs:
        df = pd.DataFrame(tub)
        if df.empty:
            # A tub without records has no "cam/image_array" column to rewrite.
            continue
        image_prefix = os.path.join(tub.base_path, Tub.images()) + "/"
        df["cam/image_array"] = image_prefix + df["cam/image_array"]
        frames.append(df)
    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty frame.
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
def get_dataframe(inputs, verbose=False):
    """Load tub records into one DataFrame.

    Args:
        inputs: Either a directory containing tubs, a single file-list name
            (str), or a collection of file-list names. Anything that is not
            an existing directory is handed to ``tubs_from_filelist``.
        verbose: When true, print loading statistics and a ``describe()``
            summary of the resulting frame.

    Returns:
        The combined DataFrame, or None when no tubs were found.
    """
    tubs = None

    try:
        input_path = Path(inputs)
    except TypeError:
        # inputs is not path-like (e.g. a list of file names).
        input_path = None

    if input_path is not None and input_path.is_dir():
        tubs = tubs_from_directory(input_path, verbose=verbose)

    if tubs is None:
        if isinstance(inputs, str):
            inputs = [inputs]
        tubs = tubs_from_filelist(inputs, verbose=verbose)

    # The loaders return an empty list (never None) when nothing was found.
    if not tubs:
        if verbose:
            print( f"No tubs found at {inputs}")
        return None

    df_all = dataframe_from_tubs(tubs)

    if verbose and not df_all.empty:
        # Inside a function the bare expression is discarded; print it.
        print( df_all.describe() )

    return df_all

In [None]:
def get_data(inputs, df_all=None, batch_tfms=None, verbose=False):
    """Build fastai DataLoaders for image -> (angle, throttle) regression.

    Args:
        inputs: Tub source passed to ``get_dataframe`` when ``df_all`` is None.
        df_all: Optional pre-loaded DataFrame; when given, ``inputs`` is only
            used in error messages.
        batch_tfms: Batch transform(s) forwarded to the ``DataBlock``.
        verbose: When true, show a batch and print the input batch shape.

    Returns:
        The DataLoaders created from the DataBlock.

    Raises:
        ValueError: If no DataFrame was supplied and none could be loaded.
    """
    if df_all is None:
        df_all = get_dataframe(inputs, verbose)
    if df_all is None:
        raise ValueError(f"No tub data could be loaded from {inputs}")

    # Normalizing is already done for us, probably because it's defined as an ImageBlock
    # Every image is squished to 128x128 before batching.
    item_tfms = [Resize(128,method="squish")]

    driving_block = DataBlock(blocks=(ImageBlock, RegressionBlock(n_out=2)),
                              splitter=RandomSplitter(),
                              get_x=ColReader("cam/image_array"),
                              get_y=ColReader(['user/angle','user/throttle']),
                              item_tfms=item_tfms,
                              batch_tfms=batch_tfms,
                              n_inp=1)

    dls = driving_block.dataloaders(df_all)

    if verbose:
        dls.show_batch()
        # Inside a function the bare expression is discarded; print it.
        print( dls.one_batch()[0].shape )

    return dls

In [None]:
def learn_resnet(dls=None):
    """Fine-tune a resnet18 regressor and plot its loss and sample results.

    Args:
        dls: DataLoaders to train on. When omitted, the notebook-level
            ``dls`` variable is used, which keeps the original zero-argument
            call working.

    Returns:
        The fine-tuned learner.

    Raises:
        ValueError: If no DataLoaders were passed and no global ``dls`` exists.
    """
    if dls is None:
        # Backward compatible fallback to the notebook global.
        dls = globals().get("dls")
    if dls is None:
        raise ValueError("learn_resnet needs DataLoaders: pass dls or define a global `dls` first")

    learn2 = cnn_learner(dls, resnet18, loss_func=MSELossFlat(), metrics=[rmse], cbs=ActivationStats(with_hist=True))
    learn2.fine_tune(5)

    learn2.recorder.plot_loss()
    learn2.show_results(figsize=(20,10))
    return learn2

The code below is adapted from https://github.com/cmasenas/fastai_navigation_training/blob/master/fastai_train.ipynb

TODO: Figure out how to have multiple output heads

In [None]:
def get_learner(dls):
    """Create a Learner wrapping a small convolutional regression network.

    The network is five stride-2 ConvLayers (3 -> 24 -> 32 -> 64 -> 128 -> 256
    channels), adaptive average pooling to 1x1, and a 256 -> 50 -> ``dls.c``
    linear head. The final Tanh keeps every output within [-1, 1].

    Args:
        dls: DataLoaders to train on; ``dls.c`` sets the number of outputs.

    Returns:
        A Learner using MSELossFlat, the rmse metric and an ActivationStats
        callback with histograms enabled.
    """
    channels = [3, 24, 32, 64, 128, 256]
    conv_stack = [
        ConvLayer(n_in, n_out, stride=2)
        for n_in, n_out in zip(channels[:-1], channels[1:])
    ]
    head = [
        nn.AdaptiveAvgPool2d(1),
        Flatten(),
        nn.Linear(256, 50),
        nn.ReLU(),
        nn.Linear(50, dls.c),
        nn.Tanh(),
    ]
    model = torch.nn.Sequential(*conv_stack, *head)

    stats_cb = ActivationStats(with_hist=True)
    return Learner(dls, model, loss_func=MSELossFlat(), metrics=[rmse], cbs=stats_cb)

In [None]:
def test_one_transform(name, inputs, df_all, tfm, epochs, lr):
    """Train a fresh learner once with a single batch transform.

    Per-epoch results are appended to ``Transform_<name>.csv`` through a
    CSVLogger callback, so repeated runs for one transform share a file.

    Args:
        name: Label used in the CSV file name.
        inputs: Tub source forwarded to ``get_data``.
        df_all: Pre-loaded DataFrame forwarded to ``get_data``.
        tfm: Batch transform passed as ``batch_tfms`` (may be None).
        epochs: Number of epochs for ``fit_one_cycle``.
        lr: Learning rate passed to ``fit_one_cycle``.
    """
    loaders = get_data(inputs, df_all=df_all, batch_tfms=tfm)
    csv_logger = CSVLogger(f"Transform_{name}.csv", append=True)
    learner = get_learner(loaders)
    learner.fit_one_cycle(epochs, lr, cbs=[csv_logger])

In [None]:
# Train multiple times using a list of Transforms, one at a time.
# Compare mean/stdev of best validation loss (or rmse?) for each Transform
df_all = get_dataframe("track1_warehouse.txt")
# The leading None is the baseline run without any batch transform.
transforms = [None]
transforms.extend( aug_transforms(do_flip=False, size=128) )
for tfm in transforms:
    name = "None" if tfm is None else type(tfm).__name__
    print( f"Transform: {name}" )
    for i in range(5):
        print( f"   Run {i+1}" )
        # Pass the transform under test. Passing None here (as before) made
        # every run train without augmentation, regardless of `name`.
        test_one_transform(name, "track1_warehouse.txt", df_all, tfm, 5, 3e-3)

In [None]:
def visualize_learner( learn, dls=None ):
    """Scatter-plot predictions against targets for one batch.

    Two figures are shown: output column 0 (steering) and output column 1
    (throttle), each with a red reference diagonal.

    Args:
        learn: Learner used to compute predictions.
        dls: DataLoaders to draw the batch from. Defaults to ``learn.dls``
            so the function no longer depends on a notebook global.
    """
    if dls is None:
        dls = learn.dls
    preds, tgt = learn.get_preds(dl=[dls.one_batch()])

    # (title, title y-offset, output column, diagonal end points)
    plots = (
        ("Target vs Predicted Steering", 1.0, 0, [-1,1]),
        ("Target vs Predicted Throttle", 1.02, 1, [0,1]),
    )
    for title, title_y, column, diagonal in plots:
        plt.title(title, fontsize=18, y=title_y)
        plt.xlabel("Target", fontsize=14, labelpad=15)
        plt.ylabel("Predicted", fontsize=14, labelpad=15)
        plt.plot(tgt.T[column], preds.T[column],'bo')
        plt.plot(diagonal, diagonal,'r', linewidth = 4)
        plt.show()

In [None]:
# NOTE(review): `learn` is only assigned in a later cell of this file, so this
# cell depends on notebook execution order — confirm it is still needed.
learn.export()

In [None]:
# Load the tub records once, then build DataLoaders without batch transforms.
df_all = get_dataframe("track1_warehouse.txt")
dls = get_data("track1_warehouse.txt", df_all=df_all)

In [None]:
# Build the custom CNN learner and train it for 15 one-cycle epochs (max LR 3e-3).
learn = get_learner(dls)
learn.fit_one_cycle(n_epoch=15, lr_max=3e-3)

In [None]:
# Plot predicted vs. target steering and throttle for the learner trained above.
visualize_learner(learn)

In [None]:
# Export the trained learner under the file name 'models/track1_v2.pkl'.
# NOTE(review): presumably the models/ directory must already exist — confirm.
learn.export('models/track1_v2.pkl')