## Libraries

In [1]:
import re
from exp.nb_09 import *

## Useful functions

In [2]:
# Pattern 1 matches any character followed by a capitalised word (one capital plus lowercase letters);
# pattern 2 matches a lowercase letter or digit followed directly by a capital.
_camel_re1 = re.compile('(.)([A-Z][a-z]+)')
_camel_re2 = re.compile('([a-z0-9])([A-Z])')


def camel2snake(name):
    """Convert a CamelCase name to snake_case.

    An underscore is inserted at every boundary matched by the two
    module-level patterns, then the whole string is lower-cased.
    """
    with_underscore = r'\1_\2'
    partly_split = _camel_re1.sub(with_underscore, name)
    fully_split = _camel_re2.sub(with_underscore, partly_split)
    return fully_split.lower()

# Quick check: convert a sample name and display it next to the first compiled pattern
c2s = camel2snake("BoomDoomWeeDg")
c2s, _camel_re1

('boom_doom_wee_dg', re.compile(r'(.)([A-Z][a-z]+)', re.UNICODE))

## Using Exceptions as flow control

In [3]:
#export
class Callback():
    """Base class for callbacks attached to a runner.

    Attributes that are not found on the callback are looked up on the runner,
    so callback code can read runner state directly. Calling the callback with
    an event name runs the method of that name, if the callback defines one.
    """
    _order=0

    def set_runner(self, run):
        """Store a reference to the runner this callback belongs to."""
        self.run=run

    def __getattr__(self, k):
        """Delegate a missing attribute to the runner.

        Raises:
            AttributeError: if no runner is attached, or the runner does not
                have the attribute either.
        """
        # __getattr__ only runs when normal lookup fails. If `run` itself is the
        # missing attribute (no runner attached yet), evaluating `self.run` below
        # would re-enter this method forever, so fail cleanly instead.
        if k == 'run': raise AttributeError(k)
        return getattr(self.run, k)

    @property
    def name(self):
        """Snake-case class name with a trailing 'Callback' removed ('callback' if nothing is left)."""
        name = re.sub(r'Callback$', '', self.__class__.__name__)
        return camel2snake(name or 'callback')

    def __call__(self, cb_name):
        """Run the method called `cb_name`, if any; return True only if it returned a truthy value."""
        f = getattr(self, cb_name, None)
        if f and f(): return True
        return False

class TrainEvalCallback(Callback):
    """Keeps the runner's progress counters and train/eval state up to date.

    `n_epochs` is a fractional epoch count that grows by 1/iters per training
    batch; `n_iter` counts training batches.
    """

    def begin_fit(self):
        # Start both progress counters from zero
        self.run.n_epochs=0.
        self.run.n_iter=0

    def begin_epoch(self):
        # Snap the fractional counter to the epoch index, then enter training mode
        self.run.n_epochs=self.epoch
        self.model.train()
        self.run.in_train=True

    def begin_validate(self):
        # Enter evaluation mode for the validation pass
        self.model.eval()
        self.run.in_train=False

    def after_batch(self):
        # Progress only advances on training batches
        if self.in_train:
            self.run.n_epochs += 1./self.iters
            self.run.n_iter   += 1

class CancelTrainException(Exception):
    """Raised to stop the whole fit; handled in the runner's `fit`."""


class CancelEpochException(Exception):
    """Raised to stop the current pass over the data; handled in `all_batches`."""


class CancelBatchException(Exception):
    """Raised to stop the current batch; handled in `one_batch`."""

class TestCbsCallback(Callback):
    """Minimal callback for checking the wiring: announces itself when a fit begins."""

    def begin_fit(self):
        greeting = "I'm a test callback"
        print(greeting)



## Combining Learner and runner

In [4]:
#export
def param_getter(m):
    """Return the parameters of model `m`; used as the default `splitter`."""
    return m.parameters()


def sgd_opt():
    """Placeholder optimiser factory: not implemented yet, so it returns None."""
    pass


class LearnerRunnerDJ():
    """Combined learner and runner: holds the model, data, loss function and
    callbacks, and drives the fit loop.

    Callbacks are notified of each stage by calling the runner itself with an
    event name, e.g. `self('begin_batch')`. A callback returning True from
    `begin_epoch` / `begin_validate` makes `fit` skip that pass over the data.
    """

    # Every event name that may be dispatched through __call__
    ALL_CBS = {'begin_batch', 'after_pred', 'after_loss', 'after_backward', 'after_step',
        'after_cancel_batch', 'after_batch', 'after_cancel_epoch', 'begin_fit',
        'begin_epoch', 'begin_validate', 'after_epoch',
        'after_cancel_train', 'after_fit'}

    def __init__(self, model, data, loss_func, opt_func=sgd_opt, lr=1e-2, splitter=param_getter,
                 cbs=None, cb_funcs=None):
        """
        model     - the model; called as `model(xb)`
        data      - object exposing `train_dl` and `valid_dl`
        loss_func - called as `loss_func(pred, yb)` (usually cross entropy)
        opt_func  - optimiser factory (currently only stored)
        lr        - learning rate (currently only stored)
        splitter  - function returning the model parameters (currently only stored)
        cbs       - callback instance(s) to attach
        cb_funcs  - zero-argument factory function(s), each returning a callback
        """
        self.model, self.data, self.loss_func = model, data, loss_func
        self.opt_func, self.lr, self.splitter = opt_func, lr, splitter

        # the logger is a print function to print the output from a nn
        self.logger, self.in_train, self.opt = print, False, None

        self.cbs = []
        self.add_cb(TrainEvalCallback())
        self.add_cbs(cbs)
        # Build a real list here: listify() wraps a generator as a single item
        # instead of expanding it, which would register the generator itself.
        self.add_cbs([cbf() for cbf in listify(cb_funcs)])

    def add_cbs(self, cbs):
        """Attach every callback in `cbs` (None, a single callback or a list)."""
        for cb in listify(cbs): self.add_cb(cb)

    def add_cb(self, cb):
        """Attach one callback: give it a reference to this runner, expose it as
        the attribute `cb.name`, and add it to `self.cbs`."""
        cb.set_runner(self)
        setattr(self, cb.name, cb)
        self.cbs.append(cb)

    def remove_cbs(self, cbs):
        """Remove the given callback(s) from `self.cbs`."""
        for cb in listify(cbs): self.cbs.remove(cb)

    def all_batches(self):
        """Run `one_batch` over every batch of the current data loader `self.dl`."""
        self.iters = len(self.dl) # number of batches in the current data loader
        try:
            for i, (xb, yb) in enumerate(self.dl):
                self.one_batch(i, xb, yb)
        # Cancelling the epoch is an epoch-level event, not a batch-level one
        except CancelEpochException: self('after_cancel_epoch')

    def one_batch(self, i, xb, yb):
        """Process one batch: forward pass and loss, plus (in training only) the
        backward pass, optimiser step and gradient reset. `after_batch` always fires."""
        try:
            self.iter = i
            self.xb, self.yb = xb, yb;                      self('begin_batch')
            # Run model
            self.pred = self.model(xb);                     self('after_pred')
            # Calculate loss: predictions first, targets second, matching how
            # the metrics are called (`m(run.pred, run.yb)`)
            self.loss = self.loss_func(self.pred, self.yb); self('after_loss')
            # Only update parameters while training
            if not self.in_train: return
            # Backward pass calculates gradients
            self.loss.backward();                           self('after_backward')
            # Step updates the parameters
            self.opt.step();                                self('after_step')
            # Zero gradients
            self.opt.zero_grad()
        # 'after_cancel_batch' is the event name registered in ALL_CBS
        except CancelBatchException:                        self('after_cancel_batch')
        finally: self('after_batch')

    def do_begin_fit(self, epochs):
        """Record the number of epochs, reset the loss and fire `begin_fit`."""
        self.epochs,self.loss = epochs,tensor(0.)
        self('begin_fit')

    def do_begin_epoch(self, epoch):
        """Select the training data loader, record `epoch` and fire `begin_epoch`.

        Returns the dispatch result: True if a callback asked to skip the epoch.
        """
        self.dl, self.epoch = self.data.train_dl, epoch
        print("In do_begin_epoch")
        return self('begin_epoch')

    def fit(self, epochs, cbs=None, reset_opt=False):
        """Train for `epochs` epochs, validating after each one.

        `cbs` are extra callbacks attached for this call only. `reset_opt` is
        unused while optimiser creation (below) remains disabled.
        """
        self.add_cbs(cbs)
        # NOTE: optimiser creation is still switched off in this notebook, so
        # `self.opt` stays None unless it is assigned from outside.
        #if reset_opt or not self.opt: self.opt = self.opt_func(self.splitter(self.model), lr=self.lr)

        try:
            self.do_begin_fit(epochs)
            for epoch in range(epochs):
                # do_begin_epoch already dispatches 'begin_epoch'; use its result
                # rather than firing the event a second time
                if not self.do_begin_epoch(epoch): self.all_batches()

                # Validation pass, without gradients
                with torch.no_grad():
                    self.dl = self.data.valid_dl
                    if not self('begin_validate'): self.all_batches()

                # Fired once per epoch, inside the loop
                self('after_epoch')
        # Exceptions are used to cancel training
        except CancelTrainException: self('after_cancel_train')
        finally:
            self('after_fit')
            self.remove_cbs(cbs)

    def __call__(self, cb_name):
        """Dispatch event `cb_name` to every callback in ascending `_order`.

        Returns True if any callback returned True (a callback returns True to
        stop; one that returns nothing counts as False).
        """
        res = False
        assert cb_name in self.ALL_CBS
        # `or` keeps a True from any callback; every callback is still called
        for cb in sorted(self.cbs, key=lambda x: x._order): res = cb(cb_name) or res
        return res
        

## Average stats callback

In [5]:
#export
class AvgStats():
    """Accumulates the loss and metric totals over batches and reports their
    per-sample averages.

    metrics  - metric function(s), each called as `m(pred, yb)`
    in_train - True for training stats, False for validation stats
    """
    def __init__(self, metrics, in_train):
        self.metrics,self.in_train = listify(metrics),in_train
        print("Metrics:")
        print(metrics)
        # Start from zeroed totals so repr() and the stats properties work
        # even before the first explicit reset()
        self.reset()

    def reset(self):
        """Zero the running totals and the sample count."""
        self.tot_loss,self.count = 0.,0
        self.tot_mets = [0.] * len(self.metrics)

    @property
    def all_stats(self):
        """Total loss followed by the metric totals."""
        loss = self.tot_loss
        # tot_loss is a plain float until accumulate() adds a tensor loss to it
        loss = loss.item() if hasattr(loss, 'item') else float(loss)
        return [loss] + self.tot_mets

    @property
    def avg_stats(self):
        """The totals divided by the number of samples seen (requires count > 0)."""
        return [o/self.count for o in self.all_stats]

    def __repr__(self):
        if not self.count: return ""
        return f"{'train' if self.in_train else 'valid'}: {self.avg_stats}"

    def accumulate(self, run):
        """Add the current batch of `run` to the totals.

        The loss and each metric are weighted by the batch size (first
        dimension of `run.xb`) so the averages are per sample.
        """
        bn = run.xb.shape[0]
        self.tot_loss += run.loss * bn
        self.count += bn
        for i,m in enumerate(self.metrics):
            self.tot_mets[i] += m(run.pred, run.yb) * bn

class AvgStatsCallback(Callback):
    """Callback that collects average loss/metric stats for the training and
    validation passes and logs them after each epoch."""

    def __init__(self, metrics):
        # One stats container per phase
        self.train_stats = AvgStats(metrics, True)
        self.valid_stats = AvgStats(metrics, False)

    def begin_epoch(self):
        # Fresh totals for both phases at the start of every epoch
        for stats in (self.train_stats, self.valid_stats):
            stats.reset()

    def after_loss(self):
        # Add the current batch to whichever phase is running
        if self.in_train:
            stats = self.train_stats
        else:
            stats = self.valid_stats
        with torch.no_grad():
            stats.accumulate(self.run)

    def after_epoch(self):
        # The runner's logger is used here; it can be customised to write to a
        # file or a progress bar
        for stats in (self.train_stats, self.valid_stats):
            self.logger(stats)

## Data loading 1) Image list containers
These objects store a list of image locations. Images can be accessed by index or by iterating. Asking for a single image opens the file (with PIL) and applies any transforms.

In [26]:
# get files, function to get the image files stored in a directory structure
#export
def _get_files(p, fs, extensions=None):
    p = Path(p)
    res = [p/f for f in fs if not f.startswith('.')
           and ((not extensions) or f'.{f.split(".")[-1].lower()}' in extensions)]
    return res
#export
def get_files(path, extensions=None, recurse=False, include=None):
    """List the files under `path`, optionally filtered by extension.

    extensions - extension(s) to keep, compared in lower case; empty keeps all
    recurse    - walk sub-directories too (hidden directories are skipped)
    include    - when recursing, only these top-level directories are entered
    """
    root = Path(path)
    wanted = {ext.lower() for ext in setify(extensions)}

    if not recurse:
        names = [entry.name for entry in os.scandir(root) if entry.is_file()]
        return _get_files(root, names, wanted)

    found = []
    # os.walk yields (dirpath, dirnames, filenames); pruning dirnames in place
    # controls which directories are visited next
    for step, (dirpath, dirnames, filenames) in enumerate(os.walk(root)):
        first_step = step == 0
        if first_step and include is not None:
            dirnames[:] = [name for name in dirnames if name in include]
        else:
            dirnames[:] = [name for name in dirnames if not name.startswith('.')]
        found.extend(_get_files(dirpath, filenames, wanted))
    return found

# Item list base class 
class ListContainer():
    """Thin wrapper around a list (`self.lst`) that also accepts index lists
    and boolean masks when indexing."""

    def __init__(self, lst):
        self.lst = listify(lst)

    def __getitem__(self,idx):
        """Return one item (int), a sub-list (slice), the items selected by a
        boolean mask of the same length, or the items at a list of indices."""
        if isinstance(idx, (int, slice)): return self.lst[idx]
        # An empty selection has no first element to inspect
        if len(idx) == 0: return []
        if isinstance(idx[0], bool):
            assert len(idx)==len(self)
            return [o for dx,o in zip(idx, self.lst) if dx]
        return [self.lst[i] for i in idx]

    def __len__(self): return len(self.lst)

    def __repr__(self):
        res = f'({self.__class__.__name__})({len(self)} items)\n{self.lst[:10]}'
        # Only the first 10 items are shown, so flag anything longer than that
        if len(self) > 10: res = res + '...'
        return res

    def __iter__(self):
        return iter(self.lst)

    def __setitem__(self, idx, itm):
        """Store `itm` at position `idx` (Python passes the key first, then the value)."""
        self.lst[idx] = itm

    def __delitem__(self, i):
        """Delete the item at `i`; an out-of-range index is reported, not raised."""
        try:
            del self.lst[i]
        except IndexError as error:
            print(error, " Deletion out of range of list")

# Listify - returns a list
def listify(x):
    """Return `x` as a list.

    None gives []; a list is returned unchanged (same object); sets and
    generators are expanded into a list; anything else, including a string,
    becomes a one-element list.
    """
    from types import GeneratorType
    if x is None: return []
    if isinstance(x, list): return x
    # A string is one item, not a sequence of characters
    if isinstance(x, str): return [x]
    if isinstance(x, (set, GeneratorType)): return list(x)
    return [x]

# Compose function (takes a list of functions and applies them in order (giving the function an order attribute))
def compose(x, funcs, *args, order_key='_order', **kwargs):
    """Pass `x` through `funcs` one after another and return the result.

    Functions run in ascending order of their `order_key` attribute (0 when
    absent). `kwargs` are handed to every function; extra positional `args`
    are accepted but not forwarded. `funcs=None` returns `x` unchanged.
    """
    if funcs is None:
        return x

    def sort_key(func):
        return getattr(func, order_key, 0)

    result = x
    for func in sorted(listify(funcs), key=sort_key):
        result = func(result, **kwargs)
    return result


# List container base class
# should store something in a list and return it in applying functions
class ItemList(ListContainer):
    """List container that post-processes items on access.

    Indexing fetches the stored item(s), passes each through `get` and then
    through the transforms in `trnsfrms` (applied by `compose`).

    lst      - the items to store
    pth      - path the items came from (only stored and shown in the repr)
    trnsfrms - transform(s) applied to every item on access, or None
    """
    def __init__(self, lst, pth = '.', trnsfrms = None):
        super().__init__(lst)
        self.trnsfrms, self.pth = trnsfrms, pth

    def __repr__(self):
        # Formatting instead of `+` so a pathlib.Path `pth` works as well as a str
        return f'{super().__repr__()}{self.pth}'

    def get(self, itm):
        """Hook for subclasses: turn a stored item into the object to return."""
        return itm

    def _get(self, itm):
        """`get` the item, then apply the transforms."""
        return compose(self.get(itm), self.trnsfrms)

    def __getitem__(self, idx):
        itms = super().__getitem__(idx)
        if isinstance(itms, list): return [self._get(o) for o in itms]
        return self._get(itms)

    def new(self, items, cls=None):
        """Create a container of the same class (or `cls`) holding `items`,
        with this container's path and transforms."""
        if cls is None: cls=self.__class__
        return cls(items, self.pth, trnsfrms=self.trnsfrms)
    
# This is an image container class. Uses PIL lirary to ope images
class ImageList(ItemList):
    """ItemList of image file names; an item is opened with PIL when accessed."""

    @classmethod
    def from_files(cls, path, extensions=None, recurse=True, include=None, **kwargs):
        """Build the list from the files found under `path`.

        `extensions` defaults to `image_extensions`; `kwargs` go to the constructor.
        """
        exts = image_extensions if extensions is None else extensions
        files = get_files(path, exts, recurse=recurse, include=include)
        return cls(files, path, **kwargs)

    def get(self, fn):
        """Open the image file `fn`."""
        return PIL.Image.open(fn)
    

## Data loader 2) Processor and splitter
1) Processor encodes the labels so that the same label encoding can be used on different data sets

2) Splitter splits the dataset into training and validation sets

In [27]:
# Uniqueify (returns a list of the unique files)
from collections import OrderedDict

def uniqueify(x, sort=False):
    """Return the distinct elements of `x` as a list.

    Elements keep their first-seen order unless `sort` is true, in which case
    the list is sorted.
    """
    distinct = list(OrderedDict.fromkeys(x))
    return sorted(distinct) if sort else distinct

# data Processor (assigns an integer refernce to each class in the dataset)

class Processor():
    """Maps class labels to integer indices and back.

    vocab - ordered list of the possible classes; when None it is built from
            the first items the processor is called on.
    otoi  - dict from class to its index in `vocab` (None until first needed).
    """
    def __init__(self, vocab = None):
        self.vocab = vocab
        # Built in __call__, so a vocab supplied here gets its lookup table too
        self.otoi = None

    def __call__(self, items):
        """Return the integer index of each item in `items`."""
        if self.vocab is None:
            self.vocab = uniqueify(items)
        if self.otoi is None:
            self.otoi = {v:k for k, v in enumerate(self.vocab)}
        return [self.proc1(i) for i in items]

    def proc1(self, item):
        """Index of a single class; raises KeyError for a class not in the vocab."""
        return self.otoi[item]

    def deProcess(self, idxs):
        """Return the classes for a list of indices."""
        assert self.vocab is not None
        return [self.deproc1(idx) for idx in idxs]

    def deproc1(self, idx):
        """Class stored at a single index."""
        return self.vocab[idx]

def parent_labeler(fn):
    """Label a file by the name of the directory that contains it."""
    folder = fn.parent
    return folder.name

In [28]:
# Splitter stuff
# Grand parent splitter splits based on the whether the grand parnt folder name is train or valid
def grandparent_splitter(fn, valid_name='valid', train_name='train'):
    """Classify a file by the name of its grandparent directory.

    Returns True for `valid_name`, False for `train_name`, and None otherwise.
    """
    folder = fn.parent.parent.name
    if folder == valid_name:
        return True
    if folder == train_name:
        return False
    return None

# Just creates a random number, should probably check item is an image
def rand_splitter(fn, trainFrac = 0.7):
    """Draw one uniform random number and return True when it exceeds `trainFrac`.

    `fn` is not inspected; the result depends only on the random draw.
    """
    draw = float(torch.rand(1))
    return bool(draw > trainFrac)



# Applies the splitter function to all items in the item list and splits them into different groups
# Puts the train set first
def split_by_func(items, f):
    """Partition `items` by the value of `f(item)`.

    Returns `(false_items, true_items)`; items for which `f` returns neither
    (e.g. None) are left out of both lists.
    """
    flags = [f(item) for item in items]
    rejected, accepted = [], []
    for item, flag in zip(items, flags):
        if flag == False:
            rejected.append(item)
        elif flag == True:
            accepted.append(item)
    return rejected, accepted

# Ready-made splitter: split_by_func with rand_splitter (default trainFrac 0.7) bound as `f`
randSplitter = partial(split_by_func, f = rand_splitter)

# class split data - splits and stores training and validation data
class SplitData():
    """Holds a training set and a validation set side by side."""

    def __init__(self, train, valid):
        self.train = train
        self.valid = valid

    @classmethod
    def split_by_func(cls, il, f):
        """Split item list `il` with `f` and wrap each half via `il.new`."""
        parts = split_by_func(il, f)
        train, valid = (il.new(part) for part in parts)
        return cls(train, valid)

    # Needed to pickle SplitData and load it back without recursion errors
    def __setstate__(self,data:Any):
        self.__dict__.update(data)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}\nTrain: {self.train} \n\nValid: {self.valid}'
    
 


In [29]:
# Finally - labelled data class stores labelled data for training and validation set. It stores the x and y (converted)
# to integer labels) 

def _label_by_func(ds, f, cls=ItemList):
    """Build a `cls` container holding `f(o)` for every stored item of `ds`, keeping `ds.pth`."""
    labels = [f(item) for item in ds.lst]
    return cls(labels, pth=ds.pth)

class LabelledData():
    """Pairs an item list `x` with its labels `y`, each run through an
    optional processor.

    Indexing returns the tuple `(x[idx], y[idx])`; the length is that of `x`.
    """
    def process(self, il, proc):
        """Return a copy of `il` whose stored items have been passed through `proc`."""
        return il.new(compose(il.lst, proc))

    def __init__(self, x, y, proc_x=None, proc_y=None):
        self.x = self.process(x, proc_x)
        self.y = self.process(y, proc_y)
        # Keep the processors so they can be reused on another data set
        self.proc_x, self.proc_y = proc_x, proc_y

    def __repr__(self): return f'{self.__class__.__name__}\nx: {self.x}\ny: {self.y}\n'
    def __getitem__(self,idx): return self.x[idx],self.y[idx]
    def __len__(self): return len(self.x)

    @classmethod
    def label_by_func(cls, il, f, proc_x=None, proc_y=None):
        """Label every item of `il` with `f` and build the labelled data set."""
        return cls(il, _label_by_func(il, f), proc_x=proc_x, proc_y=proc_y)

# make the training and validation set
def label_by_func(sd, f, proc_x=None, proc_y=None):
    """Label both halves of split data `sd` with `f`.

    The training half is processed first, then the validation half, both with
    the same processors; the result is a SplitData of two LabelledData objects.
    """
    labelled = [LabelledData.label_by_func(part, f, proc_x=proc_x, proc_y=proc_y)
                for part in (sd.train, sd.valid)]
    return SplitData(*labelled)

## Transforms

In [30]:
# base class - only an order
class Transform():
    """Base class for transforms; it only supplies the default ordering value."""
    _order = 0
    
class ResizeFixed(Transform):
    """Transform that resizes an image to a fixed size using bilinear resampling.

    An int `size` is expanded to the pair `(size, size)`.
    """
    _order=10

    def __init__(self,size):
        self.size = (size, size) if isinstance(size, int) else size

    def __call__(self, item):
        target = self.size
        return item.resize(target, PIL.Image.BILINEAR)

# To byte tensor transform
def toByteTensor(item):
    """Convert an image-like `item` (with `tobytes()` and `size`) to a byte tensor.

    The raw bytes are viewed as (height, width, rest) and permuted so the
    last dimension comes first.
    """
    width, height = item.size
    storage = torch.ByteStorage.from_buffer(item.tobytes())
    flat = torch.ByteTensor(storage)
    return flat.view(height, width, -1).permute(2, 0, 1)
toByteTensor._order=20

# To float tensor
def toFloatTensor(item):
    """Return `item` converted with its `.float()` method."""
    as_float = item.float()
    return as_float
toFloatTensor._order=30
    
# Grey to RGB
class MakeRGB(Transform):
    """Transform that converts an image to RGB mode."""

    def __call__(self, item):
        # item should be a PIL object
        converted = item.convert('RGB')
        return converted

## Model

## Optimiser

## Testing

In [31]:
#x = Callback()
# Instantiate the train/eval callback on its own (no runner attached)
tr = TrainEvalCallback()
#x.__class__.__name__, x.name, tr.name


def makeTestCB():
    """Callback factory: return a new TestCbsCallback."""
    cb = TestCbsCallback()
    return cb

# Test exceptions
class ExcpTest(Callback):
    """Callback for exercising the exception-based flow control."""

    def after_cancel_train(self):
        # Report that the cancel-training event reached this callback
        print("I told you I was cancelling the training wiht an exception")

    def begin_epoch(self):
        # Announce, then raise the epoch-cancelling exception
        print("About to apply an exception")
        raise CancelEpochException()

#xCpt = ExcpTest()
#run = LearnerRunnerDJ(1, 2, 3, cb_funcs= makeTestCB, cbs=xCpt)

#run.fit(1)

#run.cbs[0].run, run.cbs[0].name, run.cbs[1].name, run.train_eval


#avgSts = AvgStats(accuracy, True)

#x = tensor([1, 2])
#y = tensor([[2, 6, 3], [2, 5, 3]])

#accuracy(y, x)


        


In [32]:
# Test compose
class add1():
    """Callable that adds one to its argument; carries `_order` 2 for compose()."""

    def __init__(self):
        self._order = 2

    def __call__(self, x):
        incremented = x + 1
        return incremented
    
class take3Div2():
    """Callable that subtracts three and halves the result; carries `_order` 1 for compose()."""

    def __init__(self):
        self._order = 1

    def __call__(self, x):
        shifted = x - 3
        return shifted/2
    

# take3Div2 (_order 1) runs before add1 (_order 2): (1 - 3)/2 = -1.0, then -1.0 + 1 = 0.0
xx = compose(1, [add1(), take3Div2()])
#xx = listify([1])
#ixinstance([1, 2], None)
#type(None)
xx

0.0

In [33]:
class test():
    """Toy class showing how a method can be looked up by name and called."""

    def print(self):
        print("Hello")
        return True

    def __call__(self, func):
        """Look up the attribute named `func`, print it, and call it if found.

        Returns True only when it exists and returns a truthy value.
        """
        method = getattr(self, func, None)
        print(method)
        if not method:
            return False
        # note method() runs the function
        return True if method() else False
    
# Dispatch by name: looks up the `print` method, shows the bound method, then runs it
ttt = test()
ttt('print')

# enumerate yields (index, item) pairs
for i, j in enumerate(["a", "b", "c"]):
    print(i, j)

<bound method test.print of <__main__.test object at 0x7f26c5e90320>>
Hello
0 a
1 b
2 c


## Test list container

In [34]:
# Root folder of the image data set used for these checks
x = '/home/paul/fastaiMyData/bikes/'

# Image list without transforms; indexing opens the image
ii = ImageList.from_files(x )
ii[5]
# use the transforms to convert an image somehow
# NOTE(review): Transform and MakeRGB are redefined here, replacing the earlier definitions
class Transform(): _order=0

# NOTE(review): despite the names, this class and make_rgb below convert to mode 'LA', not 'RGB' - confirm intent
class MakeRGB(Transform):
    def __call__(self, item): return item.convert('LA')

def make_rgb(item): return item.convert('LA')

# Image list that applies make_rgb to every item on access
iii = ImageList.from_files(x, trnsfrms = [make_rgb])
iii[6]
# Work on a raw stored file name (no transform applied)
imFn= iii.lst[8]
imFn.parent.parent.name
rand_splitter(imFn)

# `new` copies the transforms across to the new list
iiii = iii.new(iii.lst[0:4])
iii.trnsfrms, iiii.trnsfrms

([<function __main__.make_rgb(item)>], [<function __main__.make_rgb(item)>])

## Test dataloader class

In [35]:
# test uniqueify: duplicates are dropped, first-seen order is kept
s = ["a", "b", "v", "b", "a", "kkk"]
sUni = uniqueify(s)
# processor with no vocab: it is built from the first items it is called on
p = Processor()


# Show the unique list, the encoded labels, the lookup table, the vocab and a decode round trip
sUni, p(s), p.otoi, p.vocab, p.deProcess([0, 1, 2, 2, 0, 3])

# Test processor on our  class

(['a', 'b', 'v', 'kkk'],
 [0, 1, 2, 1, 0, 3],
 {'a': 0, 'b': 1, 'v': 2, 'kkk': 3},
 ['a', 'b', 'v', 'kkk'],
 ['a', 'b', 'v', 'v', 'a', 'kkk'])

In [36]:
# Test splitter: split_by_func returns the False group first, then the True group
x = randSplitter(iii)
len(x[0]), len(x[1])

# Encode the parent-folder labels of two stored files
xxPro = Processor()
xxPro([parent_labeler(x) for  x in iii.lst[3:5]])

#xx = SplitData(x[0],x[1])
# Split again, this time wrapped in a SplitData (a new random split)
xx = SplitData.split_by_func(iii, rand_splitter)

# Test labelledata
#lbDtConstructor = LabelledData
#lbDtTrain = LabelledData.label_by_func(xx.train, parent_labeler)


xx

SplitData
Train: (ImageList)(395 items)
[PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000074.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000007.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000012.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000085.png'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000025.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000054.jpeg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000081.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000066.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000062.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000026.jpg')].../home/paul/fastaiMyData/bikes/ 

Valid: (ImageList)(183 items)
[PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000094.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000029.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000093.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrid

In [37]:
# Test labelled data class (processor returns the integer labelling the class)
lblDtTrain = LabelledData.label_by_func(iii, parent_labeler, proc_y = Processor())
lblDtTrain.x, lblDtTrain.y
#lblDtValid = LabelledData.label_by_func(xx.valid, parent_labeler, proc_y = Processor())


# Label both halves of the split with one shared Processor
allDat = label_by_func(xx, parent_labeler, proc_y=Processor())
# Reuse the training processor for the validation half
lblDtValid = LabelledData.label_by_func(xx.valid, parent_labeler, proc_y = allDat.train.proc_y)

allDat.train.proc_y, allDat.valid.proc_y, allDat, lblDtValid

(<__main__.Processor at 0x7f26c5de19b0>,
 <__main__.Processor at 0x7f26c5de19b0>,
 SplitData
 Train: LabelledData
 x: (ImageList)(395 items)
 [PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000074.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000007.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000012.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000085.png'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000025.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000054.jpeg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000081.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000066.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000062.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000026.jpg')].../home/paul/fastaiMyData/bikes/
 y: (ItemList)(395 items)
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0].../home/paul/fastaiMyData/bikes/
  
 
 Valid: LabelledData
 x: (ImageList)(183 items)
 [PosixPath('/home/paul/fasta

## Test transforms

In [38]:
# Show the type toByteTensor returns for the first (opened, transformed) image, next to the image itself
type(toByteTensor(iii[0])), iii[0]

(torch.Tensor, <PIL.Image.Image image mode=LA size=500x375 at 0x7F26C5E90C50>)

In [39]:
trnfms = [make_rgb, ResizeFixed(128), toByteTensor, toFloatTensor] # transforms: convert the image mode, resize to 128x128,
# then to a byte tensor and finally to a float tensor

pth = '/home/paul/fastaiMyData/bikes/' # image path

# use the from_files classmethod to get the images
imLst = ImageList.from_files(pth, trnsfrms = trnfms)

# Split data
sd = SplitData.split_by_func(imLst, partial(rand_splitter, trainFrac = 0.7))

# Label data: the validation set reuses the processor created for the training set
lblDtTrn = LabelledData.label_by_func(sd.train, parent_labeler, proc_y=Processor())
lblDtVal = LabelledData.label_by_func(sd.valid, parent_labeler, proc_y=lblDtTrn.proc_y)
lblDt = SplitData(lblDtTrn, lblDtVal)

# Indexing x applies the transforms, so this shows two float tensors
lblDt.train.x[0:2]

[tensor([[[192., 192., 192.,  ..., 195., 195., 195.],
          [192., 192., 192.,  ..., 195., 195., 195.],
          [192., 192., 192.,  ..., 195., 195., 195.],
          ...,
          [133., 134., 129.,  ..., 116., 113., 114.],
          [133., 138., 140.,  ..., 133., 128., 124.],
          [134., 133., 133.,  ..., 131., 130., 132.]],
 
         [[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.]]]),
 tensor([[[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  .

In [40]:
# Show one transformed item, the train/valid label counts and the labelled training set
imLst[8], len(lblDtTrn.y), len(lblDtVal.y), lblDt.train

(tensor([[[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.]],
 
         [[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.]]]), 416, 162, LabelledData
 x: (ImageList)(416 items)
 [PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000074.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000007.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000012.jpg'), PosixPath('/home/paul/fastaiMyData/bikes/hybrids/00000085.png'), Posi

## Data Loader

In [43]:
# Batch size shared by both loaders
bs = 32
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
# Training loader shuffles and drops the last incomplete batch; validation loader keeps order
train_dl = DataLoader(lblDt.train, bs, shuffle=True, drop_last=True)
valid_dl = DataLoader(lblDtVal, bs, shuffle=False)


class DataLoaderX():
    """Minimal data loader: yields consecutive slices of `ds`, `bs` items at a time."""

    def __init__(self, ds, bs):
        self.ds = ds
        self.bs = bs

    def __iter__(self):
        # generator: one slice per step; the last one may be shorter
        for start in range(0, len(self.ds), self.bs):
            stop = start + self.bs
            yield self.ds[start:stop]
    
def collate(b):
    """Turn a batch of `(x, y)` pairs into the tuple `(stacked xs, stacked ys)`."""
    xs, ys = zip(*b)
    x_batch = torch.stack(xs)
    y_batch = torch.stack(ys)
    return x_batch, y_batch

class DataLoaderXX():
    """Data loader driven by a sampler: each element the sampler yields is a
    group of indices whose items are fetched from `ds` and merged by `collate_fn`."""

    def __init__(self, ds, sampler, collate_fn=collate):
        self.ds = ds
        self.sampler = sampler
        self.collate_fn = collate_fn

    def __iter__(self):
        for idxs in self.sampler:
            samples = [self.ds[i] for i in idxs]
            yield self.collate_fn(samples)
            
# Check the simple loader on a plain list: the first batch is the first two items
testDl = iter(DataLoaderX([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 2))
next(testDl)

# Same loader over the labelled training data, three items per batch
testDl2 = DataLoaderX(lblDt.train, 3)
#for dx in enumerate(testDl2):
    #print(dx)



# Pull one batch from the PyTorch training loader and display the inputs
x, y = next(iter(train_dl))
#x, y = next(iter(train_dl))
#g = DataLoaderXX(lblDt.train, bs)
#next(iter(g))

#x = [lblDt.train[i] for i in range(4)]
#type(lblDt.train)
x

tensor([[[[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.]],

         [[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.]]],


        [[[255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          ...,
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  ..., 255., 255., 255.],
          [255., 255., 255.,  .

In [22]:
# databunch
#export
class DataBunch():
    """Bundles the training and validation data loaders with `c`, the final number of activations."""

    def __init__(self, train_dl, valid_dl, c=None):
        self.train_dl = train_dl
        self.valid_dl = valid_dl
        self.c = c

    @property
    def train_ds(self):
        """Data set behind the training loader."""
        loader = self.train_dl
        return loader.dataset

    @property
    def valid_ds(self):
        """Data set behind the validation loader."""
        loader = self.valid_dl
        return loader.dataset
    


def get_dls(train_ds, valid_ds, bs, **kwargs):
    """Return `(train loader, validation loader)` for the two data sets.

    The training loader shuffles and uses batch size `bs`; the validation
    loader uses `bs*2`. `kwargs` are passed to both DataLoader constructors.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs)
    valid_loader = DataLoader(valid_ds, batch_size=bs*2, **kwargs)
    return train_loader, valid_loader