In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
#export
from exp.nb_04 import *

In [3]:
path= Path('../PCImages')
path2fm= '../FeatureMatsMerged/TheGreatCollection.txt'
path2colnames= 'FeatureMatIndex.txt'

In [4]:
fm= fm_from_txt(path2fm, path2colnames)
fm.shape

(56657, 105)

In [5]:
cols_filtered=get_filtered_cols(fm)
len(cols_filtered)

44

In [6]:
#export
def normalize(df): return (df-df.mean())/df.std()

In [7]:
#export
import pkbar
import time

## Make it faster

In [9]:
#export
import fnmatch
Path.ls = lambda x: list(x.iterdir())

In [156]:
#export
torch.cuda.set_device(device)

In [151]:
#export
class CellMixUpDataSet(Dataset):
    def __init__(self, path, fm, cols2keep, label = 'label1', mixup_cs=[.4, .3, .3], trfms=None, random_sample=None):
        self.fm= fm
        if random_sample is not None: self.fm= self.fm.sample(n=random_sample, random_state=0)
        self.cols2keep= cols2keep
        self.fm[self.cols2keep]= normalize(self.fm[self.cols2keep])
        self.ids, self.ys= zip(*id2label(self.fm.id, self.fm[label]).items())
        self.data_files= path.ls()
        self.trfms = get_trfms(trfms)
        self.cs= mixup_cs
        self.l= len(self.ids[0])+1

    def __len__(self):
        return len(self.ids)
    
    def get_row(self, img_id):
        x_np= np.array(self.fm.loc[self.fm['id'].isin([img_id])][self.cols2keep]).flatten() 
        return torch.cuda.FloatTensor(x_np)
    
    def __getitem__(self, i):
        try:
            img_id=    self.ids[i]
            label=     self.ys[i]
            img_names= [f for f in self.data_files if (img_id.split('_')[0] in f.name) 
                        and (img_id.split('_')[1] in f.name)]
            image =    sum([c*io.imread(name) for c,name in zip(self.cs, img_names)])
            fm_row=    self.get_row(img_id)
            image=     Image.fromarray(np.uint8(image))
            if self.trfms:
                image = self.trfms(image)
        except IndexError:
            return None
        return image, fm_row, label

In [152]:
#export
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)

In [147]:
#export
def ds2dls(ds, bs, val_split = 0.2, shuffle_ds = True, random_seed = 0):

    ds_size = len(ds)
    inds = list(range(ds_size))
    split = int(np.floor(val_split * ds_size))
    if shuffle_ds:
        np.random.seed(random_seed)
        np.random.shuffle(inds)
    train_inds, val_inds = inds[split:], inds[:split]

    ts = SubsetRandomSampler(train_inds)
    vs = SubsetRandomSampler(val_inds)

    train_dl = DataLoader(ds, batch_size=bs, collate_fn=collate_fn, sampler=ts)
    valid_dl = DataLoader(ds, batch_size=bs, collate_fn=collate_fn, sampler=vs)

    return train_dl, valid_dl

In [224]:
#export
class Learner():
    def __init__(self, dataset, model, bs=8):
        self.bs=bs
        self.ds= dataset 
        self.img_size, self.c= self.ds[0][0].shape[0], len(np.unique(self.ds.ys))
        self.model= model #get_model(model.cuda(), image_size=self.img_size, c=self.c)
        self.loss= nn.CrossEntropyLoss()
        self.train_dl, self.valid_dl= ds2dls(self.ds, bs=self.bs)
        
    def fit(self, epochs=1, lr = 1e-5):
        opt= torch.optim.Adam(self.model.parameters(), lr=lr)
        for epoch in range(epochs):
            kbar = pkbar.Kbar(target=len(self.ds)//self.bs, epoch=epoch, 
                              num_epochs=epochs, width=1, always_stateful=False)
            self.model.train()
            for i, [img_xb, data_xb, yb] in enumerate(self.train_dl):
                img_xb=  img_xb.to(device)
                data_xb= data_xb.to(device)
                yb= yb.to(device)
                pred = self.model(img_xb, data_xb)
                loss = self.loss(pred, yb)
                acc  = accuracy(pred,yb)
                loss.backward()
                opt.step()
                opt.zero_grad()
                
                kbar.update(i, values=[("train loss", loss), ("train acc", acc)])

            self.model.eval()
            with torch.no_grad():
                tot_loss,tot_acc = 0.,0.
                for i, [img_xb, data_xb, yb] in enumerate(self.valid_dl):
                    img_xb=  img_xb.to(device)
                    data_xb= data_xb.to(device)
                    yb= yb.to(device)
                    pred = self.model(img_xb, data_xb)
                    pred= pred.to(device)
                    loss      = self.loss(pred, yb)
                    tot_loss += loss
                    acc       = accuracy(pred,yb)
                    tot_acc  += acc
                    
                    kbar.update(i, values=[("valid loss", loss), ("valid acc", acc)])
            nv = len(self.valid_dl)
            print('\n Total vali loss and accuracy: ', tot_loss.data.cpu()/nv, tot_acc.data.cpu()/nv)
        #return tot_loss/nv, tot_acc/nv

In [227]:
ds= CellMixUpDataSet(path, fm, cols_filtered, label = 'label1', 
                mixup_cs=[.5, .25, .25],
                trfms= [CenterCrop(size=350), 
                       transforms.Grayscale(num_output_channels=1), 
                       transforms.ToTensor(),
                       transforms.Normalize(mean=[0.18], std=[0.12])
                       ], 
                random_sample=None)

In [226]:
learn= Learner(ds, CombinedModel(50,10, p=0.5), bs=2)
learn.fit(3, lr=1e-5)

Epoch: 1/3
0/5 [.] - ETA: 0s - train loss: 1.9603 - train acc: 0.0000e+00 - valid loss: -6.0909 - valid acc: 0.0000e+00
 Total vali loss and accuracy:  tensor(2.0303) tensor(0.)
Epoch: 2/3
0/5 [.] - ETA: 0s - train loss: 1.9931 - train acc: 0.0000e+00 - valid loss: -6.3963 - valid acc: 0.0000e+00
 Total vali loss and accuracy:  tensor(2.1321) tensor(0.)
Epoch: 3/3
0/5 [.] - ETA: 0s - train loss: 1.9541 - train acc: 0.0000e+00 - valid loss: -6.6809 - valid acc: 0.0000e+00
 Total vali loss and accuracy:  tensor(2.2270) tensor(0.)


In [None]:
torch.save(learn.model, 'models/')
torch.save(learn.train_dl, 'models/train_dl.pth')
torch.save(learn.valid_dl, 'models/valid_dl.pth')