In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
#export
from exp.nb_02 import *

In [3]:
path= Path('../PCImages')
path2fm= '../FeatureMatsMerged/TheGreatCollection.txt'
path2colnames= 'FeatureMatIndex.txt'

In [4]:
fm= fm_from_txt(path2fm, path2colnames)
fm.shape

(56657, 105)

In [5]:
fm.head()

Unnamed: 0,id,date,time,label1,label2,label3,ind,wallcrash,temp,LP,...,err_peKV_ElRel_2s,E_aKV_ElRel_2s,eta_aKV_ElRel_2s,Act_aKV_ElRel_2s,err_aKV_ElRel_2s,E_aeKV_ElRel_2s,E2_aeKV_ElRel_2s,eta_aeKV_ElRel_2s,Act_aeKV_ElRel_2s,err_aeKV_ElRel_2s
0,180322140137,180322.0,140137.0,0,100.0,0.0,1.0,0.0,18.0,875.0,...,0.00084,34.801664,33.46624,3.135231e-05,0.000837,35.18287,1091.7549,36.509799,0.000338,0.000819
1,180322140233,180322.0,140233.0,0,100.0,0.0,2.0,0.0,18.01,875.0,...,0.000397,29.482185,20.546622,2.180137e-07,0.00253,1.576053e-07,64.210986,111.86186,3.1e-05,0.000343
2,180322140601,180322.0,140601.0,0,100.0,0.0,3.0,38.956936,17.98,875.0,...,0.028127,17.579015,3.970008,1.079131,0.012464,18.76191,210.27841,4.268777,1.056558,0.012754
3,180322140713,180322.0,140713.0,0,100.0,0.0,4.0,0.0,17.99,875.0,...,0.00075,62.245363,39.791365,0.2516519,0.000616,51.71595,993.69425,50.203118,0.320308,0.000632
4,180322140852,180322.0,140852.0,0,100.0,0.0,5.0,0.0,17.99,875.0,...,0.002178,28.424611,9.381201,2.280459e-08,0.001762,28.7011,1567.1203,9.746181,0.004185,0.001797


In [6]:
cols= ['id',
 'date',
 'time',
 'label1',
 'label2',
 'label3',
 'ind',
 'wallcrash',
 'temp',
 'LP',
 'HL',
 't_stretch',
 't_relax',
 'framecut',
 'fps',
 'medium',
 'passage',
 'ms_ch1',
 'mf_ch1',
 'mf_real_ch1',
 'cv_nn_ch1',
 'cvn_ch1',
 'ms_ch2',
 'mf_ch2',
 'mf_real_ch2',
 'cv_nn_ch2',
 'cvn_ch2']

In [17]:
#export
def get_row(fm, img_id, cols2drop):
    return torch.cuda.FloatTensor(np.array(fm.loc[fm['id'].isin([img_id])].drop(columns=cols2drop)).flatten())

In [18]:
class CellDataSet(Dataset):
    def __init__(self, path, fm, cols2drop= cols, label = 'label1', cell_phase = None, trfms=None, 
                 random_sample=None):
        self.fm = fm
        self.cols2drop= cols2drop
        self.y = self.fm[label]
        self.id2label= id2label(self.fm.id, self.y)
        if cell_phase is None:
            self.data_files = get_filtered_files(path, img_ids=self.fm.id, labels=self.y) 
        else: 
            self.data_files= [
                f for f in get_filtered_files(path, img_ids=self.fm.id, labels=self.y) if cell_phase in str(f)
            ]
        if random_sample is not None: self.data_files= random.sample(self.data_files, random_sample)
        self.trfms = get_trfms(trfms)
        
    def __getindex__(self, idx):
        return load_file(self.data_files[idx])

    def __len__(self):
        return len(self.data_files)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        img_name  = self.data_files[idx]
        img_id, phase = split_fn(img_name)
        label=          self.id2label[img_id]
        image =         PIL.Image.open(img_name)
        fm_row=         get_row(self.fm, img_id, self.cols2drop)

        if self.trfms:
            image = self.trfms(image)

        return image, fm_row, label

In [19]:
ds= CellDataSet(path, fm, label = 'label1', cell_phase = None, 
               trfms= [CenterCrop(size=350), 
                       transforms.Grayscale(num_output_channels=1), 
                       ToFloatTensor()], 
               random_sample=100)

In [20]:
ds[0][1].shape

torch.Size([78])

In [35]:
#export
import torch.nn.functional as F

### Concatenate models:

In [62]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc0= Resize(350).cuda()
        self.cnn = models.resnet34(pretrained=True).cuda()
        self.cnn.conv1= nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).cuda()
        self.cnn.fc= nn.Linear(in_features=512, out_features=50).cuda()
        
        self.fc1 = nn.Linear(78+50, 30).cuda()
        self.fc2 = nn.Linear(30, 7).cuda()
        
    def forward(self, image, data):
        x1 = self.cnn(self.fc0(image)) #(1,20)
        x2 = data                      #(1,78)
        x = torch.cat((x1, x2), dim=1) #(1,98)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
        

In [63]:
class Learner():
    def __init__(self, dataset, model):
        self.ds= dataset 
        self.img_size, self.c= self.ds[0][0].shape[0], len(np.unique(self.ds.y))
        self.model= model #get_model(model.cuda(), image_size=self.img_size, c=self.c)
        self.loss= nn.CrossEntropyLoss()
        
    def fit(self, epochs=1, bs=32, lr = 1e-5):
        opt= torch.optim.Adam(self.model.parameters(), lr=lr)
        train_dl, valid_dl= ds2dls(self.ds, bs=bs)
        for epoch in range(epochs):
            self.model.train()
            for img_xb, data_xb, yb in train_dl:
                img_xb=  img_xb.to(device)
                data_xb= data_xb.to(device)
                yb= yb.to(device)
                loss = self.loss(self.model(img_xb, data_xb), yb)
                loss.backward()
                opt.step()
                opt.zero_grad()

            self.model.eval()
            with torch.no_grad():
                tot_loss,tot_acc = 0.,0.
                for img_xb, data_xb, yb in valid_dl:
                    img_xb=  img_xb.to(device)
                    data_xb= data_xb.to(device)
                    yb= yb.to(device)
                    pred = self.model(img_xb, data_xb)
                    pred= pred.to(device)
                    tot_loss += self.loss(pred, yb)
                    tot_acc  += accuracy(pred,yb)
            nv = len(valid_dl)
            print(epoch, tot_loss/nv, tot_acc/nv)
        #return tot_loss/nv, tot_acc/nv

In [64]:
ds= CellDataSet(path, fm, label = 'label1', cell_phase = None, 
               trfms= [CenterCrop(size=350), 
                       transforms.Grayscale(num_output_channels=1), 
                       ToFloatTensor()], 
               random_sample=1000)
learn= Learner(ds, MyModel())
learn.fit(1, bs=8, lr=1e-5)

0 tensor(533.1910, device='cuda:0') tensor(0.1100, device='cuda:0')


## Model for tabular data: 

In [65]:
class TabularModel(nn.Module):
    "Basic model for tabular data."
    def __init__(self, emb_szs, n_cont:int, out_sz:int, layers, ps=None,
                 emb_drop:float=0., y_range=None, use_bn:bool=True, bn_final:bool=False):
        super().__init__()
        ps = ifnone(ps, [0]*len(layers))
        ps = listify(ps, layers)
        self.embeds = nn.ModuleList([nn.Embedding(ni, nf) for ni,nf in emb_szs]) #type: torch.nn.modules.container.ModuleList
        self.emb_drop = nn.Dropout(emb_drop) #type: torch.nn.modules.dropout.Dropout
        self.bn_cont = nn.BatchNorm1d(n_cont) #type torch.nn.modules.batchnorm.BatchNorm1d
        n_emb = sum(e.embedding_dim for e in self.embeds) # n_emb = 17 , type: int
        self.n_emb,self.n_cont,self.y_range = n_emb,n_cont,y_range
        sizes = [n_emb + n_cont] + layers + [out_sz] #typeL list, len: 4
        actns = [nn.ReLU(inplace=True) for _ in range(len(sizes)-2)] + [None] #type: list, len: 3.  the last in None because we finish with linear
        layers = []
        for i,(n_in,n_out,dp,act) in enumerate(zip(sizes[:-1],sizes[1:],[0.]+ps,actns)):
            layers += bn_drop_lin(n_in, n_out, bn=use_bn and i!=0, p=dp, actn=act)
        if bn_final: layers.append(nn.BatchNorm1d(sizes[-1]))
        self.layers = nn.Sequential(*layers) #type: torch.nn.modules.container.Sequential
        



    def forward(self, x_cat, x_cont):
        if self.n_emb != 0:
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)] #take the embedding list and grab an embedding and pass in our single row of data.        
            x = torch.cat(x, 1) # concatenate it on dim 1 ## remeber that the len is the batch size
            x = self.emb_drop(x) # pass it through a dropout layer
        if self.n_cont != 0:
            x_cont = self.bn_cont(x_cont) # batchnorm1d
            x = torch.cat([x, x_cont], 1) if self.n_emb != 0 else x_cont # combine the categircal and continous variables on dim 1
        x = self.layers(x)
        if self.y_range is not None:
            x = (self.y_range[1]-self.y_range[0]) * torch.sigmoid(x) + self.y_range[0] # deal with y_range
        return x.squeeze()