# Deep Set model

In [21]:
import torch
import torch.utils.data as D 
import torch.nn as nn
from modules import SAB, PMA, MAB
import pandas as pd

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.metrics import confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier

## Model

In [142]:
class SmallDeepSet(nn.Module):
    """Set classifier that pools over the set axis and then applies an MLP.

    The input is reduced along ``dim=1`` with the chosen pooling operation,
    and the pooled vector is mapped to 12 output scores by a three-layer MLP
    (``in_feature -> 128 -> 64 -> 12``).

    Args:
        pool: Pooling operation applied along ``dim=1``; one of ``"max"``,
            ``"mean"``, ``"sum"`` or ``"min"``.
        in_feature: Size of the last input dimension.

    Raises:
        ValueError: If ``pool`` is not one of the supported names.
    """

    # Pooling modes understood by ``forward``.
    _POOLS = ("max", "mean", "sum", "min")

    def __init__(self, pool="max", in_feature=473):
        super().__init__()
        # Fail fast: an unknown mode would otherwise skip pooling entirely and
        # run the MLP on every set element, producing a wrongly shaped output.
        if pool not in self._POOLS:
            raise ValueError(
                f"unknown pool {pool!r}; expected one of {self._POOLS}"
            )

        self.dec = nn.Sequential(
            nn.Linear(in_features=in_feature, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=12),
        )
        self.pool = pool

    def forward(self, x):
        """Pool ``x`` along ``dim=1`` and return the 12 raw scores per set.

        Args:
            x: Tensor whose ``dim=1`` is the set axis and whose last
                dimension has size ``in_feature``.

        Returns:
            Tensor of unnormalised scores with the set axis removed.

        Raises:
            ValueError: If ``self.pool`` was changed to an unsupported name
                after construction.
        """
        if self.pool == "max":
            pooled = x.max(dim=1)[0]  # max() returns (values, indices)
        elif self.pool == "mean":
            pooled = x.mean(dim=1)
        elif self.pool == "sum":
            pooled = x.sum(dim=1)
        elif self.pool == "min":
            pooled = x.min(dim=1)[0]  # min() returns (values, indices)
        else:
            raise ValueError(
                f"unknown pool {self.pool!r}; expected one of {self._POOLS}"
            )
        # No softmax here: the scores are returned unnormalised, which is the
        # form nn.CrossEntropyLoss expects.
        return self.dec(pooled)


class SmallSetTransformer(nn.Module):
    """Set classifier: a PMA layer followed by a two-layer MLP head.

    ``PMA`` comes from the project's ``modules`` package; it is configured
    with a single head and a single seed (presumably attention-based pooling
    of the set down to one vector -- confirm against ``modules.PMA``).

    Args:
        in_feature: Feature size passed to ``PMA`` as ``dim`` and used as the
            input width of the first linear layer.
    """

    def __init__(self, in_feature):
        super().__init__()

        # Sub-modules are created in the same order they run, PMA first.
        attention_pool = PMA(dim=in_feature, num_heads=1, num_seeds=1)
        head = [
            nn.Linear(in_features=in_feature, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=12),
        ]
        self.dec = nn.Sequential(attention_pool, *head)

    def forward(self, x):
        """Run ``x`` through the PMA layer and the MLP; no softmax is applied."""
        return self.dec(x)
# Build both models for 473 input features and move them to the GPU
# (.cuda() needs a CUDA-capable device).
ds_model = SmallDeepSet("max", 473).cuda()
st_model = SmallSetTransformer(473).cuda()

In [94]:
class profile_DB(D.Dataset):
    """Dataset with one item per distinct (TableNumber, ImageNumber) pair.

    Each item is ``(features, label)``:

    * ``features`` -- float tensor built from every row of ``df`` that shares
      the pair, with the five metadata/label columns removed (the
      ``TableNumber`` and ``ImageNumber`` columns stay in the features).
    * ``label`` -- ``[compound, concentration, moa]`` taken from the first
      matching row.

    Args:
        df: DataFrame holding the ``TableNumber``, ``ImageNumber``,
            ``Image_Metadata_Compound``, ``Image_Metadata_Concentration``,
            ``compound``, ``concentration`` and ``moa`` columns; all
            remaining columns must be numeric.
    """

    def __init__(self, df):
        self.df = df
        # One row per distinct (TableNumber, ImageNumber) combination.
        self.meta = self.df[["TableNumber", "ImageNumber"]].drop_duplicates()

    def __len__(self):
        """Number of distinct (TableNumber, ImageNumber) pairs."""
        return len(self.meta)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the ``index``-th pair."""
        key = self.meta.iloc[index]
        same_table = self.df["TableNumber"] == key["TableNumber"]
        same_image = self.df["ImageNumber"] == key["ImageNumber"]
        rows = self.df[same_table & same_image]

        # The label columns are read from the first matching row only.
        label = list(rows[["compound", "concentration", "moa"]].iloc[0])

        non_feature_cols = [
            "Image_Metadata_Compound",
            "Image_Metadata_Concentration",
            "compound",
            "concentration",
            "moa",
        ]
        features = rows.drop(columns=non_feature_cols)
        return torch.tensor(np.array(features)).float(), label
# Wrap the profile table (`data`, read from Ljosa_profile.csv in another cell)
# and iterate it one item per batch, in dataset order (no shuffling).
profileDB = profile_DB(data)
loader = D.DataLoader(profileDB, batch_size=1, shuffle=False)

In [148]:
def train(model, num_epochs=1, lr=1e-5):
    """Train ``model`` on the module-level ``loader`` with cross-entropy loss.

    The target for each batch is built from the third label field (the
    ``moa`` entry produced by ``profile_DB``), converted to a class index
    with ``moa2digit``.

    Args:
        model: Module to optimise; it is moved to the GPU.
        num_epochs: Number of full passes over ``loader``.
        lr: Learning rate for the Adam optimiser.

    Returns:
        List with the loss value of every optimisation step, across all
        epochs, in order.
    """
    # Create the optimiser and loss once: rebuilding them every epoch would
    # throw away Adam's running moment estimates.
    model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss().cuda()
    losses = []

    for _ in range(num_epochs):
        for batch, label in loader:
            batch = batch.cuda()
            # label[2] is the collated "moa" field; [0] selects the single
            # sample of the batch (assumes batch_size=1 -- see `loader`).
            target = torch.tensor(moa2digit(label[2][0])).cuda()

            loss = criterion(model(batch), target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
    return losses
# Train the Deep Set model; the cell output below is the returned list of
# per-step loss values.
train(ds_model)

[11.006385803222656,
 9.652955055236816,
 13.775392532348633,
 15.31724739074707,
 7.275304794311523,
 4.586055755615234,
 28.359012603759766,
 21.965198516845703,
 2.1851844787597656,
 1.9885978698730469,
 6.460908889770508,
 0.5979423522949219,
 0.07814502716064453,
 1.4198274612426758,
 1.1290550231933594,
 3.8581924438476562,
 0.8333110809326172,
 2.413778305053711,
 0.020860671997070312,
 0.012617111206054688,
 0.026296615600585938,
 0.0,
 0.000545501708984375,
 0.0003528594970703125,
 0.6094818115234375,
 0.06470203399658203,
 0.027051925659179688,
 0.1600046157836914,
 0.0012722015380859375,
 0.000118255615234375,
 0.0010585784912109375,
 0.00353240966796875,
 0.000179290771484375,
 0.0002689361572265625,
 0.06015300750732422,
 0.001682281494140625,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 186.34649658203125,
 191.44387817382812,
 221.55224609375,
 193.4324951171875,
 164.6564

In [7]:
def moa_encoding(MoA):
    """Return a 12-element one-hot float tensor on the GPU for ``MoA``.

    Slot order follows the index of ``moa["moa"].value_counts()``. The result
    is all zeros when ``MoA`` matches none of the first 12 entries, and an
    ``IndexError`` is raised when the table has fewer than 12 distinct values.

    NOTE(review): ``moa2digit`` numbers classes by first appearance rather
    than by frequency, so the two encodings need not agree on indices.
    """
    classes = list(moa["moa"].value_counts().index)
    one_hot = [1 if MoA == classes[slot] else 0 for slot in range(12)]
    return torch.tensor(one_hot).float().cuda()

In [122]:
def moa2digit(MoA):
    """Return the class index of ``MoA`` as a list.

    Classes are the distinct values of ``moa["moa"]`` numbered from 0 in
    order of first appearance. The result is a list of matching positions:
    one element for a known value, empty for an unknown one.
    """
    unique_moas = moa["moa"].drop_duplicates().reset_index(drop=True)
    is_match = unique_moas == MoA
    return unique_moas.loc[is_match].index.tolist()

In [24]:
# Load the profile table; the first CSV column is used as the index.
data = pd.read_csv("/home/user/michigan/att_pooling/Ljosa-profile/Ljosa_profile.csv", index_col=0)

In [6]:
# Load the BBBC021 MoA annotation table and drop the rows whose compound is
# DMSO.
moa = pd.read_csv("/home/user/michigan/data/BBBC021/BBBC021_v1_moa.csv")
moa = moa[moa["compound"] != "DMSO"]

In [156]:
# Fetch the first (batch, label) pair from the loader for inspection.
# The builtin next() is used because DataLoader iterators only implement
# __next__; the Python 2 style .next() method is not available on current
# PyTorch releases.
batch, label = next(iter(loader))

In [158]:
# Shape of the batch fetched above.
batch.shape

torch.Size([1, 118, 473])

In [149]:
# Does the profile table contain any missing value at all?
data.isnull().values.any()

True

In [153]:
# Names of the columns that contain at least one missing value.
data.loc[:, data.isnull().any()].columns

Index(['ObjectNumber', 'Nuclei_Children_Cells_Count',
       'Nuclei_Children_Cytoplasm_Count', 'Nuclei_AreaShape_Area',
       'Nuclei_AreaShape_Eccentricity', 'Nuclei_AreaShape_Solidity',
       'Nuclei_AreaShape_Extent', 'Nuclei_AreaShape_EulerNumber',
       'Nuclei_AreaShape_Perimeter', 'Nuclei_AreaShape_FormFactor',
       ...
       'Cytoplasm_Texture_SumAverage_CorrTub_3',
       'Cytoplasm_Texture_SumVariance_CorrTub_3',
       'Cytoplasm_Texture_SumEntropy_CorrTub_3',
       'Cytoplasm_Texture_Entropy_CorrTub_3',
       'Cytoplasm_Texture_DifferenceVariance_CorrTub_3',
       'Cytoplasm_Texture_DifferenceEntropy_CorrTub_3',
       'Cytoplasm_Texture_InfoMeas1_CorrTub_3',
       'Cytoplasm_Texture_InfoMeas2_CorrTub_3',
       'Cytoplasm_Texture_GaborX_CorrTub_3',
       'Cytoplasm_Texture_GaborY_CorrTub_3'],
      dtype='object', length=471)

In [152]:
# (rows, columns) of the profile table.
data.shape

(148650, 478)