In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from pathlib import Path

import os
import sys
import datetime
import torch
import torchvision

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/torch.
import warnings; warnings.filterwarnings('ignore')

# Put the parent directory on the import path; it must happen before the two
# project imports below (presumably `config` and `src` live there - confirm).
sys.path.append('..')
from config import CFG, NET
from src.dataloading import EvalDataset
# Rebind the name CFG from the imported callable to the object it returns;
# from here on `CFG` is the configuration object, not the imported name.
CFG = CFG()
# Show floats with thousands separators and two decimals in DataFrame output.
pd.options.display.float_format = '{:,.2f}'.format
# Display limits for wide / long frames are taken from the project config.
pd.set_option('display.max_columns', CFG.NCOLS)
pd.set_option('display.max_rows', CFG.NROWS)

In [2]:
# Directory that holds the metadata CSV (`test_metadata.csv` is read from it).
meta_dir = Path('data')
# Directory containing the tile images. Kept as a str with a trailing slash
# because tile file names are appended to it by plain string concatenation.
dir_test = 'tiles/test/48/1/'

def process_df(df: pd.DataFrame) -> pd.DataFrame:
    """Encode the clinical metadata columns as float32 features.

    The transformation is applied to a copy, so the frame passed in is left
    untouched and the function can be called again on the same raw input.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``age`` (string), ``sex`` and
        ``melanoma_history``.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` in which

        * ``age`` is characters 1-2 of the original string parsed as a number
          and min-max scaled to [0, 1] using the min/max of this frame;
        * ``sex`` is recoded 1 -> 0 and 2 -> 1;
        * ``melanoma_history`` is recoded 'YES' -> 1, 'NO' -> 0 and
          missing -> -1.

        All three columns are float32.
    """
    # Work on a copy: mutating the caller's frame made a second call fail,
    # because `age` was no longer a string column.
    df = df.copy()

    age = df['age'].str.slice(1, 3).astype(np.float32)
    lowest = age.min()
    span = age.max() - lowest
    # When every age is identical the span is 0 and the plain formula gives
    # 0/0 = NaN; `age - lowest` yields 0 for those rows while keeping NaN for
    # rows whose age was missing.
    df['age'] = (age - lowest) / span if span != 0 else age - lowest

    # Order matters: 1 -> 0 first, then 2 -> 1 (the new 0s are not touched).
    df['sex'] = df['sex'].replace(1, 0).replace(2, 1).astype(np.float32)
    df['melanoma_history'] = (
        df['melanoma_history']
        .replace({'YES': 1, "NO": 0})
        .fillna(-1)
        .astype(np.float32)
    )
    return df

In [3]:
# Load the slide-level metadata and encode its clinical columns.
test = pd.read_csv(meta_dir / 'test_metadata.csv')
test = process_df(test)

# The slide identifier is the file name up to its first dot.
test = test.sort_values(by='filename').reset_index(drop=True)
test['tissue_id'] = test.filename.str.split('.').str[0]

# Tile images are named '<tissue_id>_<tile_id>.jpeg'. Every other directory
# entry (hidden files, sub-directories, other formats) is ignored instead of
# raising while the name is being parsed.
fnames = [p for p in os.listdir(dir_test)
          if p.endswith('.jpeg') and '_' in p]
tile_parts = [os.path.splitext(p)[0].rsplit('_', 1) for p in fnames]

# One row per tile: owning slide, path on disk and numeric tile index.
df1 = pd.DataFrame({
    'tissue_id': [stem for stem, _ in tile_parts],
    'path': [dir_test + p for p in fnames],
    'tile_id': np.array([int(num) for _, num in tile_parts], dtype=np.int16),
})

# Keep only the metadata rows of slides that actually have tiles on disk.
# `.loc` raises KeyError when a tiled slide has no metadata row.
files = sorted(set(df1['tissue_id']))
df = test.set_index('tissue_id').loc[files].reset_index()
df = df.sort_values(by=['tissue_id']).reset_index(drop=True)

# NOTE(review): the first label says "training" although this is the test
# metadata; the text is left unchanged so it matches the recorded output.
print(f'training dataset: {df.shape[0]} samples')
print(f'testing dataset: {df1.shape[0]} tiles')

# Tiles per slide, counted by exact id match. The earlier substring test
# (`id in filename`) over-counted whenever one id occurred inside another
# file name and was O(slides x tiles). Every slide in `df` has at least one
# tile by construction, so no `n_tiles > 0` filter is needed.
df['n_tiles'] = df['tissue_id'].map(df1['tissue_id'].value_counts())

# One row per tile, carrying the metadata of the slide it belongs to.
df_test = pd.merge(df, df1, on='tissue_id').sort_values(
    by=['tissue_id', 'tile_id']).reset_index(drop=True)

# Store every numeric column (including n_tiles and tile_id) as float32.
nums = df_test.select_dtypes(include='number').columns
df_test[nums] = df_test[nums].astype(np.float32)

training dataset: 2 samples
testing dataset: 128 tiles


In [8]:
# Show the per-tile table; it relies on `df_test` from an earlier cell.
# pandas truncates long frames in the rendered output.
df_test

Unnamed: 0,tissue_id,filename,age,sex,body_site,melanoma_history,n_tiles,path,tile_id
0,8tn0wx0q,8tn0wx0q.tif,1.00,0.00,arm,0.00,64.00,tiles/test/48/1/8tn0wx0q_0.jpeg,0.00
1,8tn0wx0q,8tn0wx0q.tif,1.00,0.00,arm,0.00,64.00,tiles/test/48/1/8tn0wx0q_1.jpeg,1.00
2,8tn0wx0q,8tn0wx0q.tif,1.00,0.00,arm,0.00,64.00,tiles/test/48/1/8tn0wx0q_2.jpeg,2.00
3,8tn0wx0q,8tn0wx0q.tif,1.00,0.00,arm,0.00,64.00,tiles/test/48/1/8tn0wx0q_3.jpeg,3.00
4,8tn0wx0q,8tn0wx0q.tif,1.00,0.00,arm,0.00,64.00,tiles/test/48/1/8tn0wx0q_4.jpeg,4.00
...,...,...,...,...,...,...,...,...,...
123,qpbyhjj8,qpbyhjj8.tif,0.00,1.00,trunc,0.00,64.00,tiles/test/48/1/qpbyhjj8_59.jpeg,59.00
124,qpbyhjj8,qpbyhjj8.tif,0.00,1.00,trunc,0.00,64.00,tiles/test/48/1/qpbyhjj8_60.jpeg,60.00
125,qpbyhjj8,qpbyhjj8.tif,0.00,1.00,trunc,0.00,64.00,tiles/test/48/1/qpbyhjj8_61.jpeg,61.00
126,qpbyhjj8,qpbyhjj8.tif,0.00,1.00,trunc,0.00,64.00,tiles/test/48/1/qpbyhjj8_62.jpeg,62.00


In [7]:
# Build the project's evaluation dataset from the tile directory and the
# metadata directory.
# NOTE(review): the meaning of the positional literals 42, 64, 64, 384 is not
# visible here - check EvalDataset's signature and name them (TODO confirm).
EvalDataset(dir_test, meta_dir, 42, 64, 64, 384)

training dataset: 2 samples
testing dataset: 128 tiles
Index(['tissue_id', 'filename', 'age', 'sex', 'body_site', 'melanoma_history',
       'n_tiles', 'path', 'tile_id'],
      dtype='object')


<src.dataloading.EvalDataset at 0x7fd8844be290>

In [2]:
# Load the saved weights. The checkpoint stores its tensors on 'cuda:0', so on
# a machine without CUDA they have to be remapped to the CPU or the load
# fails; where CUDA is available the original device placement is kept.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
state = torch.load(
    'assets/48-1-0.pt',
    map_location=None if torch.cuda.is_available() else 'cpu',
)

In [3]:
# Inspect the loaded checkpoint (an OrderedDict of parameter name -> tensor).
# NOTE(review): this prints every tensor; `list(state.keys())` would be a
# much smaller way to check the layer names.
state

OrderedDict([('meta_model.l1.weight',
              tensor([[ 0.1457,  0.1418, -0.0403,  ..., -0.0540, -0.0178, -0.0623],
                      [ 0.1507, -0.1404, -0.0815,  ...,  0.1270,  0.0857, -0.0294],
                      [ 0.1134,  0.0404,  0.0689,  ...,  0.1407, -0.0298, -0.0145],
                      ...,
                      [ 0.1947,  0.1747,  0.1417,  ..., -0.0069, -0.0452, -0.0189],
                      [-0.0197,  0.0814, -0.1583,  ..., -0.0262, -0.0339, -0.0653],
                      [-0.1016, -0.0315,  0.1540,  ...,  0.1042,  0.1085, -0.0235]],
                     device='cuda:0')),
             ('meta_model.l1.bias',
              tensor([-8.6220e-02, -3.6409e-02, -8.7724e-02,  1.5812e-01, -9.3574e-04,
                      -5.1019e-02, -7.4729e-02, -1.4012e-01,  1.9793e-02,  1.7625e-01,
                       1.2346e-01,  1.5316e-01,  1.4865e-01,  9.3058e-02, -3.1371e-03,
                      -1.3777e-01,  1.4490e-01, -1.0752e-01, -1.0634e-01,  9.0445e-02,
      