In [None]:
# (Re)load IPython's autoreload extension and select mode 2, which re-imports
# all modules before each cell is executed, so edits to local modules are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os
import sys

# Make the local fastai (version 1) checkout importable.
# `sys` is imported explicitly rather than reached through `os.sys`, which is
# an undocumented attribute of the `os` module. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate sys.path entries.
FASTAI_DIR = '../fastai/'
if FASTAI_DIR not in sys.path:
    sys.path.append(FASTAI_DIR)
from pathlib import Path
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from fastai import *
from fastai.vision import *

from dataset import ImageMulti4Channel
from resnet import Resnet4Channel

In [None]:
# Batch size; passed as `bs=bs` to ImageDataBunch.create further down.
bs = 64

In [None]:
# Root directory of the competition data. It is read from the HPA_DATA_DIR
# environment variable when set, so the notebook can run on another machine
# without editing this cell; otherwise it falls back to the original location.
path = Path(os.environ.get(
    'HPA_DATA_DIR',
    '/home/Deep_Learner/work/datasets/human-protein-atlas-image-classification/stage1_data',
))

In [None]:
# Load the training metadata from train.csv in the dataset root. Its `Id` and
# `Target` columns are used by the following cell to build file names and labels.
df = pd.read_csv(path/'train.csv')
# Show the first rows as a quick sanity check.
df.head()

In [None]:
# One image file name per training row: the sample Id with a '.png' suffix.
fns = pd.Series(
    [image_id + '.png' for image_id in df.Id]
)
# One list of label strings per training row, obtained by splitting the
# space-separated `Target` field.
labels = [target.split(' ') for target in df.Target]

In [None]:
# Seed NumPy's global RNG before the datasets are built (presumably so the
# train/validation split made inside from_folder is repeatable; confirm in
# dataset.py).
np.random.seed(42)
# Build the training and validation datasets from the 'train_pngs' folder,
# using class names '0' .. '27'.
trn_ds, val_ds = ImageMulti4Channel.from_folder(
    path,
    'train_pngs',
    fns,
    labels,
    classes=list(map(str, range(28))),
)

In [None]:
# Sorted, de-duplicated sample ids from the test folder: the id is the part of
# each file name before its first underscore.
test_ids = sorted(
    {entry.partition('_')[0] for entry in os.listdir(path/'test_pngs')}
)

In [None]:
# Build the test dataset from 'test_pngs'. The test images have no labels, so
# every sample gets the placeholder label ['0'] with a single class '0', and
# valid_pct=0 is passed so nothing is held out; the second returned dataset
# is discarded.
test_ds, _ = ImageMulti4Channel.from_folder(
    path,
    'test_pngs',
    pd.Series(test_ids),
    [['0'] for _ in test_ids],
    classes=['0'],
    valid_pct=0,
)

In [None]:
# Training-time augmentations: horizontal and vertical flips, rotation up to
# 30 degrees, a small lighting change, and no zoom or warp. Only the first
# element returned by get_transforms is kept.
trn_tfms, _ = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_rotate=30.,
    max_zoom=1,
    max_lighting=0.05,
    max_warp=0.,
)

In [None]:
# Normalisation statistics handed to `.normalize(...)` on the data bunch:
# a pair of 4-element lists, one value per image channel
# (presumably per-channel means followed by standard deviations; confirm).
protein_stats = (
    [0.08069, 0.05258, 0.05487, 0.08282],
    [0.13704, 0.10145, 0.15313, 0.13814],
)

In [None]:
# Bundle the train/validation/test datasets into a data bunch. The
# augmentations are applied to the training set only (the second ds_tfms
# entry is empty), images are sized to 224, and the result is normalised
# with protein_stats.
data = ImageDataBunch.create(
    trn_ds,
    val_ds,
    test_ds=test_ds,
    path=path,
    bs=bs,
    ds_tfms=(trn_tfms, []),
    num_workers=8,
    size=224,
).normalize(protein_stats)

In [None]:
# Instantiate the project-local Resnet4Channel model with encoder_depth=50
# (presumably a ResNet-50 encoder adapted to 4-channel input; confirm in resnet.py).
resnet50 = Resnet4Channel(encoder_depth=50)

In [None]:
# F1 metric: fbeta with beta=1, with predictions thresholded at 0.5.
f1_score = partial(fbeta, thresh=0.5, beta=1)

In [None]:
# Assemble the learner: the 4-channel ResNet on the protein data bunch,
# trained with binary cross-entropy on raw logits and monitored with F1.
learn = ClassificationLearner(
    data=data,
    model=resnet50,
    path=path,
    metrics=[f1_score],
    loss_func=F.binary_cross_entropy_with_logits,
)

In [None]:
# Unfreeze the model so that all layers are trained, not just the head.
learn.unfreeze()

In [None]:
# Run fastai's learning-rate finder; its results are stored on learn.recorder.
learn.lr_find()

In [None]:
# Plot what the recorder captured during lr_find (loss against learning rate)
# to help choose a learning rate.
learn.recorder.plot()

In [None]:
# Learning rate used for fit_one_cycle below
# (presumably read off the lr_find plot; confirm).
lr = 0.01

In [None]:
# Train for 20 epochs with the one-cycle policy, using slice(lr) as the
# maximum learning rate.
learn.fit_one_cycle(20, slice(lr))

In [None]:
# Save the trained weights under the name 'resnet50_basic'
# (presumably written under learn.path/models; confirm location).
learn.save('resnet50_basic')

In [None]:
# Predict on the test set. The second return value is discarded because the
# test dataset was built with placeholder '0' labels only.
preds,_ = learn.get_preds(DatasetType.Test)

In [None]:
# Convert each row of per-class scores into the submission string: the indices
# of all classes whose score exceeds 0.5, joined by single spaces (an empty
# string when no class passes the threshold).
pred_labels = [
    ' '.join(str(class_idx) for class_idx in np.nonzero(row > 0.5)[0])
    for row in np.array(preds)
]
# Use a dedicated name instead of rebinding `df`: `df` holds the training
# metadata, and overwriting it would break the earlier cells that read
# df.Id / df.Target if they are re-run in the same kernel.
submission = pd.DataFrame({'Id': test_ids, 'Predicted': pred_labels})
# Write the submission file into the dataset root, with a header and no index column.
submission.to_csv(path/'protein_predictions.csv', header=True, index=False)