In [1]:
from fastai.vision.all import *
import timm

In [2]:
Path.BASE_PATH = path = Path('mask')

In [3]:
cd /notebooks/

/notebooks


In [4]:
df = pd.read_csv(path/"train_labels.csv")

In [5]:
train_list = df.image.to_list()

In [6]:
files = get_image_files(path/'images')

In [7]:
splitter = FuncSplitter(lambda o: Path(o).name not in train_list)
new_files = splitter(files)
train, test = new_files

In [8]:
train_path, test_path = files[train], files[test]

In [9]:
tst_files = test_path.sorted()

In [10]:
def train(arch, size, item=Resize(480, method='squish'), accum=1, finetune=True, epochs=11):
    dls = ImageDataLoaders.from_csv(path, 'train_labels.csv', folder='images', item_tfms=item, 
                                    batch_tfms=aug_transforms(size=size, min_scale=0.75), bs=64//accum)
    cbs = GradientAccumulation(64) if accum else []
    learn = vision_learner(dls, arch, metrics=error_rate, cbs=cbs).to_fp16()
    if finetune:
        learn.fine_tune(epochs, 0.01)
        return learn.tta(dl=dls.test_dl(tst_files))
    else:
        learn.unfreeze()
        learn.fit_one_cycle(epochs, 0.01)

In [None]:
train('convnext_small_in22k', 128, epochs=1, accum=1, finetune=False)

Creating a function to find out how much memo

In [11]:
import gc
def report_gpu():
    print(torch.cuda.list_gpu_processes())
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
report_gpu()

In [None]:
train('convnext_small_in22k', 128, epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
train('convnext_small_in22k', 128, epochs=1, accum=4, finetune=False)
report_gpu()

Trying out on larger models as well

In [None]:
train('convnext_large_in22k', 224, epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
train('convnext_large_in22k', (320,240), epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
train('vit_large_patch16_224', 224, epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
train('swinv2_large_window12_192_22k', 192, epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
train('swin_large_patch4_window7_224', 224, epochs=1, accum=2, finetune=False)
report_gpu()

## Running the models

In [12]:
res = 640,480

In [13]:
models = {
    'convnext_large_in22k': {
        (Resize(res), (320,224)),
    }, 'vit_large_patch16_224': {
        (Resize(480, method='squish'), 224),
        (Resize(res), 224),
    }, 'swinv2_large_window12_192_22k': {
        (Resize(480, method='squish'), 192),
        (Resize(res), 192),
    }, 'swin_large_patch4_window7_224': {
        (Resize(res), 224),
    }
}

In [14]:
tta_res = []

for arch,details in models.items():
    for item,size in details:
        print('---',arch)
        print(size)
        print(item.name)
        tta_res.append(train(arch, size, item=item, accum=2)) #, epochs=1))
        gc.collect()
        torch.cuda.empty_cache()

--- convnext_large_in22k
(320, 224)
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.528912,0.033447,0.011494,00:16


epoch,train_loss,valid_loss,error_rate,time
0,0.205775,0.022893,0.011494,00:18
1,0.170827,0.023931,0.007663,00:18
2,0.112893,0.042032,0.011494,00:18
3,0.10748,0.02601,0.015326,00:18
4,0.109317,0.034001,0.015326,00:18
5,0.080808,0.011324,0.007663,00:18
6,0.074752,0.017277,0.003831,00:18
7,0.059131,0.027548,0.011494,00:18
8,0.049956,0.023685,0.011494,00:19
9,0.040177,0.02145,0.007663,00:18


--- vit_large_patch16_224
224
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.408517,0.290234,0.034483,00:15


epoch,train_loss,valid_loss,error_rate,time
0,0.228726,0.089819,0.019157,00:20
1,0.128848,0.122089,0.015326,00:20
2,0.114197,0.128434,0.019157,00:20
3,0.121582,0.089363,0.019157,00:20
4,0.095322,0.113424,0.019157,00:20
5,0.077822,0.157964,0.019157,00:20
6,0.063216,0.109011,0.022989,00:20
7,0.042543,0.102398,0.015326,00:20
8,0.03032,0.110566,0.015326,00:20
9,0.024766,0.10914,0.015326,00:20


--- vit_large_patch16_224
224
Resize -- {'size': (480, 480), 'method': 'squish', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.360907,0.163831,0.030651,00:16


epoch,train_loss,valid_loss,error_rate,time
0,0.118602,0.055188,0.007663,00:20
1,0.067771,0.256709,0.030651,00:20
2,0.074839,0.081039,0.019157,00:20
3,0.068248,0.00636,0.003831,00:20
4,0.046113,0.032399,0.015326,00:20
5,0.032643,0.036809,0.003831,00:20
6,0.035688,0.039917,0.007663,00:20
7,0.019776,0.020862,0.007663,00:20
8,0.010625,0.020211,0.003831,00:20
9,0.006001,0.017093,0.003831,00:20


--- swinv2_large_window12_192_22k
192
Resize -- {'size': (480, 480), 'method': 'squish', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


epoch,train_loss,valid_loss,error_rate,time
0,0.27114,0.023848,0.007663,00:16


epoch,train_loss,valid_loss,error_rate,time
0,0.102977,0.026679,0.011494,00:19
1,0.067219,0.004423,0.0,00:19
2,0.051003,0.072073,0.007663,00:19
3,0.059921,0.128108,0.015326,00:19
4,0.058534,0.097496,0.011494,00:19
5,0.042039,0.040002,0.007663,00:19
6,0.031414,0.089709,0.011494,00:19
7,0.022298,0.051576,0.007663,00:19
8,0.016996,0.025647,0.003831,00:19
9,0.011649,0.025308,0.003831,00:19


--- swinv2_large_window12_192_22k
192
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.475361,0.031292,0.011494,00:16


epoch,train_loss,valid_loss,error_rate,time
0,0.164719,0.03175,0.015326,00:19
1,0.149213,0.017172,0.007663,00:19
2,0.133545,0.040739,0.015326,00:19
3,0.106156,0.002581,0.0,00:19
4,0.082446,0.011362,0.003831,00:19
5,0.063535,0.001442,0.0,00:19
6,0.049381,0.000159,0.0,00:19
7,0.053427,0.000529,0.0,00:19
8,0.041773,0.00232,0.0,00:19
9,0.037002,0.001347,0.0,00:19


--- swin_large_patch4_window7_224
224
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.322425,0.016604,0.007663,00:14


epoch,train_loss,valid_loss,error_rate,time
0,0.201168,0.022001,0.007663,00:17
1,0.151857,0.029237,0.007663,00:17
2,0.119023,0.015972,0.007663,00:17
3,0.10748,0.036584,0.015326,00:17
4,0.095733,0.010345,0.003831,00:17
5,0.083127,0.009229,0.003831,00:17
6,0.066249,0.006909,0.003831,00:17
7,0.059111,0.007943,0.003831,00:17
8,0.049361,0.006698,0.003831,00:17
9,0.043565,0.006597,0.003831,00:17


In [15]:
save_pickle('tta_res.pkl', tta_res)

In [16]:
tta_prs = first(zip(*tta_res))

In [17]:
tta_prs += tta_prs[1:3]

In [18]:
avg_pr = torch.stack(tta_prs).mean(0)
avg_pr.shape

torch.Size([509, 2])

In [19]:
avg_pr

TensorBase([[5.0648e-01, 4.9352e-01],
            [9.9995e-01, 4.9031e-05],
            [6.9722e-06, 9.9999e-01],
            ...,
            [9.9957e-01, 4.2630e-04],
            [4.7414e-02, 9.5259e-01],
            [9.6242e-01, 3.7576e-02]])

In [20]:
ss = pd.read_csv(path/'SampleSubmission.csv')
ss

Unnamed: 0,image,target
0,aadawlxbmapqrblgxyzarhjasgiobu.png,0
1,abpxvdfyhaaohzcrngcucmhffwizxs.jpg,0
2,aclkjfvackiieiznzfcwienplielrj.jpg,0
3,aelkivmayxgsdjosiibfgmkvfrjvjk.jpg,0
4,aelzzshpfxerelefnkatpczktuxjln.jpg,0
...,...,...
504,zpfhbgixcctxylnihhrepjgcmjksvf.jpg,0
505,zuignwckjykxefmpubjpjefvhghaoi.jpg,0
506,zuxvwdblfwjpibjexgfglpyreqslhs.jpg,0
507,zvyajojutzgqumdjfxxkpiuuxeefyy.jpg,0


In [21]:
x = torch.max(avg_pr, 1)

In [22]:
y = torch.argmax(avg_pr, dim=1)

In [23]:
res = torch.where(y==0, 1-x[0], x[0])

In [24]:
results = pd.Series(res.numpy(), name="target").round(2)
results

0      0.49
1      0.00
2      1.00
3      0.91
4      0.00
       ... 
504    0.00
505    0.00
506    0.00
507    0.95
508    0.04
Name: target, Length: 509, dtype: float32

In [26]:
ss['target'] = results
ss.to_csv('subm3.csv', index=False)
!head subm3.csv

image,target
aadawlxbmapqrblgxyzarhjasgiobu.png,0.49
abpxvdfyhaaohzcrngcucmhffwizxs.jpg,0.0
aclkjfvackiieiznzfcwienplielrj.jpg,1.0
aelkivmayxgsdjosiibfgmkvfrjvjk.jpg,0.91
aelzzshpfxerelefnkatpczktuxjln.jpg,0.0
ajnjnkvstqesvfohaptobyasrkmcnq.jpg,0.0
akkydnvilgvzigllmboonbqbbioocs.jpg,1.0
akvjfjhgayyjwrqrczjbeyoqhhajis.jpg,0.0
alcveyvosnywyjbmngolmnblftdoje.jpg,0.0
