In [1]:
import torch
import torchvision
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
import os
from PIL import Image
from torchvision.transforms import transforms
from torch.utils.data.sampler import SubsetRandomSampler
os.environ["CUDA_VISIBLE_DEVICES"] ="0"

In [2]:
df = pd.read_csv('/home/dpakhom1/medicine/train/train_labels.csv')

In [4]:
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.CenterCrop(32),
                                transforms.ToTensor(),
                                transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5))
                               ])

In [5]:
class dataset(Dataset):
    def __init__(self, csv_file, root_dir,  transform = None):
        self.df = pd.read_csv(os.path.join(root_dir, csv_file))
        self.transform = transform
        self.root_dir = root_dir
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        inputs_idx = self.df.iloc[index, 0]
        label = np.array(self.df.iloc[index, 1])
        #print(label)
        
        if type(inputs_idx) == str:
            inputs = [(self.root_dir + inputs_idx + '.tif')]
        else:
            inputs = []
            for idx in inputs_idx:
                inputs.append(self.root_dir + idx + '.tif')
        
        if self.transform:
            inputs = torch.stack([self.transform(Image.open(pic_path)) for pic_path in inputs])
            
        return torch.squeeze(inputs), torch.from_numpy(label)

In [6]:
train_set = dataset('train_labels.csv','/home/dpakhom1/medicine/train/', transform= transform)

In [7]:
validation_split = 0.1
dataset_size = len(train_set)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)


In [8]:
train_loader = DataLoader(train_set, batch_size = 64, drop_last=True, num_workers=2, sampler=train_sampler)
val_loader = DataLoader(train_set, batch_size=64, num_workers=2, sampler=valid_sampler)

In [9]:
len(train_loader)

3094

In [10]:
model = torchvision.models.resnet34(pretrained=True)

In [11]:
class classifier(nn.Module):
    
    def __init__(self):
        super(classifier, self).__init__()
        self.linear = nn.Linear(512, 2)
    
    def forward(self, x):
        return self.linear(x)

In [11]:
#for module in model.named_parameters():
 #   if not 'fc' in module[0]:
  #      module[1].requires_grad = False

In [12]:
model.fc = classifier()

In [13]:
model.cuda();

In [14]:
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr = 0.00005, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()

In [17]:
val_best = 0
epochs = 10
for epoch in range(epochs):
    model.train()
    curr_loss = 0
    for idx, data in enumerate(train_loader):
        
        optimizer.zero_grad()
        
        inputs, labels = data
        inputs = inputs.float().cuda()
        labels = labels.long().cuda()
        
        outputs = model(inputs)
        labels = labels.squeeze()
        
        loss = criterion(outputs, labels)
        loss.backward()
        curr_loss += loss.item()
        
        if idx % 200 == 199:
            print('loss: ', curr_loss / 200)
            curr_loss = 0
        
        optimizer.step()
        
    model.eval()
    correct=0
    for j, data2 in enumerate(val_loader):
            inputs, labels = data2
            inputs = inputs.float().cuda()
            labels = labels.long().cuda()
        
            outputs = model(inputs)
            labels = labels.squeeze()
        
            loss = criterion(outputs, labels)
               # print(torch.equal(idx,labels))
            _, ans = torch.max(outputs, dim=1)
            correct += sum(ans == labels).item()
            #curr_loss += loss.item()
                
            if j % 100 == 49:
                print(loss.item())
    curr_val = correct/(64*len(val_loader))
    print("correct: ", curr_val)
    if curr_val > val_best:
        val_best = curr_val
        torch.save(model.state_dict(), '/home/dpakhom1/medicine/resnet34_pretr.pth')
            
                

loss:  0.5115306620299817
loss:  0.4488185006380081
loss:  0.4334354178607464
loss:  0.419516878426075
loss:  0.4136935178935528
loss:  0.39992320388555525
loss:  0.3910246230661869
loss:  0.38673665083944797
loss:  0.38655309937894344
loss:  0.39412770718336104
loss:  0.3831723516434431
loss:  0.37413201823830605
loss:  0.3678660435974598
loss:  0.37179297097027303
loss:  0.3701955898106098
0.3474794030189514
0.3494543731212616
0.3886283040046692
correct:  0.8487917877906976
loss:  0.35112061239778997
loss:  0.35354915261268616
loss:  0.3489705976843834
loss:  0.3468678653985262
loss:  0.3463729167729616
loss:  0.34035494036972525
loss:  0.34644200749695303
loss:  0.3491999751329422
loss:  0.34481134384870527
loss:  0.33687983587384224
loss:  0.3430942448228598
loss:  0.3346645687520504
loss:  0.3542029170691967
loss:  0.33800760984420775
loss:  0.33818094313144687
0.3288825750350952
0.20614884793758392
0.40138331055641174
correct:  0.858375726744186
loss:  0.32542122408747676
loss:  

In [18]:
model.load_state_dict(torch.load('/home/dpakhom1/medicine/resnet34_pretr.pth'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [15]:
#torch.save(model.state_dict(), '/home/dpakhom1/medicine/model_pretrained_res34.pth')

In [14]:
model.eval()
for i in range(50):
    print(torch.nn.functional.softmax(model(torch.unsqueeze(train_set[i][0], 0).cuda()), dim = 1))
    print(train_set[i][1])

tensor([[0.9517, 0.0483]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.8694, 0.1306]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(1)
tensor([[0.9490, 0.0510]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.9508, 0.0492]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.9444, 0.0556]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.9383, 0.0617]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[2.7266e-04, 9.9973e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(1)
tensor([[0.0048, 0.9952]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(1)
tensor([[0.9971, 0.0029]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.9976, 0.0024]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.9071, 0.0929]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(0)
tensor([[0.0114, 0.9886]], device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor(1)
tensor([[0.9167, 0.0

In [19]:
test_transform = transforms.Compose([transforms.CenterCrop(32),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5))
                                    ])

In [20]:
import csv
model.eval()
path = '/home/dpakhom1/medicine/test/'
csv_data = ['id', 'label']
with open('ans.csv', 'w') as file_csv:
    writer = csv.writer(file_csv)
    writer.writerow(csv_data)
    for sample in os.listdir(path):
        if sample[-4:] == '.tif':
            image = Image.open(os.path.join(path, sample))
            inputs = torch.unsqueeze(test_transform(image), dim=0).cuda()
            outputs = model(inputs)
            _, idx = torch.max(outputs, dim = 1)
            writer.writerow([sample[:-4], idx.item()])
#for sample in os.listdir('/home/dpakhom1/medicine/test/'):


In [44]:
df = pd.read_csv('/home/dpakhom1/medicine/ans.csv')
df.head()

Unnamed: 0,id,label
0,dbc7573e5b13d8aab1d8a6ef4bf0bda404505f46,0
1,c347a27c7da7f1549e2af34e22a4a151b2afb359,0
2,b57e413c8862e66c9fe5e34b9780f2aa0b69feca,0
3,b8ac1c9a933f1351e1223e677928ee30ad314770,0
4,c4e8b0ec618f253f74b3e5ecb50dbbfab50e8e9b,0
