In [6]:
import torch
import torchvision
import psycopg2
import torchvision.transforms as transforms
import torch.nn as nn
from tqdm.auto import tqdm
import io
from PIL import Image

from torch.utils.data import Dataset
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [7]:
torch.cuda.empty_cache()
batch_size=32
epochs=5
#device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"

In [8]:
transformation = transforms.Compose(
    [    
        transforms.Resize((256, 256)),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)

base_transform = torchvision.transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)

In [9]:

class CustomDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform
    def __getitem__(self, index):
        img = self.data[index]
        if self.transform:
            img = self.transform(img)
        label = self.labels[index]
        return img, label

    def __len__(self):
        return len(self.data)

In [10]:

conn_select = psycopg2.connect(
    database="hse_medical",
    user='hse_medical',
    password='123456',
    host='127.0.0.1',
    port='5450',
    options="-c search_path=analyze_medical"
)

conn_select.autocommit = True

transform = transforms.ToTensor()

def get_connection():
    return conn_select

In [11]:


cursor = conn_select.cursor()

sql1 = f'''select 
    target,
    image from medical_pictures_train;'''
cursor.execute(sql1)
data_postgres = cursor.fetchall()
print(f"datatrain_size : {len(data_postgres)}")
cursor.close()
targets = []
images = []
for data in data_postgres:
    targets.append(data[0])
    image = Image.open(io.BytesIO(data[1]))
    images.append(image)
    

data_train = CustomDataset(images, targets,transform=transformation)

datatrain_size : 15588


In [12]:
cursor_test= conn_select.cursor()

sql2 = f'''select 
    target,
    image from medical_pictures_test;'''
cursor_test.execute(sql2)
data_postgres_test = cursor_test.fetchall()
cursor_test.close()
targets_test = []
images_test = []


for data in data_postgres_test:
    targets_test.append(data[0])
    bytes_io = io.BytesIO(data[1])
    image_open = Image.open(bytes_io)
    images_test.append(image_open)
    
    

data_test = CustomDataset(images_test, targets_test,transform=base_transform)

In [13]:
train_dataloader = torch.utils.data.DataLoader(
    data_train, batch_size=batch_size, shuffle=True, num_workers=4
)
val_dataloader = torch.utils.data.DataLoader(
    data_test, batch_size=batch_size, shuffle=False, num_workers=4
)

In [ ]:

vgg19 = torchvision.models.vgg19(pretrained=True)

vgg19.classifier[6] = nn.Linear(4096, 23)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg19.parameters(), lr=0.001, momentum=0.9)


In [14]:

for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(tqdm(train_dataloader), 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = vgg19(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 100 == 99:  # print and visualize every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
            # Add code here to visualize the training process for each batch

print('Finished Training')

# Test the network and calculate testing accuracy
correct = 0
total = 0
with torch.no_grad():
    for data in tqdm(val_dataloader):
        images, labels = data
        outputs = vgg19(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /home/roman/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:50<00:00, 11.4MB/s] 


  0%|          | 0/488 [00:00<?, ?it/s]

[1,   100] loss: 2.838
[1,   200] loss: 2.543
[1,   300] loss: 2.468
[1,   400] loss: 2.366


  0%|          | 0/488 [00:00<?, ?it/s]

[2,   100] loss: 2.248
[2,   200] loss: 2.215
[2,   300] loss: 2.158
[2,   400] loss: 2.134


  0%|          | 0/488 [00:00<?, ?it/s]

[3,   100] loss: 2.019
[3,   200] loss: 1.971
[3,   300] loss: 1.949
[3,   400] loss: 1.967


  0%|          | 0/488 [00:00<?, ?it/s]

[4,   100] loss: 1.811
[4,   200] loss: 1.820
[4,   300] loss: 1.840
[4,   400] loss: 1.821


  0%|          | 0/488 [00:00<?, ?it/s]

[5,   100] loss: 1.696
[5,   200] loss: 1.628
[5,   300] loss: 1.662
[5,   400] loss: 1.690
Finished Training


  0%|          | 0/126 [00:00<?, ?it/s]

Accuracy of the network on the test images: 43 %
