In [1]:
import torch
import torchvision
import psycopg2
import torchvision.transforms as transforms
import torch.nn as nn
from tqdm.auto import tqdm
import io
from PIL import Image
from sklearn.metrics import roc_curve, auc
from torch.utils.data import Dataset
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import pickle




In [2]:
torch.cuda.empty_cache()
batch_size=32
num_classes=23
epochs=15
#device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"

In [3]:
transformation = transforms.Compose(
    [    
        transforms.Resize((256, 256)),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)

base_transform = torchvision.transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)

In [4]:

class CustomDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform
    def __getitem__(self, index):
        img = self.data[index]
        if self.transform:
            img = self.transform(img)
        label = self.labels[index]
        return img, label

    def __len__(self):
        return len(self.data)

In [5]:

conn_select = psycopg2.connect(
    database="hse_medical",
    user='hse_medical',
    password='123456',
    host='127.0.0.1',
    port='5450',
    options="-c search_path=analyze_medical"
)

conn_select.autocommit = True

transform = transforms.ToTensor()

def get_connection():
    return conn_select

In [6]:


cursor = conn_select.cursor()

sql1 = f'''select 
    target,
    image from medical_pictures_train;'''
cursor.execute(sql1)
data_postgres = cursor.fetchall()
print(f"datatrain_size : {len(data_postgres)}")
cursor.close()
targets = []
images = []
for data in data_postgres:
    targets.append(data[0])
    image = Image.open(io.BytesIO(data[1]))
    images.append(image)
    

data_train = CustomDataset(images, targets,transform=transformation)

datatrain_size : 15588


In [7]:
cursor_test= conn_select.cursor()

sql2 = f'''select 
    target,
    image from medical_pictures_test;'''
cursor_test.execute(sql2)
data_postgres_test = cursor_test.fetchall()
cursor_test.close()
targets_test = []
images_test = []


for data in data_postgres_test:
    targets_test.append(data[0])
    bytes_io = io.BytesIO(data[1])
    image_open = Image.open(bytes_io)
    images_test.append(image_open)
    
    

data_test = CustomDataset(images_test, targets_test,transform=base_transform)

In [8]:
train_dataloader = torch.utils.data.DataLoader(
    data_train, batch_size=batch_size, shuffle=True, num_workers=4
)
val_dataloader = torch.utils.data.DataLoader(
    data_test, batch_size=batch_size, shuffle=False, num_workers=4
)

In [15]:

model = torchvision.models.googlenet(pretrained=True)
model.to(device)

num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
model.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.00001)


Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /home/roman/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:04<00:00, 10.5MB/s]


In [16]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score, classification_report

for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(tqdm(train_dataloader), 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 100 == 99:  
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
            
    model.eval()
    train_preds = []
    train_labels = []
    with torch.no_grad():
        for images, labels in tqdm(train_dataloader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_labels.extend(labels.cpu().numpy())
    train_accuracy = accuracy_score(train_labels, train_preds)
    
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss:.4f}, "
          f"Train Accuracy: {train_accuracy:.4f}")
    
print('Finished Training')
with open('googleNet.pickle', 'wb') as f:
    pickle.dump(model, f)

  0%|          | 0/488 [00:00<?, ?it/s]

[1,   100] loss: 3.078
[1,   200] loss: 2.972
[1,   300] loss: 2.894
[1,   400] loss: 2.817


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [1/15], Loss: 242.9678, Train Accuracy: 0.2246


  0%|          | 0/488 [00:00<?, ?it/s]

[2,   100] loss: 2.605
[2,   200] loss: 2.482
[2,   300] loss: 2.445
[2,   400] loss: 2.360


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [2/15], Loss: 203.1068, Train Accuracy: 0.3272


  0%|          | 0/488 [00:00<?, ?it/s]

[3,   100] loss: 2.261
[3,   200] loss: 2.215
[3,   300] loss: 2.188
[3,   400] loss: 2.178


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [3/15], Loss: 183.4643, Train Accuracy: 0.3789


  0%|          | 0/488 [00:00<?, ?it/s]

[4,   100] loss: 2.095
[4,   200] loss: 2.054
[4,   300] loss: 2.034
[4,   400] loss: 2.015


  0%|          | 0/488 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f02f9b2f7f0>
Traceback (most recent call last):
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f02f9b2f7f0>
Traceback (most recent call last):
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if

Epoch [4/15], Loss: 175.4202, Train Accuracy: 0.4157


  0%|          | 0/488 [00:00<?, ?it/s]

[5,   100] loss: 1.969
[5,   200] loss: 1.917
[5,   300] loss: 1.904
[5,   400] loss: 1.937


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [5/15], Loss: 169.3515, Train Accuracy: 0.4472


  0%|          | 0/488 [00:00<?, ?it/s]

[6,   100] loss: 1.851
[6,   200] loss: 1.866
[6,   300] loss: 1.841
[6,   400] loss: 1.810


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [6/15], Loss: 163.8023, Train Accuracy: 0.4747


  0%|          | 0/488 [00:00<?, ?it/s]

[7,   100] loss: 1.758
[7,   200] loss: 1.758
[7,   300] loss: 1.790
[7,   400] loss: 1.745


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [7/15], Loss: 156.7395, Train Accuracy: 0.5030


  0%|          | 0/488 [00:00<?, ?it/s]

[8,   100] loss: 1.665
[8,   200] loss: 1.686
[8,   300] loss: 1.663
[8,   400] loss: 1.734


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [8/15], Loss: 147.6223, Train Accuracy: 0.5198


  0%|          | 0/488 [00:00<?, ?it/s]

[9,   100] loss: 1.607
[9,   200] loss: 1.587
[9,   300] loss: 1.638
[9,   400] loss: 1.613


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [9/15], Loss: 146.2110, Train Accuracy: 0.5375


  0%|          | 0/488 [00:00<?, ?it/s]

[10,   100] loss: 1.565
[10,   200] loss: 1.533
[10,   300] loss: 1.513
[10,   400] loss: 1.560


  0%|          | 0/488 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f02f9b2f7f0><function _MultiProcessingDataLoaderIter.__del__ at 0x7f02f9b2f7f0>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
        self._shutdown_workers()self._shutdown_workers()

  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
  File "/home/roman/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
        if w.is_alive():if w.is_alive():

  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
  File "/usr/lib/python3.10/mult

Epoch [10/15], Loss: 137.4413, Train Accuracy: 0.5516


  0%|          | 0/488 [00:00<?, ?it/s]

[11,   100] loss: 1.446
[11,   200] loss: 1.475
[11,   300] loss: 1.497
[11,   400] loss: 1.495


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [11/15], Loss: 131.9131, Train Accuracy: 0.5707


  0%|          | 0/488 [00:00<?, ?it/s]

[12,   100] loss: 1.408
[12,   200] loss: 1.395
[12,   300] loss: 1.428
[12,   400] loss: 1.436


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [12/15], Loss: 128.0942, Train Accuracy: 0.5824


  0%|          | 0/488 [00:00<?, ?it/s]

[13,   100] loss: 1.329
[13,   200] loss: 1.351
[13,   300] loss: 1.368
[13,   400] loss: 1.383


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [13/15], Loss: 122.7930, Train Accuracy: 0.6136


  0%|          | 0/488 [00:00<?, ?it/s]

[14,   100] loss: 1.278
[14,   200] loss: 1.335
[14,   300] loss: 1.303
[14,   400] loss: 1.275


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [14/15], Loss: 116.1967, Train Accuracy: 0.6255


  0%|          | 0/488 [00:00<?, ?it/s]

[15,   100] loss: 1.254
[15,   200] loss: 1.225
[15,   300] loss: 1.263
[15,   400] loss: 1.243


  0%|          | 0/488 [00:00<?, ?it/s]

Epoch [15/15], Loss: 108.8262, Train Accuracy: 0.6354
Finished Training


In [17]:
from sklearn.metrics import roc_auc_score, precision_score

correct = 0
total = 0
y_true = []
y_pred = []
predictions = []
with torch.no_grad():
    for data in tqdm(val_dataloader):
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(probabilities.cpu().numpy())
        
        
print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))
test_roc_auc = roc_auc_score(y_true, y_pred,multi_class='ovr')
test_classification_report = classification_report(y_true, predictions)
precision = precision_score(y_true,predictions,average='macro')
test_accuracy = accuracy_score(y_true, predictions)

print(f"Test Accuracy: {test_accuracy:.4f}, test precision {precision:.4f}"  f"Test ROC-AUC: {test_roc_auc:.4f}")

print(f"Classification Report:\n{test_classification_report}")


  0%|          | 0/126 [00:00<?, ?it/s]

Accuracy of the network on the test images: 45 %
Test Accuracy: 0.4563, test precision 0.4226Test ROC-AUC: 0.8912
Classification Report:
              precision    recall  f1-score   support

           0       0.35      0.62      0.45       314
           1       0.72      0.71      0.72       317
           2       0.47      0.52      0.50       293
           3       0.41      0.44      0.43       357
           4       0.51      0.48      0.49       330
           5       0.38      0.10      0.16       110
           6       0.35      0.23      0.28       143
           7       0.55      0.47      0.51       116
           8       0.71      0.78      0.74       261
           9       0.41      0.29      0.34       123
          10       0.29      0.26      0.27       113
          11       0.27      0.15      0.19        73
          12       0.40      0.21      0.27       101
          13       0.49      0.57      0.53        60
          14       0.35      0.34      0.35       10

AxisError: axis 1 is out of bounds for array of dimension 1