<a href="https://colab.research.google.com/github/NguyenThuan215/ML-echo-cardiography/blob/main/echocardiography.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Thư viện

In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt

from google.colab import drive
import os

import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder

from collections import namedtuple
from sklearn.metrics import classification_report

import torch
from torch import nn
from torch.utils.data import DataLoader

# Kết nối với data

In [2]:
# drive.mount("/content/drive")
# !unzip -uq "/content/drive/My Drive/DATA_CHAMBER_2021.zip" -d "./"
# traindir = "DATA_CHAMBER_2021/train"
# testdir = "DATA_CHAMBER_2021/test"

In [3]:
!git clone https://github.com/NguyenThuan215/ML-echo-cardiography
traindir = "/content/ML-echo-cardiography/DATA_CHAMBER_2021/train"
testdir = "/content/ML-echo-cardiography/DATA_CHAMBER_2021/test"

Cloning into 'ML-echo-cardiography'...
remote: Enumerating objects: 8348, done.[K
remote: Counting objects: 100% (8348/8348), done.[K
remote: Compressing objects: 100% (8345/8345), done.[K
remote: Total 8348 (delta 10), reused 8327 (delta 1), pack-reused 0[K
Receiving objects: 100% (8348/8348), 488.03 MiB | 27.04 MiB/s, done.
Resolving deltas: 100% (10/10), done.
Checking out files: 100% (8328/8328), done.


# Chuẩn bị dữ liệu:
1. Các lớp: {2C, 3C, 4C}
2. Đọc dữ liệu trong file 'traindir' và 'testdir'
3. Đưa dữ liệu vào các batch để xử lý song song


In [4]:
TrainTest = namedtuple('TrainTest', ['train', 'test'])

def get_classes():
  classes = ['2C', '3C', '4C']
  return classes

def prepare_data():
  image_size = 224
  transform_train = transforms.Compose([
      transforms.Resize((image_size,image_size)),                                    
      transforms.ToTensor(),
  ])
  transform_test = transforms.Compose([
      transforms.Resize((image_size,image_size)),
      transforms.ToTensor(),
  ])
  trainset = torchvision.datasets.ImageFolder(root=traindir, transform=transform_train)
  testset = torchvision.datasets.ImageFolder(root=testdir, transform=transform_test)
  return TrainTest(train=trainset, test=testset)

def prepare_loader(datasets):
  batch = 32
  worker = 4
  trainloader = DataLoader(dataset=datasets.train, batch_size=batch, shuffle=True, num_workers=worker)
  testloader = DataLoader(dataset=datasets.test, batch_size=batch, shuffle=False, num_workers=worker)
  return TrainTest(train=trainloader, test=testloader)

# Train/Test mô hình:
1. Train: với mỗi batch trong tập train:
  - Cho ảnh đi qua model
  - Tính lỗi bằng hàm lỗi "loss_func"
  - Cập nhật tham số
  - Báo cáo sau "reporting_steps" bước
2. Test:
  - Đặt model ở chế độ đánh giá (evaluate)
  - Tính toán đầu ra cho từng ảnh
  - trả về nhãn dự đoán/nhãn thực

In [5]:
def train_epoch(epoch, model, loader, loss_func, optimizer, device):
  model.train()
  running_loss = 0.0
  reporting_steps = 40
  step = 0
  for images, labels in loader:
    step += 1
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    loss = loss_func(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if step % reporting_steps == reporting_steps - 1:
      print(f"Epoch {epoch} step {step} ave_loss {running_loss/reporting_steps:.4f}")
      running_loss = 0.0

def test_epoch(epoch, model, loader, device):
  ytrue = []
  ypred = []
  with torch.no_grad():
    model.eval()
    for images, labels in loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, dim=1)
      ytrue += list(labels.cpu().numpy())
      ypred += list(predicted.cpu().numpy())

  return ypred, ytrue

# Tạo và thực nghiệm mô hình:
1. Tham số truyền vào main:
  - "PATH": file lưu lại mô hình
  - "model_in": string thể hiện tên mô hình muốn thực nghiệm.
2. Sửa đổi đầu ra của lớp Linear cuối cùng thành "3" để phù hợp với yêu cầu bào toán
3. Sử dụng hàm lỗi CrossEntropyLoss, hàm tối ưu SGD (Stochastic Gradient Descent)


In [6]:
def main(PATH='./model.pth', model_in=None):
  classes = get_classes()
  datasets = prepare_data()
  loaders = prepare_loader(datasets)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  print("Num Images in train set:", len(datasets.train))
  print("Num Images in test set:", len(datasets.test))
  print("Num batch in train set: ", len(loaders.train))
  print("class: ", datasets.train.class_to_idx)
  print("image size: ", datasets.train[0][0].shape)
  print("device:", device)
  print("model:", model_in)

  if model_in == 'vgg16':  
    model = torchvision.models.vgg16()
    model.classifier[6] = torch.nn.modules.linear.Linear(in_features=4096, out_features=3, bias=True)
  elif model_in == 'resnet50':
    model = torchvision.models.resnet50()
    model.fc = torch.nn.modules.linear.Linear(in_features=2048, out_features=3, bias=True) 
  elif model_in == 'resnet18':
    model = torchvision.models.resnet18()
    model.fc = torch.nn.modules.linear.Linear(in_features=512, out_features=3, bias=True) 
  else: 
    # model = torchvision.models.googlenet()
    # model.fc.out_features = 3
    pass


  model.to(device=device)
  loss_func = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

  accuracies = []
  for epoch in range(10):
    print("---------------------------------------------------------------")
    print(f"\nEpoch {epoch} report: ")
    train_epoch(epoch, model, loaders.train, loss_func, optimizer, device)
    ypred_test, ytrue_test = test_epoch(epoch, model, loaders.test, device)
    print("Test report: \n", classification_report(ytrue_test, ypred_test, target_names=classes))
    torch.save(model.state_dict(), PATH)

    # calculate accurency
    ypred_test = np.array(ypred_test)
    ytrue_test = np.array(ytrue_test)
    accuracy = (ytrue_test==ypred_test).sum() / len(ytrue_test)
    accuracies.append(accuracy)


  fig, ax1 = plt.subplots(1,1, figsize=(8,4))
  ax1.plot(accuracies, "bo--", label=model_in)
  ax1.set(title=model_in, xlabel="epoch", ylabel="accuracy"+ model_in, xlim=(-0.5,10), ylim=(0,1))
  plt.show()

  return model

In [None]:
model = main(PATH="./vgg16.pth", model_in='vgg16')

  cpuset_checked))


Num Images in train set: 6717
Num Images in test set: 1607
Num batch in train set:  210
<class 'torch.utils.data.dataloader.DataLoader'>
class:  {'2C': 0, '3C': 1, '4C': 2}
image size:  torch.Size([3, 224, 224])
device: cuda
model: vgg16
---------------------------------------------------------------

Epoch 0 report: 


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 0 step 39 ave_loss 1.0651
Epoch 0 step 79 ave_loss 0.8773
Epoch 0 step 119 ave_loss 0.5270
Epoch 0 step 159 ave_loss 0.3648
Epoch 0 step 199 ave_loss 0.9026
Test report: 
               precision    recall  f1-score   support

          2C       0.54      0.72      0.62       409
          3C       0.57      0.81      0.67       367
          4C       1.00      0.65      0.79       831

    accuracy                           0.71      1607
   macro avg       0.70      0.73      0.69      1607
weighted avg       0.78      0.71      0.72      1607

---------------------------------------------------------------

Epoch 1 report: 
Epoch 1 step 39 ave_loss 0.3395
Epoch 1 step 79 ave_loss 0.2141
Epoch 1 step 119 ave_loss 0.1455
Epoch 1 step 159 ave_loss 0.0924
Epoch 1 step 199 ave_loss 0.0457
Test report: 
               precision    recall  f1-score   support

          2C       0.78      0.77      0.77       409
          3C       0.58      1.00      0.73       367
          4C      

In [None]:
model = main(PATH="./resnet18.pth", model_in='resnet18')

In [None]:
model = main(PATH="./resnet50.pth", model_in='resnet50')

In [None]:
# model = main(PATH="./googlenet.pth", model_in='googlenet')

# Vẽ Biểu đồ

In [None]:
# vgg16_64 = [0.25, 0.43, 0.66, 0.72, 0.80, 0.87, 0.90, 0.90, 0.90, 0.91]
# vgg16_224 = [0.25, 0.61, 0.78, 0.83, 0.86, 0.87, 0.88, 0.92, 0.89, 0.90]

# resnet18_64 = [0.56, 0.63, 0.75, 0.85, 0.89, 0.90, 0.90, 0.91, 0.90, 0.91]
# resnet18_224 = [0.65, 0.53, 0.71, 0.81, 0.83, 0.89, 0.90, 0.91, 0.90, 0.92]

# resnet50_64 = [0.65, 0.80, 0.79, 0.78, 0.80, 0.77, 0.83, 0.82, 0.82, 0.84]
# resnet50_224 = [0.61, 0.77, 0.83, 0.83, 0.86, 0.81, 0.83, 0.89, 0.85, 0.87]

# net = [vgg16_64, vgg16_224, resnet18_64, resnet18_224, resnet50_64, resnet50_224]
# fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3,2, figsize=(15,18))

# ax1.plot(vgg16_64, "bo--", label="64x64")
# ax1.set(title="VGG16 64x64", xlabel="epoch", ylabel="accuracy",xlim=(-0.5,10), ylim=(0,1))
# ax2.plot(vgg16_224, "go--", label="224x224")
# ax2.set(title="VGG16 224x224", xlabel="epoch", ylabel="accuracy", xlim=(-0.5,10), ylim=(0,1))

# ax3.plot(resnet18_64, "bo--", label="64x64")
# ax3.set(title="RESNET18 64x64", xlabel="epoch", ylabel="accuracy",xlim=(-0.5,10), ylim=(0,1))
# ax4.plot(resnet18_224, "go--", label="224x224")
# ax4.set(title="RESNET18 224x224", xlabel="epoch", ylabel="accuracy", xlim=(-0.5,10), ylim=(0,1))

# ax5.plot(resnet50_64, "bo--", label="64x64")
# ax5.set(title="RESNET50 64x64", xlabel="epoch", ylabel="accuracy",xlim=(-0.5,10), ylim=(0,1))
# ax6.plot(resnet50_224, "go--", label="224x224")
# ax6.set(title="RESNET50 224x224", xlabel="epoch", ylabel="accuracy", xlim=(-0.5,10), ylim=(0,1))

# fig.xlabel="x"
# fig.ylabel="y"
# plt.show()

# c = []

# a = [1,2,1,1,2, 1,1 ,1,2,2]
# b = [1,2,2,1,2, 2,1 ,1,2,2]
# a = np.array(a)
# b = np.array(b)


# c.append((a==b).sum() / len(a))
# c.append((a!=b).sum() / len(a))

# fig, ax1 = plt.subplots(1,1, figsize=(8,4))

# ax1.plot(a, "bo--", label="64x64")
# ax1.set(title="VGG16 64x64", xlabel="epoch", ylabel="accuracy",xlim=(-0.5,10), ylim=(0,3))
# ax2.plot(vgg16_224, "go--", label="224x224")
# ax2.set(title="VGG16 224x224", xlabel="epoch", ylabel="accuracy", xlim=(-0.5,10), ylim=(0,1))

# ax3.plot(resnet18_64, "bo--", label="64x64")
# ax3.set(title="RESNET18 64x64", xlabel="epoch", ylabel="accuracy",xlim=(-0.5,10), ylim=(0,1))
# ax4.plot(resnet18_224, "go--", label="224x224")
# ax4.set(title="RESNET18 224x224", xlabel="epoch", ylabel="accuracy", xlim=(-0.5,10), ylim=(0,1))

# ax5.plot(resnet50_64, "bo--", label="64x64")
# ax5.set(title="RESNET50 64x64", xlabel="epoch", ylabel="accuracy",xlim=(-0.5,10), ylim=(0,1))
# ax6.plot(resnet50_224, "go--", label="224x224")
# ax6.set(title="RESNET50 224x224", xlabel="epoch", ylabel="accuracy", xlim=(-0.5,10), ylim=(0,1))

# fig.xlabel="x"
# fig.ylabel="y"
# plt.show()