In [1]:
import os
import random

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import SimpleITK as sitk
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from torch.utils.tensorboard import SummaryWriter
import torch
import torch.nn as nn
import monai.transforms as monai_transforms

from dataset import (
    MRIDataset, 
    get_loader, get_data, get_covid_data,
    get_normalization_param, 
    get_transform, get_normalization_param_nomask)
from models import C3D, generate_model, ResNet, get_model
from train import epoch_iter, add_metrics, save_checkpoint, get_metrics



In [2]:
df = pd.read_excel("/data1/KidLead/KidLead_analysis/project_info/PBproject_info.xlsx", sheet_name = 'Total')

In [3]:
df

Unnamed: 0,HN,First_Name,Surname,TH_Name,FSIQ_01,IQ_01_range,Age_Day_01,Age_Y_01,Age_M_01,Lead(Pb),SEX_convert,Group
0,3640688,Aiyada,Samanchuea,อัยดา สมานเชื้อ,107,Average,3637,9,119,3.030,F,normal
1,4726412,Akkharadet,Saengchan,อัครเดช แสงจันทร์,116,bright normal,4807,13,157,5.270,M,normal
2,4726420,Alina,Wongchaidet,อาลีน่า วงษ์ไชยเดช,93,Average,3406,9,111,2.820,F,normal
3,4720316,Amani,Charoenying,อมานี เจริญยิ่ง,107,Average,3287,8,107,1.760,F,normal
4,4720208,Ananya,Charoenmun,อนัญญา เจริญมูล,99,Average,3586,9,117,2.840,F,normal
...,...,...,...,...,...,...,...,...,...,...,...,...
149,4854341,Surasak,Phaisantham,สุรศักดิ์ ไพศาลธรรม,87,low average,4516,12,148,22.830,M,lead
150,4854351,Waranya,Bupphachaem,วรัณญา บุปผาแช่ม,90,Average,4279,11,140,11.685,F,lead
151,4854350,Warinya,Bupphachaem,วริณญา บุปผาแช่ม,91,Average,4279,11,140,11.685,F,lead
152,4854346,Wawalee,Tamasee,วาวลี ตามาสี,100,Average,4028,11,132,11.450,F,lead


In [15]:
F.softmax(torch.Tensor(a)).tolist()

Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.


tensor([[0.4173, 0.5827],
        [0.4646, 0.5354],
        [0.3669, 0.6331]])

In [11]:
a = [[-0.02297105,  0.31076393],
 [ 0.27239621,  0.41407093],
 [-0.03566893,  0.50999635]]

In [12]:
a

[[-0.02297105, 0.31076393],
 [0.27239621, 0.41407093],
 [-0.03566893, 0.50999635]]

In [5]:
number_of_data = 961
test_size = 0.2
random_state = 55555
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, random_state=random_state, shuffle=True)
all_index = list(range(number_of_data))

train_index, test_index = train_test_split(all_index,  test_size= test_size, random_state=random_state)

for i, (train_index, val_index) in enumerate(kf.split(train_index)):
    print(val_index)


[  1   5   8  10  20  27  32  35  36  42  46  49  50  54  55  57  60  65
  72  77  79  92  94  97 100 104 109 111 116 124 125 132 134 136 137 138
 141 142 145 146 148 166 175 177 178 189 205 206 211 213 214 219 222 223
 241 242 251 256 266 279 291 295 300 304 307 309 320 321 323 334 337 341
 344 353 354 357 363 369 370 371 374 375 376 394 403 412 413 427 439 442
 449 460 467 468 469 476 480 490 497 501 504 506 508 509 513 514 517 522
 527 528 530 538 540 541 546 548 552 563 565 571 576 607 623 624 625 627
 634 645 646 648 650 653 658 659 663 664 672 673 687 694 696 699 701 704
 729 736 744 748 751 752 754 755 760 765]
[  0   6   7  13  15  18  21  24  31  37  48  52  53  61  63  69  70  76
  81  82  85  86  87  88  91  95  99 103 117 121 129 147 157 158 162 167
 169 171 181 184 185 187 191 202 204 226 234 236 244 248 250 252 254 262
 282 283 292 294 296 299 301 308 312 317 325 326 328 333 335 336 338 345
 349 352 367 379 387 397 398 400 404 405 410 411 414 419 426 428 432 437
 440 443 

In [2]:
basepath = "/data0/AI_SAMPLES/AI_SAMPLES_18DEC2022"
csvpath = '/data0/AI_SAMPLES/mapping_18DEC2022.csv'

df_data = pd.read_csv(csvpath, header=None, index_col = 0, usecols=[0, 1, 4])
df_data.columns = ['class', 'name']
df_data.index.name = None

filenames = []
labels = []

for name, label in zip(df_data.name, df_data['class']):
    folderpath = os.path.join(basepath, name)
    try:
        fdt_paths_path = os.path.join(folderpath, 'DTI', 'fdt_paths_vol0.nii.gz')
        fdt_paths = sitk.ReadImage(fdt_paths_path)
        a = sitk.GetArrayFromImage(fdt_paths)
        if a.shape[0] != 62 or a.shape[1] != 128 or a.shape[2] != 128:
            print(a.shape)
            continue
    except:
        continue
    filenames.append(name)
    if label == 'mci':
        labels.append(1)
    elif label == 'normal':
        labels.append(0)
    elif label == 'mmd':
        labels.append(2) 


df_data = pd.DataFrame()
df_data['label'] = labels
df_data['name'] = filenames

(64, 128, 128)
(64, 128, 128)
(64, 128, 128)
(60, 128, 128)
(64, 128, 128)
(64, 128, 128)
(62, 144, 144)
(60, 128, 128)


In [35]:
y = torch.Tensor(labels)
x = filenames

In [38]:
y[0].to('cuda')

tensor(1., device='cuda:0')

In [32]:
len(x)

198

In [25]:

x_train, x_test, y_train,  y_test = train_test_split(x, y,  test_size= 0.25)


In [26]:
basepath = "/data0/AI_SAMPLES/AI_SAMPLES_18DEC2022"

train_dataset = MRIDataset(basepath ,x_train, y_train, 'DTI', 'fdt_paths_vol0.nii.gz', transform=None)
test_dataset = MRIDataset(basepath ,x_test, y_test, 'DTI', 'fdt_paths_vol0.nii.gz', transform=None)


In [27]:
batch_size = 2
num_workers = 0

train_loader = DataLoader(
    train_dataset, 
    batch_size= batch_size, 
    drop_last=True,
    shuffle =True,
    num_workers=num_workers,
    pin_memory = True)

val_loader = DataLoader(
    test_dataset, 
    batch_size= batch_size, 
    drop_last=False,
    shuffle =False,
    num_workers=num_workers,
    pin_memory = True)


In [28]:
model = C3D()

In [29]:
for img, label in train_loader:
    pred = model(img)
    break

In [11]:
pred

tensor([[0.5956, 0.4957, 0.4121],
        [0.5534, 0.5574, 0.4805]], grad_fn=<SigmoidBackward0>)

In [10]:
import torch.nn as nn

In [12]:
loss = nn.CrossEntropyLoss()

In [21]:
label

tensor([2, 0])

In [23]:
pred

tensor([[-0.5796,  0.0397, -0.1379],
        [-0.1161, -0.3968,  0.1287]], grad_fn=<AddmmBackward0>)

In [22]:
loss(pred, label)

tensor(1.0494, grad_fn=<NllLossBackward0>)

In [23]:
label

tensor([2, 0])