### 1D-CNN Demo

In [1]:
import os
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

from src.one_d_cnn import *

import sklearn

In [2]:
# Seed every random number generator this notebook relies on.
# NumPy's global generator is included because scikit-learn utilities such as
# train_test_split draw from it whenever no explicit random_state is passed;
# seeding only `random` and `torch` leaves those calls non-reproducible.
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f396ea4a750>

### Dataset

In [3]:
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from sklearn.model_selection import train_test_split
from src.custom_dataset import RadiomicDataset
from sklearn.model_selection import KFold

In [4]:
# HC18 dataset: directory holding the radiomics output, and the name of the
# CSV with the per-image measurements.
root_hc18 = "/mnt/storage/fangyijie/HC18/radiomics"
hc18_ga_csv = "training_set_pixel_size_and_HC_GA.csv"

# SP trans-thalamic dataset: same layout as above.
root_sp = "/mnt/storage/fangyijie/SP_transthalamic/radiomics"
sp_ga_csv = "transthalamic_ga.csv"

# Name of the radiomic feature table; the same file name is used for both
# datasets.
csv = "features.csv"

In [18]:
# HC18: the radiomic feature table first, then the measurement table.
raw_hc18_rad_df, raw_hc18_ga_df = (
    pd.read_csv(os.path.join(root_hc18, table)) for table in (csv, hc18_ga_csv)
)

# SP (trans-thalamic): the same two tables, read in the same order.
raw_sp_rad_df, raw_sp_ga_df = (
    pd.read_csv(os.path.join(root_sp, table)) for table in (csv, sp_ga_csv)
)

In [19]:
# (rows, columns) of the two measurement tables, HC18 first and SP second.
print(raw_hc18_ga_df.shape, raw_sp_ga_df.shape, sep="\n")

(997, 5)
(1552, 3)


In [20]:
# Column names of the two measurement tables; the two sources use different
# headers for the image name and the head circumference.
print(raw_hc18_ga_df.columns, raw_sp_ga_df.columns, sep="\n")

Index(['filename', 'pixel size(mm)', 'head circumference (mm)', 'Unnamed: 3',
       'GA'],
      dtype='object')
Index(['Name', 'HC_mm', 'GA'], dtype='object')


In [21]:
# Reduce each measurement table to the image identifier plus the regression
# target, under the shared column names "image_name" and "HC".
#
# The columns are renamed first and then selected by their new names. This
# keeps the cell safe to re-run: DataFrame.rename ignores keys that are no
# longer present, so a second execution is a no-op rather than a KeyError on
# the already-dropped 'filename' / 'Name' columns.
#
# Predicting GA instead: remove the HC entries from the two rename maps and
# select ["image_name", "GA"] in both frames. Reload the CSVs first if this
# cell has already run, because the GA column is dropped below.

# Predicting HC
raw_hc18_ga_df = (
    raw_hc18_ga_df
    .rename(columns={"filename": "image_name", "head circumference (mm)": "HC"})
    .loc[:, ["image_name", "HC"]]
)
raw_sp_ga_df = (
    raw_sp_ga_df
    .rename(columns={"Name": "image_name", "HC_mm": "HC"})
    .loc[:, ["image_name", "HC"]]
)

In [22]:
# Attach the target column to each radiomic feature table by matching rows on
# "image_name" (default inner join, so only names present in both tables stay).
src_hc_df = raw_hc18_rad_df.merge(raw_hc18_ga_df, on="image_name")
src_sp_df = raw_sp_rad_df.merge(raw_sp_ga_df, on="image_name")

In [23]:
# (rows, columns) of the merged tables, HC18 first and SP second.
print(src_hc_df.shape, src_sp_df.shape, sep="\n")

(997, 120)
(1552, 120)


In [25]:
# First five values of the last column, which holds the merged-in target.
src_hc_df.iloc[:5, -1]

0     79.86
1    269.80
2    174.76
3    224.70
4     71.91
Name: HC, dtype: float64

In [26]:
# Features are every column from position 23 up to, but excluding, the last
# one, so the first 23 columns (positions 0-22) are skipped. The last column
# is the target that was merged in.
X_hc18, y_hc18 = src_hc_df.iloc[:, 23:-1], src_hc_df.iloc[:, -1]
X_sp, y_sp = src_sp_df.iloc[:, 23:-1], src_sp_df.iloc[:, -1]

In [31]:
# Select 70% of each dataset for training and 30% for testing. The two sources
# are split separately, so each contributes the same 70/30 proportion.
#
# random_state pins the shuffle. Without it train_test_split draws from
# NumPy's global RNG, which seeding `random` and `torch` does not control, so
# every run would produce (and later save) a different split. 1234 matches the
# seed used at the top of the notebook.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X_hc18, y_hc18, train_size=0.7, random_state=1234
)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X_sp, y_sp, train_size=0.7, random_state=1234
)

In [32]:
# Stack the per-dataset splits row-wise, HC18 rows first and SP rows second.
# Features and targets are stacked in the same order, so they stay aligned by
# position.
# NOTE(review): the original row labels are kept, so index labels may repeat
# across the two sources. Confirm that downstream code indexes by position.
X_train = pd.concat((X1_train, X2_train), axis=0)
X_test = pd.concat((X1_test, X2_test), axis=0)

y_train = pd.concat((y1_train, y2_train), axis=0)
y_test = pd.concat((y1_test, y2_test), axis=0)

Examine the dataframe size

In [33]:
# Training features and targets; the row counts should match.
print(X_train.shape, y_train.shape, sep="\n")

(1783, 96)
(1783,)


In [34]:
# Test features and targets; the row counts should match.
print(X_test.shape, y_test.shape, sep="\n")

(766, 96)
(766,)


### Save dataframes

In [35]:
# Directory that receives the pickled train/test splits.
# Alternative location for the GA variant:
#   '/mnt/storage/fangyijie/radiomics_ga/ga_data'
trg_dir = "/mnt/storage/fangyijie/radiomics_ga/hc_data"

In [36]:
# Persist the four split objects as pickle files under trg_dir.
# to_pickle does not create missing parent directories, so make sure the
# target directory exists first (a no-op when it is already there).
os.makedirs(trg_dir, exist_ok=True)

X_train.to_pickle(os.path.join(trg_dir, 'X_train.pkl'))
y_train.to_pickle(os.path.join(trg_dir, 'y_train.pkl'))

X_test.to_pickle(os.path.join(trg_dir, 'X_test.pkl'))
y_test.to_pickle(os.path.join(trg_dir, 'y_test.pkl'))

How to load a saved dataframe file (pkl):

In [None]:
# X_train = pd.read_pickle(os.path.join(trg_dir, 'X_train.pkl'))