#### Path and Shell

In [1]:
import subprocess
import csv

def call_shell(cmd, *argv):
    """Run an external program and return what it wrote to stdout.

    ``cmd`` and every extra positional argument are passed to
    ``subprocess.run`` as one argument list (no shell is involved).
    Only stdout is captured; the exit status is not checked, so a failing
    command simply yields whatever it printed (often an empty string).

    Returns the captured stdout decoded as UTF-8.
    """
    command = [cmd, *argv]
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8')

#### Selected DATA + Info

In [2]:
import pandas as pd
import numpy as np

# Load the OASIS cross-sectional table, using its first column as the index,
# and map the CDR score 0.5 onto 1.
data = pd.read_csv('oasis_cross-sectional.csv', index_col=0)
data = data.assign(CDR=data['CDR'].replace(0.5, 1))
data.head()

Unnamed: 0_level_0,M/F,Hand,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
OAS1_0072_MR1,F,R,60,5,1.0,30,0.0,1402,823.0,1252.0
OAS1_0200_MR1,F,R,60,2,4.0,30,0.0,1366,807.0,1285.0
OAS1_0109_MR1,F,R,61,4,3.0,30,0.0,1313,813.0,1337.0
OAS1_0455_MR1,F,R,61,2,4.0,28,0.0,1354,825.0,1297.0
OAS1_0456_MR1,M,R,61,5,2.0,30,0.0,1637,0.78,1072.0


#### Cross Validation Subset

In [3]:
def cross_val_subset(x,y,k=10):
    """Build ``k`` train/test partitions of the paired samples ``(x, y)``.

    The pairs are shuffled once with a fixed seed (42). For fold ``i`` a block
    of ``len(y) // k`` consecutive shuffled samples is held out as the test
    set and all remaining samples form the training set. The held-out blocks
    of different folds never overlap; when ``len(y)`` is not a multiple of
    ``k`` the leftover samples only ever appear in training sets.

    Parameters
    ----------
    x, y : sequences
        Sample identifiers and their labels. They are paired with ``zip``,
        so surplus items of the longer sequence are dropped.
    k : int, default 10
        Number of folds.

    Returns
    -------
    (rtn_x, rtn_y) : tuple of two object arrays of length ``k``
        ``rtn_x[i]`` is the tuple ``(train_x, test_x)`` of NumPy arrays for
        fold ``i``; ``rtn_y[i]`` holds the matching ``(train_y, test_y)``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    from random import Random

    if k < 1:
        raise ValueError('k must be a positive integer, got {!r}'.format(k))

    pairs = list(zip(x, y))
    Random(42).shuffle(pairs)
    # zip(*[]) has nothing to unpack, so empty input is handled explicitly.
    x, y = zip(*pairs) if pairs else ((), ())
    x, y = np.asarray(x), np.asarray(y)

    TEST = len(y) // k
    TRAIN = len(y) - TEST

    rtn_x, rtn_y = np.zeros(k, dtype=object), np.zeros(k, dtype=object)

    for i in range(k):
        # Always roll the shuffled originals. Re-rolling the arrays produced
        # for the previous fold accumulates the shifts (0, 1, 3, 6, ... blocks)
        # and makes several folds share the same test set.
        start = i * TEST
        fold_x = np.roll(x, start)
        fold_y = np.roll(y, start)
        rtn_x[i] = (fold_x[:TRAIN], fold_x[TRAIN:])
        rtn_y[i] = (fold_y[:TRAIN], fold_y[TRAIN:])
    return rtn_x, rtn_y

In [4]:
import csv
import os

# k_x[i] / k_y[i] hold the (train, test) ids / labels of fold i.
k_x,k_y = cross_val_subset(data.index.values, data.CDR.values)

DEST = '/media/avell/Novo volume/CV_SARGITAL/'
FOLDER = '/media/avell/Novo volume/RAW_SARGITAL/'
EXT = '_s.jpg'

fold_names = ['fold_{}'.format(n) for n in range(len(k_x))]

# Create DEST/fold_n/{train,test}/{1.0,0.0}. makedirs also creates missing
# parents and tolerates existing directories, so the cell can be re-run.
for fold_name in fold_names:
    for subset in ('train', 'test'):
        for label in ('1.0', '0.0'):
            os.makedirs(os.path.join(DEST, fold_name, subset, label), exist_ok=True)

for x,y,k in zip(k_x,k_y,fold_names):
    for subset, ids, labels in zip(('train', 'test'), x, y):
        to_ = DEST+k+'/'+subset+'/'

        # train.csv / test.csv: one "id,label" row per sample of the subset.
        # newline='' is what the csv module expects for files it writes.
        with open(to_+subset+'.csv','w',newline='') as csvFile:
            csv.writer(csvFile).writerows(zip(ids, labels))

        # Copy each image into the directory named after its label.
        # call_shell does not check the exit status, so a missing source
        # image is skipped without any error.
        for sample_id, label in zip(ids, labels):
            from_ = FOLDER+str(sample_id)+EXT
            call_shell('cp',from_,to_+str(label))

#### Cross Validation Super Learner

In [None]:
import os

DEST = 'CV_SARGITAL/'
EXT = '_s.jpg'

# For every outer fold, split its training set again into inner folds that
# are written below that fold's train/ directory.
for outer in range(10):
    ldst = DEST+'fold_{}/'.format(outer)
    FOLDER = ldst+'train/'
    df = pd.read_csv(FOLDER+'train.csv',header=None,names=['ID','CDR'])
    k_x,k_y = cross_val_subset(df.ID.values, df.CDR.values)
    fold_names = ['fold_{}'.format(n) for n in range(len(k_x))]

    # Create FOLDER/fold_n/{train,test}/{1.0,0.0}; idempotent on re-runs.
    for fold_name in fold_names:
        for subset in ('train', 'test'):
            for label in ('1.0', '0.0'):
                os.makedirs(os.path.join(FOLDER, fold_name, subset, label), exist_ok=True)

    for x,y,k in zip(k_x,k_y,fold_names):
        for subset, ids, labels in zip(('train', 'test'), x, y):
            to_ = FOLDER+k+'/'+subset+'/'

            # train.csv / test.csv: one "id,label" row per sample.
            with open(to_+subset+'.csv','w',newline='') as csvFile:
                csv.writer(csvFile).writerows(zip(ids, labels))

            # Source images live in the outer fold's train/<label>/ folder.
            # call_shell ignores the exit status, so missing files are
            # skipped silently.
            for sample_id, label in zip(ids, labels):
                from_ = FOLDER+str(label)+'/'+str(sample_id)+EXT
                call_shell('cp',from_,to_+str(label))

In [None]:
from skimage import io

CORONAL = 'c.jpg'
AXIAL   = 'a.jpg'

# NOTE(review): FOLDER and DEST are not assigned in this cell; they hold
# whatever the most recently executed cell left behind. Confirm both point at
# the intended source / destination before running.
for ID in data.index:
    label_dir = str(data.at[ID,'CDR'])
    coronal_dest = DEST+'CORONAL/'+label_dir
    axial_dest = DEST+'AXIAL/'+label_dir

    # Axial slices 85..96 are paired with coronal slices 105..116.
    for axial_no in range(85,97):
        coronal_no = axial_no+20
        coronal_src = FOLDER+ID+'_{}_'.format(coronal_no)+CORONAL
        axial_src = FOLDER+ID+'_{}_'.format(axial_no)+AXIAL

        call_shell('cp',coronal_src,coronal_dest)
        call_shell('cp',axial_src,axial_dest)

In [None]:
def data_create(dir_,data):
    """Pack the labelled .jpg images below ``dir_`` into two .npy files.

    Every visible sub-directory of ``dir_`` is treated as one class whose name
    is the numeric label (converted with ``float``). The images matching
    ``<dir_><label>/*.jpg`` are stacked, in sorted label order, into a float
    array saved as ``<data>_x.npy``; the matching labels are saved as
    ``<data>_y.npy``.

    Parameters
    ----------
    dir_ : str
        Directory holding one sub-directory per label; it is concatenated
        with the label name, so it has to end with a path separator.
    data : str
        Prefix of the two output files.

    Raises
    ------
    ValueError
        If no image is found, if a sub-directory name is not a number, or if
        the first image is not 2-D (its shape is unpacked into rows, columns).
    """
    import os

    # List the class folders directly instead of parsing `ls` output.
    labels = sorted(
        name for name in os.listdir(dir_)
        if not name.startswith('.') and os.path.isdir(os.path.join(dir_, name))
    )
    imgs = [io.ImageCollection(dir_+name+'/*.jpg') for name in labels]

    total = sum(len(collection) for collection in imgs)
    if total == 0:
        raise ValueError('no .jpg images found under {!r}'.format(dir_))

    #Labels: one entry per image, repeated per class.
    y = np.concatenate(
        [np.full(len(collection), float(name)) for collection, name in zip(imgs, labels)]
    )

    #Features: every image is expected to share the shape of the first one.
    first = next(collection for collection in imgs if len(collection))[0]
    R,C = first.shape
    x = np.zeros([total,R,C])
    for cnt, image in enumerate(img for collection in imgs for img in collection):
        x[cnt] = image

    np.save(data+'_x.npy',x)
    np.save(data+'_y.npy',y)

# Build the axial and coronal arrays from their dataset folders.
for source_dir, prefix in (
    ('/media/avell/Novo volume/dataset/AXIAL/', 'axial'),
    ('/media/avell/Novo volume/dataset/CORONAL/', 'coronal'),
):
    data_create(source_dir, prefix)

In [None]:
# Sanity check: shapes of the saved axial features and labels.
for saved in ('axial_x.npy', 'axial_y.npy'):
    print(np.load(saved).shape)

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt

d2 = np.load('coronal_x.npy')
d1 = np.load('axial_x.npy')

# Left: one full axial image. Right: the crop that is kept below.
plt.subplot(121)
plt.imshow(d1[2],cmap='gray')
plt.subplot(122)
im = d1[2][40:150,50:130]
plt.imshow(im,cmap='gray')

# Apply the same crop (rows 40:150, columns 50:130) to every axial image.
axial_small = np.array(d1[:,40:150,50:130], dtype=float)
# np.save appends '.npy' to names that lack it, so the former
# 'axial_small.pny' target ended up on disk as 'axial_small.pny.npy'.
np.save('axial_small.npy',axial_small)

In [None]:
# Left: one full coronal image. Right: the crop that is kept below.
plt.subplot(121)
plt.imshow(d2[9],cmap='gray')
plt.subplot(122)
plt.imshow(d2[9][60:135,40:130],cmap='gray')

# Apply the same crop (rows 60:135, columns 40:130) to every coronal image.
# The array is sized from d2 itself; sizing it from the axial array d1 breaks
# as soon as the two datasets hold a different number of images.
coronal_small = np.array(d2[:,60:135,40:130], dtype=float)
# np.save appends '.npy' to names that lack it, so the former
# 'coronal_small.pny' target ended up on disk as 'coronal_small.pny.npy'.
np.save('coronal_small.npy',coronal_small)

In [None]:
from sklearn.model_selection import train_test_split, cross_validate

IDarray, CDRarray = data.index.values, data.CDR.values

# Hold out 20% of the subjects as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    IDarray, CDRarray, random_state=42, test_size=0.2)

# Use the split above for the Random Forest.

# Then take 20% of the remaining training subjects as the validation set.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, random_state=42, test_size=0.2)

def split_data(org,dest,x,y,ext,slices=range(85,97)):
    """Copy the per-slice images of the given subjects from ``org`` to ``dest``.

    For every ``(xi, yi)`` pair and every slice number ``n`` the file
    ``<org><yi>/<xi>_<n><ext>`` is copied into the directory ``<dest><yi>``.
    Source files that do not exist are skipped. The destination directory is
    created on first use, so copies never collapse into a single file named
    after the label.

    Parameters
    ----------
    org, dest : str
        Source and destination roots; both are concatenated with the label,
        so they have to end with a path separator.
    x, y : sequences
        Subject ids (strings) and their labels, paired with ``zip``.
    ext : str
        File-name suffix after the slice number, e.g. ``'_a.jpg'``.
    slices : iterable of int, default ``range(85, 97)``
        Slice numbers to copy for every subject.
    """
    import os
    import shutil

    for xi, yi in zip(x,y):
        dest_path = dest+str(yi)
        for n in slices:
            org_path = org+str(yi)+'/'+xi+'_{}'.format(n)+ext
            if not os.path.isfile(org_path):
                # Nothing to copy for this slice.
                continue
            # Only create the label folder once there is a file to put in it.
            os.makedirs(dest_path, exist_ok=True)
            shutil.copy(org_path, dest_path)
        
ORG = '/media/avell/Novo volume/dataset/AXIAL/'

# Copy the axial images of each subset into its own folder below ORG.
for subset_dir, subset_ids, subset_labels in (
    ('validation/', x_val, y_val),
    ('test/', x_test, y_test),
    ('train/', x_train, y_train),
):
    DEST = ORG+subset_dir
    split_data(ORG,DEST,subset_ids,subset_labels,'_a.jpg')