In [1]:
import cv2                 
import numpy as np         
import os                  
from random import shuffle 
from tqdm import tqdm  
import pandas as pd

TRAIN_DIR = './images'
TEST_DIR = './test'
IMG_SIZE = 50
LR = 1e-3

MODEL_NAME = 'lung_cancer-{}-{}.model'.format(LR, 'lungs-basic') 
labels = {1:'Atelectasis', 2: 'Cardiomegaly', 3: 'Effusion', 4: 'Infiltration', 5: 'Mass', 6: 'Nodule', 7: 'Pneumonia', 8:
'Pneumothorax', 9: 'Consolidation', 10: 'Edema', 11: 'Emphysema', 12: 'Fibrosis', 13:
'Pleural_Thickening', 14: 'Hernia',15:'No Finding'}
labels = {v: k for k, v in labels.iteritems()}


In [2]:
ds = pd.read_csv('sample_labels.csv')
ds.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImageWidth,OriginalImageHeight,OriginalImagePixelSpacing_x,OriginalImagePixelSpacing_y
0,00000013_005.png,Emphysema|Infiltration|Pleural_Thickening|Pneu...,5,13,060Y,M,AP,3056,2544,0.139,0.139
1,00000013_026.png,Cardiomegaly|Emphysema,26,13,057Y,M,AP,2500,2048,0.168,0.168
2,00000017_001.png,No Finding,1,17,077Y,M,AP,2500,2048,0.168,0.168
3,00000030_001.png,Atelectasis,1,30,079Y,M,PA,2992,2991,0.143,0.143
4,00000032_001.png,Cardiomegaly|Edema|Effusion,1,32,055Y,F,AP,2500,2048,0.168,0.168


In [3]:
def split_map(val):
    new_val = []
    val = val.split('|')
    for i in val:
        new_val.append(labels[i])
    new_val = '|'.join(str(e) for e in new_val)
    return new_val
def split_map_arr(val):
    new_val = []
    arr = np.zeros((15,), dtype=int)
    val = val.split('|')
    for i in val:
        ind = labels[i]-1
        arr[ind] = 1
        
    return arr



In [4]:
value_r={}
for (i,v) in ds['Finding Labels'].iteritems():
    str_val = str(v)
    val = split_map(str_val)
    value_r[str_val]  = val
    
# ds['Finding Labels'] = ds['Finding Labels'].map(value_r)

In [5]:
ds1 = ds[['Image Index', 'Finding Labels']]

In [6]:
value_r1={}
for (i,v) in ds1['Finding Labels'].iteritems():
    str_val = str(v)
    val = split_map_arr(str_val)
    value_r1[str_val]  = val

ds1['Finding Labels'] = ds1['Finding Labels'].map(value_r1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [7]:
def create_label(img):
    vs = ds1.loc[ds1['Image Index'] == img]
    return vs.iloc[0]['Finding Labels']

def create_train_data():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = create_label(img)
        
        path = os.path.join(TRAIN_DIR,img)
        img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        training_data.append([np.array(img),np.array(label)])
    shuffle(training_data)
    np.save('train_data.npy', training_data)
    return training_data

In [8]:
# train_data = create_train_data()
train_data = np.load('train_data.npy')

In [9]:
train_data

array([[array([[  3,   0,   0, ...,   0,   0,   0],
       [  0,   0,   0, ...,   2,   0,   0],
       [ 33,   0,   0, ...,  46,  24,  26],
       ...,
       [190, 108, 140, ...,   0,   0,   0],
       [185, 103, 140, ...,   0,   0,   0],
       [189, 130, 178, ...,  13,   0,   0]], dtype=uint8),
        array([1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
       [array([[  0,   0,   0, ...,   0,   0,   0],
       [ 14,  15,  27, ...,   0,   0,   0],
       [ 58,  65,  62, ...,  17,  19,   0],
       ...,
       [138, 170, 185, ...,  81,  11,   0],
       [135, 169, 199, ...,  70,  12,   0],
       [127, 190, 187, ...,  60,  12,   0]], dtype=uint8),
        array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
       [array([[203,  34,   2, ...,  10,   6, 105],
       [205,  32,   2, ...,   8,   4, 109],
       [202,  30,   2, ..., 189,   7, 237],
       ...,
       [135,   7,  10, ...,   7,   9, 181],
       [136,   6,  11, ...,   6,   9, 182],
       [136,   6,  11, ...,   6,   9

In [10]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 128, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 15, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.


In [11]:
if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print('model loaded!')

In [12]:
train = train_data[:-500]
test = train_data[-500:]

In [13]:
X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]

In [15]:
model.fit({'input': X}, {'targets': Y}, n_epoch=7, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

Training Step: 959  | total loss: [1m[32m2.30471[0m[0m | time: 8.290s
| Adam | epoch: 012 | loss: 2.30471 - acc: 0.5337 -- iter: 5056/5106
Training Step: 960  | total loss: [1m[32m2.28197[0m[0m | time: 9.385s
| Adam | epoch: 012 | loss: 2.28197 - acc: 0.5397 | val_loss: 2.51343 - val_acc: 0.5280 -- iter: 5106/5106
--
