In [6]:
import numpy,pandas,os,sklearn,keras
import sklearn.metrics
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split as tts

import utils

# Image Data

## Suspicious Mammographies (BCDR-D01 & BCDR-D02)

### CSV Analysis

In [None]:
# Load the image tables and lesion-feature tables of BCDR-D01 and BCDR-D02.
# The feature-table paths used to contain backslashes inside a normal string
# literal, which only worked because '\B' is not a recognised escape sequence;
# they now use forward slashes like every other path in this notebook.

# BCDR-D01: keep one row per image file, then pass the table through utils.fix_view.
bcdr1_raw = pd.read_csv('d:/BCDR/BCDR-D01_dataset/bcdr_d01_img.csv')
bcdr1_raw = bcdr1_raw.drop_duplicates(subset=['image_filename'],keep='first',ignore_index=True)
bcdr1_raw = utils.fix_view(bcdr1_raw,'image_type_name')
bcdr1_features_raw = pd.read_csv('D:/BCDR/BCDR-D01_dataset/bcdr_d01_features.csv')

# BCDR-D02: same treatment.
bcdr2_raw = pd.read_csv('d:/BCDR/BCDR-D02_dataset/bcdr_d02_img.csv')
bcdr2_raw = bcdr2_raw.drop_duplicates(subset=['image_filename'],keep='first',ignore_index=True)
bcdr2_raw = utils.fix_view(bcdr2_raw,'image_type_name')
bcdr2_features_raw = pd.read_csv('D:/BCDR/BCDR-D02_dataset/bcdr_d02_features.csv')

# Lesion findings are derived from the unfiltered feature tables.
l1 = utils.lesion_findings(bcdr1_features_raw)
l2 = utils.lesion_findings(bcdr2_features_raw)

In [None]:
# Columns retained from the BCDR-D01 image table and from its feature table.
image_columns = ['patient_id','study_id','image_filename','image_type_name','density','age']
feature_columns = ['patient_id','study_id','image_view','s_x_center_mass','s_y_center_mass','density','age']

bcdr1_features = bcdr1_features_raw[feature_columns]

# utils.merge_csv receives the feature subset first and the image subset second.
bcdr1 = utils.merge_csv(bcdr1_features, bcdr1_raw[image_columns])
# l1 comes from utils.lesion_findings(bcdr1_features_raw).
# NOTE(review): assumes l1 lines up row-for-row with the merged table — confirm.
bcdr1['lesion_type'] = l1

In [None]:
# Columns retained from the BCDR-D02 image table and from its feature table.
image_columns = ['patient_id','study_id','image_filename','image_type_name','density','age']
feature_columns = ['patient_id','study_id','image_view','s_x_center_mass','s_y_center_mass','density','age']

bcdr2_features = bcdr2_features_raw[feature_columns]

# utils.merge_csv receives the feature subset first and the image subset second.
bcdr2 = utils.merge_csv(bcdr2_features, bcdr2_raw[image_columns])
# l2 comes from utils.lesion_findings(bcdr2_features_raw).
# NOTE(review): assumes l2 lines up row-for-row with the merged table — confirm.
bcdr2['lesion_type'] = l2

In [None]:
# Copy the BCDR-D01 columns into a fresh frame under the shared column names.
source_columns = ['patient_id','image_view','image_filename','s_x_center_mass','s_y_center_mass','density','age','lesion_type']
target_columns = ['patient_id','image_view','image_path','x_center','y_center','density','age','lesion_type']
lesion_mammographies1 = pd.DataFrame({})
lesion_mammographies1[target_columns] = bcdr1[source_columns]

# Prefix every patient id with '1d' so ids from this dataset are distinguishable after concatenation.
lesion_mammographies1['patient_id'] = [
    '1d'+str(patient) for patient in lesion_mammographies1['patient_id']
]
lesion_mammographies1 = utils.fix_bcdr1_path(lesion_mammographies1,'image_path')

In [None]:
# Copy the BCDR-D02 columns into a fresh frame under the shared column names.
source_columns = ['patient_id','image_view','image_filename','s_x_center_mass','s_y_center_mass','density','age','lesion_type']
target_columns = ['patient_id','image_view','image_path','x_center','y_center','density','age','lesion_type']
lesion_mammographies2 = pd.DataFrame({})
lesion_mammographies2[target_columns] = bcdr2[source_columns]

# Prefix every patient id with '2d' so ids from this dataset are distinguishable after concatenation.
lesion_mammographies2['patient_id'] = [
    '2d'+str(patient) for patient in lesion_mammographies2['patient_id']
]
# The trailing self-assignment of lesion_mammographies2 was a no-op and has been dropped.
lesion_mammographies2 = utils.fix_bcdr2_path(lesion_mammographies2,'image_path')

In [None]:
# Stack both BCDR lesion tables into one frame with a fresh 0..n-1 index.
suspicious_parts = [lesion_mammographies1, lesion_mammographies2]
lesion_mammographies = pd.concat(suspicious_parts, ignore_index=True)
print('Suspicious Dataset: ', lesion_mammographies.shape)

### Split and Copy

In [None]:
# 70/30 split of the suspicious table (tts is sklearn's train_test_split).
# random_state pins the shuffle so a fresh "Restart & Run All" reproduces the same partition.
# NOTE(review): rows are split individually, so images of one patient_id can end up in
# both subsets — confirm whether a patient-level split is required.
sus_training,sus_validation = tts(lesion_mammographies,test_size=0.3,random_state=42)

In [None]:
# Hand each split to utils.image_mover together with its destination folder.
# The validation folder was previously misspelled 'validaiton', while the feature-extraction
# and downsampling cells read from 'image_data/raw/validation/...'.
utils.image_mover(sus_training,'image_data/raw/training/suspicious')
utils.image_mover(sus_validation,'image_data/raw/validation/suspicious')

## Normal Mammographies

### CSV Analysis

In [None]:
# Image table of the BCDR-DN01 dataset (used below as the source of normal mammographies).
# NOTE(review): unlike the D01/D02 tables, this one is neither de-duplicated nor passed
# through utils.fix_view — confirm that is intentional.
bcdrN = pd.read_csv(filepath_or_buffer='d:/BCDR/BCDR-DN01_dataset/bcdr_dn01_img.csv')

In [None]:
# Copy the BCDR-DN01 columns into a fresh frame under the shared column names
# ('image_type_name' becomes 'image_view', 'image_filename' becomes 'image_path').
source_columns = ['patient_id','image_type_name','image_filename','density','age']
target_columns = ['patient_id','image_view','image_path','density','age']
normal_mammographies1 = pd.DataFrame({})
normal_mammographies1[target_columns] = bcdrN[source_columns]
normal_mammographies1 = utils.fix_bcdrN_path(normal_mammographies1,'image_path')

In [None]:
# Read the INbreast spreadsheet and keep only what the later cells use.
xls_raw = pd.read_excel('D:/INBreast/INbreast.xls')
xls = pd.DataFrame()
# image_view is the row's 'Laterality' value followed by its 'View' value.
xls['image_view'] = [
    laterality + view
    for laterality, view in zip(xls_raw['Laterality'], xls_raw['View'])
]
xls['filename'] = xls_raw['File Name']
xls['finding notes'] = xls_raw['Findings Notes (in Portuguese)']

In [None]:
# List the INbreast DICOM folder and keep only entries whose last three characters are 'dcm'.
# A single filtering pass replaces the earlier collect-then-remove loops, which rescanned
# the list for every rejected entry and left a scratch list behind.
path_list = [
    path for path in os.listdir('D:/INBreast/AllDICOMs/')
    if path[-3:] == 'dcm'
]

In [None]:
# Derive patient id and view from each DICOM file name by splitting it on '_':
# field 1 is used as the patient id, fields 3 and 4 are joined into the image view.
patients = []
file_paths = []
image_views = []
for path in path_list:
    parts = path.split('_')
    # Fields up to index 4 are read below, so require at least five of them; the previous
    # guard (more than one field) still allowed an IndexError on shorter names.
    if len(parts) > 4:
        patients.append(parts[1])
        file_paths.append(path)
        image_views.append(parts[3]+parts[4])
images_df = pd.DataFrame({'patient_id':patients,'image_view':image_views,'image_path':file_paths})
images_df = utils.fix_inbreast_path(images_df,'image_path')

In [None]:
# Attach the spreadsheet's finding notes to the image table and keep the rows marked 'normal'.
# NOTE(review): this assignment aligns the two tables by row index, not by file name, so it is
# only correct if the directory listing and the spreadsheet are in the same order — confirm,
# or join on xls['filename'] instead.
images_df[['finding notes']] = xls[['finding notes']]
is_normal = images_df['finding notes'] == 'normal'
normal_df = images_df[is_normal]

kept_columns = ['patient_id','image_view','image_path']
normal_mammographies2 = pd.DataFrame()
normal_mammographies2[kept_columns] = normal_df[kept_columns]

In [None]:
# Stack the BCDR-DN01 and INbreast normal tables into one frame with a fresh 0..n-1 index.
normal_parts = [normal_mammographies1, normal_mammographies2]
normal_mammographies = pd.concat(normal_parts, ignore_index=True)
print('Normal Dataset: ', normal_mammographies.shape)

### Split and Copy

In [None]:
# 70/30 split of the normal table (tts is sklearn's train_test_split).
# random_state pins the shuffle so a fresh "Restart & Run All" reproduces the same partition.
# NOTE(review): rows are split individually, so images of one patient_id can end up in
# both subsets — confirm whether a patient-level split is required.
normal_training,normal_validation = tts(normal_mammographies,test_size=0.3,random_state=42)

In [None]:
# Hand each split to utils.image_mover together with its destination folder.
# The validation folder was previously misspelled 'validaiton', while the feature-extraction
# and downsampling cells read from 'image_data/raw/validation/...'.
utils.image_mover(normal_training,'image_data/raw/training/normal')
utils.image_mover(normal_validation,'image_data/raw/validation/normal')

# Numerical Data

## Create Features Documents

In [None]:
# One utils.mammary_features call per (split, class) folder; the second argument is the
# CSV name paired with that folder.
raw_root = 'image_data/raw/'
utils.mammary_features(raw_root + 'training/normal/', 'features_training_normal.csv')
utils.mammary_features(raw_root + 'training/suspicious/', 'features_training_suspicious.csv')
utils.mammary_features(raw_root + 'validation/normal/', 'features_validation_normal.csv')
utils.mammary_features(raw_root + 'validation/suspicious/', 'features_validation_suspicious.csv')

## Load Features Documents

# Train Image Model

## Crop Background and Downsample

In [None]:
# One utils.downsample call per (split, class) folder. Note that the raw folders are named
# 'training' while the downsampled ones are named 'train'.
# NOTE(review): 1000 and 800 are presumably the target image dimensions — confirm in utils.downsample.
target_dims = (1000, 800)
raw_root = 'image_data/raw/'
out_root = 'image_data/downsampled/'
utils.downsample(raw_root + 'training/normal/', out_root + 'train/normal/', *target_dims)
utils.downsample(raw_root + 'training/suspicious/', out_root + 'train/suspicious/', *target_dims)
utils.downsample(raw_root + 'validation/normal/', out_root + 'validation/normal/', *target_dims)
utils.downsample(raw_root + 'validation/suspicious/', out_root + 'validation/suspicious/', *target_dims)

## Transfer Learning

In [None]:
# Generator options for training: augmentation on, shuffling on.
data_augmentation, shuffle = True, True

# utils.generator_transfer returns the training generator first and the validation generator second.
training_gen, validation_gen = utils.generator_transfer(
    1000, 800, data_augmentation, shuffle
)

In [None]:
# Build the transfer-learning model with the same two size arguments used for the generators,
# then compile it for binary classification, tracking accuracy and AUC.
model = utils.create_trans_model(1000, 800)
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['binary_accuracy','AUC'],
)

In [None]:
# Train for 25 epochs, evaluating on the validation generator after every epoch.
model.fit(training_gen, validation_data=validation_gen, epochs=25)

In [None]:
# Replace the in-memory model with the one stored in 'transfer.h5'.
# NOTE(review): no visible cell writes 'transfer.h5' — confirm where the file is produced
# (e.g. a checkpoint callback inside utils); otherwise the model fitted above is discarded.
model = keras.models.load_model(filepath='transfer.h5')

NameError: name 'keras' is not defined

In [None]:
# Rebuild the generators with the shuffle flag off and predict on the validation generator.
# NOTE(review): the augmentation flag is still True here — confirm that utils only applies it
# to the training generator.
data = utils.generator_transfer(1000,800,True,False)
eval_gen = data[1]
y_true = eval_gen.classes

# NOTE(review): 12 is presumably the generator's batch size — confirm in utils.generator_transfer.
eval_batch_size = 12
# Ceiling division: the previous `samples//12+1` asked for one batch too many whenever the
# sample count was an exact multiple of 12.
eval_steps = (eval_gen.samples + eval_batch_size - 1) // eval_batch_size
# The step count must be passed by keyword: the second positional parameter of
# Model.predict is batch_size, not steps.
y_pred = model.predict(eval_gen, steps=eval_steps)

# Threshold the first output column at 0.5 to obtain hard 0/1 labels.
predictions_image = [1 if row[0] > 0.5 else 0 for row in y_pred]

SyntaxError: invalid syntax (<ipython-input-1-08553f1fcf16>, line 1)

In [None]:
# Confusion matrix of the thresholded predictions (fixes the `sklearn.metris` typo, which
# raised AttributeError). Rows are true classes, columns are predicted classes, and class 1
# is treated as the positive class.
c = sklearn.metrics.confusion_matrix(y_true,predictions_image)
tn, fp, fn, tp = c.ravel()
sens = tp/(tp+fn)  # true-positive rate
spec = tn/(tn+fp)  # true-negative rate
print(c)
print('Sensitivity:',sens)
print('Specificity:',spec)

In [None]:
# ROC curve and AUC from the raw model scores. The AUC call previously referenced
# `ground_truth` and `classifications`, which are not defined anywhere in this notebook;
# it now uses the same y_true / y_pred pair as the ROC curve.
auc = sklearn.metrics.roc_auc_score(y_true,y_pred)
fpr,tpr,_= sklearn.metrics.roc_curve(y_true,y_pred)
plt.plot(fpr,tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
print('AUC:',auc)

## From-Scratch

In [None]:
# Generator options for training: augmentation on, shuffling on.
data_augmentation, shuffle = True, True

# utils.generator_transfer returns the training generator first and the validation generator second.
# NOTE(review): this is the same generator helper as in the Transfer Learning section — confirm
# it is also the intended one for the from-scratch model.
training_gen, validation_gen = utils.generator_transfer(
    1000, 800, data_augmentation, shuffle
)

In [None]:
# Build and compile the model for binary classification, tracking accuracy and AUC.
# NOTE(review): this section is headed "From-Scratch" but calls utils.create_trans_model,
# the same builder as the Transfer Learning section — confirm which builder is intended.
model = utils.create_trans_model(1000, 800)
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['binary_accuracy','AUC'],
)

In [None]:
# Train for 25 epochs, evaluating on the validation generator after every epoch.
model.fit(training_gen, validation_data=validation_gen, epochs=25)

In [None]:
# Replace the in-memory model with the one stored in 'transfer.h5'.
# NOTE(review): this section is headed "From-Scratch" yet loads 'transfer.h5', and no visible
# cell writes that file — confirm the intended file name and where it is produced.
model = keras.models.load_model(filepath='transfer.h5')

In [None]:
# Rebuild the generators with the shuffle flag off and predict on the validation generator.
# NOTE(review): the augmentation flag is still True here — confirm that utils only applies it
# to the training generator.
data = utils.generator_transfer(1000,800,True,False)
eval_gen = data[1]
y_true = eval_gen.classes

# NOTE(review): 12 is presumably the generator's batch size — confirm in utils.generator_transfer.
eval_batch_size = 12
# Ceiling division: the previous `samples//12+1` asked for one batch too many whenever the
# sample count was an exact multiple of 12.
eval_steps = (eval_gen.samples + eval_batch_size - 1) // eval_batch_size
# The step count must be passed by keyword: the second positional parameter of
# Model.predict is batch_size, not steps.
y_pred = model.predict(eval_gen, steps=eval_steps)

# Threshold the first output column at 0.5 to obtain hard 0/1 labels.
predictions_image = [1 if row[0] > 0.5 else 0 for row in y_pred]

In [None]:
# Confusion matrix of the thresholded predictions (fixes the `sklearn.metris` typo, which
# raised AttributeError). Rows are true classes, columns are predicted classes, and class 1
# is treated as the positive class.
c = sklearn.metrics.confusion_matrix(y_true,predictions_image)
tn, fp, fn, tp = c.ravel()
sens = tp/(tp+fn)  # true-positive rate
spec = tn/(tn+fp)  # true-negative rate
print(c)
print('Sensitivity:',sens)
print('Specificity:',spec)

In [None]:
# ROC curve and AUC from the raw model scores. The AUC call previously referenced
# `ground_truth` and `classifications`, which are not defined anywhere in this notebook;
# it now uses the same y_true / y_pred pair as the ROC curve.
auc = sklearn.metrics.roc_auc_score(y_true,y_pred)
fpr,tpr,_= sklearn.metrics.roc_curve(y_true,y_pred)
plt.plot(fpr,tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
print('AUC:',auc)