In [99]:
import os
import cv2
import tqdm 
import numpy as np
import pandas as pd

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader

## Loading Data

In [100]:
dicom=pd.read_csv('Dataset/DDSM/csv/dicom_info.csv')

dicom=dicom[dicom.SeriesDescription=='cropped images']
dicom=dicom.reset_index(drop=True)
dicom['PatientID_simplified']=dicom['PatientID'].apply(lambda x:'_'.join(x.split('_')[1:3]))
dicom['lr_simplified']=dicom['PatientID'].apply(lambda x:x.split('_')[-3])
dicom=dicom.drop_duplicates(subset=['lr_simplified','PatientID_simplified']).reset_index(drop=True)
addresses=np.array(dicom['image_path'].apply(lambda x:x.replace("CBIS-",'Dataset/')))

In [101]:
def load_image(image_address):
    im=cv2.imread(image_address)
    im=cv2.resize(im,(256,256))
    return im

def load_data_pid_lr(pid,lr):
    temp=dicom[(dicom['PatientID_simplified']==pid) & (dicom['lr_simplified']==lr)]
    return load_image(temp.iloc[0]['image_path'].replace("CBIS-",'Dataset/'))
def load_data(df):
    x=[]
    y=[]
    for i in tqdm.tqdm(range(df.shape[0])):
        x.append(load_data_pid_lr(df.iloc[i]['patient_id'],df.iloc[i]['left or right breast']))
        y.append(df.iloc[i]['pathology'])
    return np.array(x),np.array(y)

In [102]:
# images=[]
# for item in tqdm.tqdm(addresses):
#     images.append(load_image(item))
# images=np.array(images)
# cv2.imshow('test',images[56])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [106]:
replace_dict={'BENIGN_WITHOUT_CALLBACK':0.0001,'BENIGN':0.0001,'MALIGNANT':0.9999}
calc_train=pd.read_csv('Dataset/DDSM/csv/calc_case_description_train_set.csv')
calc_train=calc_train.drop_duplicates(subset=['patient_id','left or right breast'])
calc_train['pathology']=calc_train['pathology'].replace(replace_dict)


calc_test=pd.read_csv('Dataset/DDSM/csv/calc_case_description_test_set.csv')
calc_test=calc_test.drop_duplicates(subset=['patient_id','left or right breast'])
calc_test['pathology']=calc_test['pathology'].replace(replace_dict)

mass_train=pd.read_csv('Dataset/DDSM/csv/mass_case_description_train_set.csv')
mass_train=mass_train.drop_duplicates(subset=['patient_id','left or right breast'])
mass_train['pathology']=mass_train['pathology'].replace(replace_dict)


mass_test=pd.read_csv('Dataset/DDSM/csv/mass_case_description_test_set.csv')
mass_test=mass_test.drop_duplicates(subset=['patient_id','left or right breast'])
mass_test['pathology']=mass_test['pathology'].replace(replace_dict)

In [107]:
X_train=[]
X_test=[]
y_train=[]
y_test=[]
x,y=load_data(calc_train)
X_train.append(x)
y_train.append(y)
x,y=load_data(calc_test)
X_test.append(x)
y_test.append(y)

x,y=load_data(mass_train)
X_train.append(x)
y_train.append(y)
x,y=load_data(mass_test)
X_test.append(x)
y_test.append(y)
X_train=np.concatenate(X_train,axis=0)
y_train=np.concatenate(y_train,axis=0)
X_test=np.concatenate(X_test,axis=0)
y_test=np.concatenate(y_test,axis=0)

  0%|          | 0/662 [00:00<?, ?it/s]

100%|██████████| 662/662 [00:02<00:00, 263.06it/s]
100%|██████████| 161/161 [00:00<00:00, 311.95it/s]
100%|██████████| 722/722 [00:01<00:00, 449.89it/s]
100%|██████████| 210/210 [00:00<00:00, 416.57it/s]


## Modeling

In [108]:
class P_dataset(Dataset):
    def __init__(self, X,y):
        self.X=X
        self.y=y
        
    def __getitem__(self, item):
            x=self.X[item]
            y=self.y[item]
            return torch.tensor(np.array(x)+1/255,dtype=torch.float32),torch.tensor(np.array(y),dtype=torch.float32)
    def __len__(self):
        return self.y.shape[0]

In [109]:
data_train=P_dataset(X_train,y_train)
data_test=P_dataset(X_test,y_test)
train_loader=DataLoader(data_train,batch_size=16,shuffle=True)
test_loader=DataLoader(data_test,batch_size=16,shuffle=False)

In [143]:
class ConvModel(nn.Module):
    def __init__(self):
        super(ConvModel, self).__init__()
        self.cnn1=nn.Conv2d(3,3,5)
        self.bn1=nn.BatchNorm2d(3)
        self.mp1=nn.MaxPool2d(4,stride=4)
        self.cnn2=nn.Conv2d(3,6,4)
        self.bn2=nn.BatchNorm2d(6)
        self.mp2=nn.MaxPool2d(4,stride=4)
        self.cnn3=nn.Conv2d(6,12,4)
        self.bn3=nn.BatchNorm2d(12)
        self.mp3=nn.MaxPool2d(4,stride=4)
        # self.cnn4=nn.Conv2d(12,24,3)
        # self.mp4=nn.MaxPool2d(2,stride=2)
        # self.cnn5=nn.Conv2d(24,48,3)
        # self.mp5=nn.MaxPool2d(2,stride=2)
        self.final=nn.Linear(12*3*3,1)
    def forward(self, x):
        x=torch.permute(x,(0,3,1,2))
        b=x.size(0)
        x=F.relu(self.bn1(self.cnn1(x)))
        x=self.mp1(x)
        x=F.relu(self.bn2(self.cnn2(x)))
        x=self.mp2(x)
        x=F.relu(self.bn3(self.cnn3(x)))
        x=self.mp3(x)
        # x=self.cnn4(x)
        # x=self.mp4(x)
        # x=self.cnn5(x)
        # x=self.mp5(x)
        x=x.reshape(b,-1)
        x=F.sigmoid(self.final(x))
        return x

In [144]:
BCELOSS=nn.BCELoss()
def loss_fn(y,y_hat):
    y_hat=y_hat.view(-1,)
    loss=BCELOSS(y,y_hat)
    acc=torch.sum((y>0.5)==(y_hat>0.5))/y.shape[0]
    return loss,acc

In [145]:
device='cpu'
model=ConvModel()

opt = torch.optim.Adam(model.parameters(),lr=0.01)

for epoch in range(100):
    train_loss=0
    train_acc=0
    test_loss=0
    test_acc=0
    train_len=0
    test_len=0
    model.train()
    for x,y in train_loader:
        train_len+=x.shape[0]
        x = x.to(device) # GPU
        y = y.to(device)
        opt.zero_grad()
        y_hat = model(x)
        
        loss,acc=loss_fn(y,y_hat)
        train_loss+=loss.detach().numpy()
        train_acc+=acc
        loss.backward()

        opt.step()
    model.eval()    
    for x,y in test_loader:
        test_len+=x.shape[0]
        x = x.to(device) # GPU
        y = y.to(device)
        y_hat = model(x)
        loss,acc=loss_fn(y,y_hat)
        test_loss+=loss.detach().numpy()
        test_acc+=acc
    train_loss=train_loss/len(train_loader)
    train_acc=train_acc/len(train_loader)
    test_loss=test_loss/len(test_loader)
    test_acc=test_acc/len(test_loader)
    print(f"EPOCH {epoch}: train_loss: {train_loss}, test_loss: {test_loss}, train_acc: {train_acc}, test_acc: {test_acc},")
        

EPOCH 0: train_loss: 4.295618190162483, test_loss: 4.6661011897958815, train_acc: 0.5387930870056152, test_acc: 0.4895833432674408,
EPOCH 1: train_loss: 3.975886076346211, test_loss: 3.8464912387231984, train_acc: 0.5696839094161987, test_acc: 0.5911458134651184,
EPOCH 2: train_loss: 4.000957794573115, test_loss: 3.790257347126802, train_acc: 0.5639367699623108, test_acc: 0.5963541865348816,
EPOCH 3: train_loss: 3.9868066324584785, test_loss: 3.7687084674835205, train_acc: 0.5682471394538879, test_acc: 0.592881977558136,
EPOCH 4: train_loss: 3.932878949176306, test_loss: 3.76523091395696, train_acc: 0.5732758641242981, test_acc: 0.5902777910232544,
EPOCH 5: train_loss: 3.8604857126871743, test_loss: 3.7514585306247077, train_acc: 0.5833333134651184, test_acc: 0.5902777910232544,
EPOCH 6: train_loss: 3.910719082273286, test_loss: 3.9738141049941382, train_acc: 0.5761494040489197, test_acc: 0.569444477558136,
EPOCH 7: train_loss: 3.780059697984279, test_loss: 4.552475370022269, train_acc

KeyboardInterrupt: 