# Description

## Pre trained model (VGG-16) :
The VGG-16 model is a pre-trained deep learning model that was introduced by the Visual Geometry Group (VGG) at the University of Oxford. It is a convolutional neural network (CNN) architecture that has achieved impressive performance on various computer vision tasks, including image classification, object detection, and image segmentation.

![VGG-16 architecture](https://miro.medium.com/v2/resize:fit:828/0*0M8CobXpNwFDCmOQ)

The VGG-16 architecture is a deep convolutional neural network (CNN) model that consists of 16 layers, including 13 convolutional layers and 3 fully connected layers. Here is a summary of the VGG-16 architecture:

import library

In [2]:
from os import listdir
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

# Pre processing

Load data and remove rows with **DM** type

In [3]:
excel_data = pd.read_excel('./Radiology_manual_annotations.xlsx')
excel_data = excel_data[excel_data['Type'] != 'DM']

In [4]:
excel_data.head()

Unnamed: 0,Image_name,Patient_ID,Side,Type,Age,Breast density (ACR),BIRADS,Findings,View,Tags,Machine,Pathology Classification/ Follow up
1,P1_L_CM_MLO,1,L,CESM,46,_,4,Heterogenous non mass enhancement,MLO,"suspicious, non mass",1,Malignant
4,P2_R_CM_CC,2,R,CESM,31,_,3,Multiple homogenously enhancing masses,CC,"benign, masses, homogenous",2,Benign
5,P2_R_CM_MLO,2,R,CESM,31,_,3,Multiple homogenously enhancing masses,MLO,"benign, masses, homogenous",2,Benign
8,P2_L_CM_CC,2,L,CESM,31,_,2,Flap with no enhancement,CC,"flap, benign",2,Benign
9,P2_L_CM_MLO,2,L,CESM,31,_,2,Flap with no enhancement,MLO,"flap, benign",2,Benign


Rename columns name

In [5]:
excel_data.rename(columns={'Pathology Classification/ Follow up': 'PathologyClassification'}, inplace=True)
excel_data.rename(columns={'Breast density (ACR)': 'Breast_cancer'}, inplace=True)

In [6]:
excel_data.head()

Unnamed: 0,Image_name,Patient_ID,Side,Type,Age,Breast_cancer,BIRADS,Findings,View,Tags,Machine,PathologyClassification
1,P1_L_CM_MLO,1,L,CESM,46,_,4,Heterogenous non mass enhancement,MLO,"suspicious, non mass",1,Malignant
4,P2_R_CM_CC,2,R,CESM,31,_,3,Multiple homogenously enhancing masses,CC,"benign, masses, homogenous",2,Benign
5,P2_R_CM_MLO,2,R,CESM,31,_,3,Multiple homogenously enhancing masses,MLO,"benign, masses, homogenous",2,Benign
8,P2_L_CM_CC,2,L,CESM,31,_,2,Flap with no enhancement,CC,"flap, benign",2,Benign
9,P2_L_CM_MLO,2,L,CESM,31,_,2,Flap with no enhancement,MLO,"flap, benign",2,Benign


In [7]:
path = r"./Subtracted_images_of_CDD_CESM/"
imageFile = listdir(path)
print(imageFile[1])
print(imageFile[-1])

P100_L_CM_MLO.jpg
P9_L_CM_MLO.jpg


sort excel_Data by **Image_name** and **Phatology_Classification**

In [8]:
excel_data_sorted_by_Image_name = excel_data.sort_values(by='Image_name')
Image_labels = excel_data_sorted_by_Image_name[['Image_name','PathologyClassification']]


In [9]:
Image_labels['CategoryCode'] = Image_labels['PathologyClassification'].astype('category').cat.codes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Image_labels['CategoryCode'] = Image_labels['PathologyClassification'].astype('category').cat.codes


In [10]:

Image_labels.to_csv('Image_labels.csv' , index=False)

In [11]:
df = pd.read_csv('./Image_labels.csv')

df['Image_name'] = df['Image_name'] + '.jpg'

train_df , temp_df = train_test_split(df , test_size = .2 , random_state = 42)

val_df , test_df = train_test_split(temp_df , test_size = .5 , random_state = 42)

In [12]:
val_df.head()

Unnamed: 0,Image_name,PathologyClassification,CategoryCode
96,P129_R_CM_CC.jpg,Normal,2
249,P180_R_CM_CC.jpg,Malignant,1
996,P98_R_CM_MLO.jpg,Normal,2
941,P82_R_CM_MLO.jpg,Normal,2
932,P80_L_CM_MLO.jpg,Benign,0


In [13]:
df.head()

Unnamed: 0,Image_name,PathologyClassification,CategoryCode
0,P100_L_CM_CC.jpg,Benign,0
1,P100_L_CM_MLO.jpg,Benign,0
2,P100_R_CM_CC.jpg,Normal,2
3,P100_R_CM_MLO.jpg,Normal,2
4,P101_L_CM_CC.jpg,Normal,2


In [14]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, Flatten , LeakyReLU , BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.vgg16 import preprocess_input

Image data generator using custom generator

In [15]:
datagen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    preprocessing_function=preprocess_input
)

In [16]:
datagentest = ImageDataGenerator(
    rescale = 1./255
)

In [17]:
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator


train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='./Resized_image', 
    x_col='Image_name',  
    y_col='PathologyClassification', 
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',  

)

val_generator = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory='./Resized_image',  
    x_col='Image_name',  
    y_col='PathologyClassification', 
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',

)

test_generator = datagentest.flow_from_dataframe(
    dataframe=test_df,
    directory='./Resized_image',  
    x_col='Image_name',  
    y_col='PathologyClassification', 
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',  
    shuffle=False  
)

Found 802 validated image filenames belonging to 3 classes.
Found 100 validated image filenames belonging to 3 classes.
Found 101 validated image filenames belonging to 3 classes.


Load the vgg16 model

In [18]:
baseModel = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in baseModel.layers:
    layer.trainable = False

In [19]:
baseModel.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [20]:
model = Sequential()
model.add(baseModel)
model.add(Flatten())
model.add(Dense(224, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

In [21]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 256)               6422784   
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 3)                 771       
                                                                 
Total params: 21,138,243
Trainable params: 6,423,555
Non-trainable params: 14,714,688
_________________________________________________________________


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

model_checkpoint = ModelCheckpoint(
    filepath='./gdrive/MyDrive/Breast_Cancer_Project/bestModel.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)


# Train the model

In [23]:
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    batch_size=32,
    verbose = 1,
    callbacks = [
        model_checkpoint
        ]
    )

accuracy chart

In [24]:
import matplotlib.pyplot as plt

plt.plot(history.history['accuracy'], label='Training accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()


loss chart

In [5]:
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

NameError: name 'plt' is not defined

# Evaluate model

In [6]:
batch_size = 64
test_los, test_acc = model.evaluate(test_generator,steps=test_generator.samples // batch_size, verbose=1)

NameError: name 'model' is not defined