## Learning from Kaggle Notebook

### Using tensorflow for image recognition

In [None]:
pip install pydicom

In [None]:
!pip install -q ../input/for-pydicom/pylibjpeg-1.4.0-py3-none-any.whl
!pip install -q ../input/for-pydicom/python_gdcm-3.0.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install -q ../input/for-pydicom/pylibjpeg_libjpeg-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

In [None]:
pip install pylibjpeg pylibjpeg-libjpeg pydicom

In [None]:
import os 
import pathlib
import glob 
from tqdm import tqdm 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pydicom
from pydicom import dcmread
import pylibjpeg
import tensorflow as tf

In [None]:
import boto3

s3 = boto3.resource('s3')
bucket_name = 'evan-callaghan-bucket'
bucket = s3.Bucket(bucket_name)

file_key = 'Kaggle-Spine-Fracture-Detection/train.csv'
file_key2 = 'Kaggle-Spine-Fracture-Detection/train_bounding_boxes.csv'

bucket_object = bucket.Object(file_key)
bucket_object2 = bucket.Object(file_key2)

file_object = bucket_object.get()
file_object2 = bucket_object2.get()

file_content_stream = file_object.get('Body')
file_content_stream2 = file_object2.get('Body')

## Reading the data
train = pd.read_csv(file_content_stream)
train_boxes = pd.read_csv(file_content_stream2)

In [None]:
sns.set()

## Defining parameters
EPOCHS = 10
BATCH_SIZE = 16
IMAGE_SIZE = (512, 512)
SEED = 42

## Setting seed
np.random.seed(SEED)
tf.random.set_seed(SEED)

## Loading the DICOM files properly

In [None]:
## Subsetting the train data-frame
train = train[np.isin(train['StudyInstanceUID'], ['1.2.826.0.1.3680043.10606', '1.2.826.0.1.3680043.10815', 
                                                  '1.2.826.0.1.3680043.12121'])].reset_index(drop = True)

In [None]:
## Defining an empty list to store all images
img_list = []

## Looping through all study instances in example
for instance in train['StudyInstanceUID'].unique():
    
    ## Getting all images for the particular instance
    img_to_append = glob.glob(f'Sample_Images/{instance}/*.dcm')
    
    ## Appending all images to img_list
    img_list.extend(img_to_append)
    
## Printing the length of the list
print(len(img_list))

In [None]:
def load_dicom(path):
    """
    reads a dicom file and loads the image array inside it
    inputs:
        path - the path of the required dicom file
    returns:
        data - image pixel arrays
    """
    img = pydicom.dcmread(path)
    data = img.pixel_array
    data = data - np.min(data)
    if np.max(data) != 0:
        data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    return data

In [None]:
def data_generator():
    """
    a function that will load the dataset from a list of image paths
    """
    for path in img_list:
        data = load_dicom(path)
        yield data  # return the data has generator

In [None]:
## Defining a tensorflow dataset variable that will use the generator to load the image data
train_dataset = tf.data.Dataset.from_generator(data_generator, (tf.uint8))

## Looking of the dataset contents
for i in train_dataset.take(1):
    print(i.shape)
    print(type(i))

In [None]:
def show_single(img, cmap="inferno"):
    """
    plots a single image
    """
    plt.imshow(img, cmap=cmap)
    plt.axis("off")
    
show_single(i)

In [None]:
def show_batch(cmap="inferno"):
    """
    visualizes a batch of images
    """
    plt.figure(figsize=(16, 12))
    for i, img in enumerate(train_dataset.take(10)):  # iterate through the dataset
        plt.subplot(2, 5, i+1)
        show_single(img, cmap=cmap)
    plt.show()
    
show_batch()

## Generating data from the DICOM files to be used for modeling

In [None]:
def data_generator():
    
    ## Looping through all unique StudyInstanceUID values
    for i, study_instance in enumerate(train['StudyInstanceUID'].unique()):
        
        ## Loopig through all images in the associated instance folder
        for dcm in os.listdir(f'Sample_Images/{study_instance}'):
            
            ## Defining an empty list to eventually store train labels
            train_labels = []
            
            ## Defining the image path
            path = f'Sample_Images/{study_instance}/{dcm}'
            
            ## Loading image from path
            img = load_dicom(path)
            
            ## Resizing each image into a shape of (512, 512) and then normalizing
            img = np.resize(img, (512, 512))
            img = img / 255.0
            
            ## Converting from gray scale to rgb
            img = tf.expand_dims(img, axis=-1)
            img = tf.image.grayscale_to_rgb(img)
            
            ## Appending train labels to each image
            train_labels.extend([
                train.loc[i, "C1"],
                train.loc[i, "C2"],
                train.loc[i, "C3"],
                train.loc[i, "C4"],
                train.loc[i, "C5"],
                train.loc[i, "C6"],
                train.loc[i, "C7"],
                train.loc[i, "patient_overall"]])
            
            ## Yielding the image and associated labels
            yield img, train_labels

In [None]:
train_data = tf.data.Dataset.from_generator(data_generator, (tf.float32, tf.int8))

In [None]:
for img, label in train_data.take(1):
    print(img.shape)
    print(label.shape)
    print(label)

## Modeling

In [None]:
## Counting the number of total images
img_count = 0
for _, _ in enumerate(train['StudyInstanceUID'].unique()):
    for _ in os.listdir(f'Sample_Images/{_}'):
        img_count += 1
print(img_count)

In [None]:
## Splitting the data into train and validation sets
val_size = int(img_count * 0.2)
train_data = train_data.skip(val_size)
val_data = train_data.take(val_size)

In [None]:
def configure_for_performance(data):
    data = data.cache()
    data = data.batch(16)
    data = data.prefetch(buffer_size=tf.data.AUTOTUNE)
    return data

In [None]:
train_data = configure_for_performance(train_data)
val_data = configure_for_performance(val_data)

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dense, Dropout, Flatten

In [None]:
# Defining Alex Net model

def alex_net():
    model = Sequential()

    # 1st Convolutional Layer
    model.add(Conv2D(filters=96, input_shape=(512,512,3), kernel_size=(11,11),\
     strides=(4,4), padding='valid', activation="relu"))
    # Pooling 
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
    # Batch Normalisation before passing it to the next layer
    model.add(BatchNormalization())

    # 2nd Convolutional Layer
    model.add(Conv2D(filters=256, kernel_size=(11,11), strides=(1,1), padding='valid', activation="relu"))
    # Pooling
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
    # Batch Normalisation
    model.add(BatchNormalization())

    # 3rd Convolutional Layer
    model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid', activation="relu"))
    # Batch Normalisation
    model.add(BatchNormalization())

    # 4th Convolutional Layer
    model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid', activation="relu"))
    # Batch Normalisation
    model.add(BatchNormalization())

    # 5th Convolutional Layer
    model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='valid', activation="relu"))
    # Pooling
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
    # Batch Normalisation
    model.add(BatchNormalization())

    # Passing it to a dense layer
    model.add(Flatten())
    # 1st Dense Layer
    model.add(Dense(4096, input_shape=(512*512*3,), activation="relu"))
    # Add Dropout to prevent overfitting
    model.add(Dropout(0.4))
    # Batch Normalisation
    model.add(BatchNormalization())

    # 2nd Dense Layer
    model.add(Dense(4096, activation="relu"))
    # Add Dropout
    model.add(Dropout(0.4))
    # Batch Normalisation
    model.add(BatchNormalization())

    # 3rd Dense Layer
    model.add(Dense(1000, activation="relu"))
    # Add Dropout
    model.add(Dropout(0.4))
    # Batch Normalisation
    model.add(BatchNormalization())

    # Output Layer with 8 probability classes
    model.add(Dense(8, activation="softmax"))
    return model

In [None]:
## Calling the Alex Net model
model = alex_net()

## Printing model results
model.summary()

In [None]:
model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=[tf.keras.metrics.CategoricalAccuracy()]
             )

In [None]:
# training
history = model.fit(train_data, validation_data=val_data,
                   epochs=EPOCHS)

In [None]:
# visualize training 
def viz_loss(history):
    train_loss = history["loss"]
    val_loss = history["val_loss"]
    iters = [i for i in range(EPOCHS)]
    
    plt.plot(iters, train_loss, label="Training Loss")
    plt.plot(iters, val_loss, label="Validation Loss")
    plt.title("A plot of Loss against number of iterations")
    plt.legend()
    plt.show()
    
def viz_acc(history):
    train_loss = history["categorical_accuracy"]
    val_loss = history["val_categorical_accuracy"]
    iters = [i for i in range(EPOCHS)]
    
    plt.plot(iters, train_loss, label="Training Accuracy")
    plt.plot(iters, val_loss, label="Validation Accuracy")
    plt.title("A plot of Accuracy against number of iterations")
    plt.legend()
    plt.show()
    
viz_loss(history.history)
viz_acc(history.history)

## More Tensorflow for image classification

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
import pathlib
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)

In [None]:
## Printing a count of total images in the data set
image_count = len(list(data_dir.glob('*/*.jpg')))
print(image_count)

In [None]:
roses = list(data_dir.glob('roses/*'))
PIL.Image.open(str(roses[0]))

In [None]:
tulips = list(data_dir.glob('tulips/*'))
PIL.Image.open(str(tulips[0]))

In [None]:
## Setting some parameters
batch_size = 32
img_height = 180
img_width = 180

In [None]:
## Defining training and validation sets

train_ds = tf.keras.utils.image_dataset_from_directory( data_dir, validation_split=0.2, subset="training", seed=123, 
                                                       image_size=(img_height, img_width), batch_size=batch_size)

val_ds = tf.keras.utils.image_dataset_from_directory( data_dir, validation_split=0.2, subset="validation", 
                                                     seed=123, image_size=(img_height, img_width), batch_size=batch_size)

In [None]:
## Printing the classes
class_names = train_ds.class_names
print(class_names)

In [None]:
## Creating a visualization

plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")

In [None]:
for image_batch, labels_batch in train_ds:
    print(image_batch.shape)
    print(labels_batch.shape)
    break

In [None]:
## Configuring data sets for performance
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
## Normalizing the RBG values for each image

normalization_layer = layers.Rescaling(1./255)

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
print(np.min(first_image), np.max(first_image))

In [None]:
## Building a basic Keras model

num_classes = len(class_names)

model = Sequential([
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)), 
    layers.Conv2D(16, 3, padding='same', activation='relu'), 
    layers.MaxPooling2D((2,2), padding='same'), 
    layers.Conv2D(32, 3, padding='same', activation='relu'), 
    layers.MaxPooling2D((2,2), padding='same'),
    layers.Conv2D(64, 3, padding='same', activation='relu'), 
    layers.MaxPooling2D((2,2), padding='same'), 
    layers.Flatten(), 
    layers.Dense(128, activation='relu'), 
    layers.Dense(num_classes)
])

In [None]:
## Compiling the model

model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), 
              metrics = ['accuracy'])

In [None]:
## Printing the model summary
model.summary()

In [None]:
## Training the model

epochs=10
history = model.fit(train_ds, validation_data = val_ds, epochs = epochs)

## Tensorflow practice

In [1]:
import tensorflow as tf

# Load the Fashion MNIST dataset
fmnist = tf.keras.datasets.fashion_mnist
(training_images, training_labels), (test_images, test_labels) = fmnist.load_data()

# Normalize the pixel values
training_images = training_images / 255.0
test_images = test_images / 255.0

In [2]:
# Define the model
model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation = tf.nn.relu),
        tf.keras.layers.Dense(10, activation = tf.nn.softmax)
])

# Setup training parameters
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

# Train the model
print(f'\nMODEL TRAINING:')
model.fit(training_images, training_labels, epochs = 5)

# Evaluate on the test set
print(f'\nMODEL EVALUATION:')
test_loss = model.evaluate(test_images, test_labels)

2022-08-29 16:25:42.280598: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-08-29 16:25:42.280686: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-172-16-78-8.ca-central-1.compute.internal): /proc/driver/nvidia/version does not exist
2022-08-29 16:25:42.281302: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.



MODEL TRAINING:
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

MODEL EVALUATION:


In [16]:
# Define the model
model = tf.keras.models.Sequential([
                                                          
    # Add convolutions and max pooling 
    tf.keras.layers.Conv2D(32, (3,3), activation = 'relu'), 
    tf.keras.layers.MaxPooling2D(pool_size = (2, 2)), 
    tf.keras.layers.Conv2D(32, (3,3), activation = 'relu'), 
    tf.keras.layers.MaxPooling2D(pool_size = (2, 2)),
 
    # Add the same layers as before 
    tf.keras.layers.Flatten(), 
    tf.keras.layers.Dense(128, activation = 'relu'), 
    tf.keras.layers.Dense(10, activation = 'softmax')

])

In [26]:
model.build(input_shape = (32, 32, 28, 1))

# Print the model summary
model.summary()

# Use same settings
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

# Train the model
print(f'\nMODEL TRAINING:')
model.fit(training_images, training_labels, epochs = 5)

# Evaluate on the test set
print(f'\nMODEL EVALUATION:')
test_loss = model.evaluate(test_images, test_labels)

ValueError: Input 0 of layer "conv2d_22" is incompatible with the layer: expected axis -3of input shape to have value 28, but received input with shape (32, 32, 28, 1)