# Data Sprint #35: Osteoarthritis Knee X-ray [ Help doctors to diagnose Osteoarthritis ]

# Importing Required Libraries

In [58]:
import os 
import cv2
import tensorflow as tf
import glob
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from itertools import chain
import numpy as np
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
import pickle
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img
import shutil
from tensorflow.python.keras.backend import shape


* Checking for GPU

In [None]:
# TensorFlow is already imported as `tf` in the imports cell at the top.
# Shows the name of a GPU device if one is available, otherwise an empty string.
tf.test.gpu_device_name()

# Loading data(in .zip format) from Google drive.


## About the data

* This dataset contains knee X-ray data for both knee joint detection and knee KL grading. The Grade descriptions are as follows:

* Grade 0: Healthy knee image.

* Grade 1 (Doubtful): Doubtful joint narrowing with possible osteophytic lipping

* Grade 2 (Minimal): Definite presence of osteophytes and possible joint space narrowing

* Grade 3 (Moderate): Multiple osteophytes, definite joint space narrowing, with mild sclerosis.

* Grade 4 (Severe): Large osteophytes, significant joint narrowing, and severe sclerosis.

 

## What is KL grading?

* The Kellgren-Lawrence (KL) grading system is a radiological classification of knee osteoarthritis. It progresses from grade 0 to grade 4 and is based on X-rays.

#### * Download the data from https://drive.google.com/file/d/1NdDqPK4NLn2aV8ZdF5ilux1sfG6IyebC/view?usp=sharing
#### * Mount Google Drive and change the working directory to `/content/sample_data`
#### * Using the `!unzip` command, extract all the contents into the runtime.

In [None]:
os.chdir('/content/sample_data')
!unzip '/content/drive/MyDrive/Colab Notebooks/KneeXray.zip'

## Preparing Images to be fed into CNN.
  1) Create folders named after the classes (e.g. for classes = [0, 1, 2, 3, 4], prepare 5 folders named 0, 1, 2, 3, 4).

  2) Copy the respective files from the main source folder into their class folders.

  3) Use `ImageDataGenerator` and its `flow_from_directory` method to create the training data.

In [15]:
# Training metadata; later cells read its 'filename' and 'label' columns.
train_csv_path = "/content/sample_data/KneeXray/Train.csv"
labels = pd.read_csv(train_csv_path)

In [16]:
# Prefix each file name with the training image directory. Names that already
# carry the prefix are left untouched, so re-running this cell does not
# prepend the directory a second time.
train_image_dir = "/content/sample_data/KneeXray/train/"
labels['filename'] = labels['filename'].astype(str).map(
    lambda name: name if name.startswith(train_image_dir) else train_image_dir + name
)

In [18]:
# One folder per class (0-4) under train_labels. makedirs(..., exist_ok=True)
# also creates the parent folder and does not raise if a folder already
# exists, so this cell is safe to re-run.
for i in range(5):
  os.makedirs('/content/sample_data/KneeXray/train_labels/'+str(i), exist_ok=True)

In [19]:
# Map each class label (0-4) to the folder that collects its training images.
direc = {grade: "/content/sample_data/KneeXray/train_labels/" + str(grade)
         for grade in range(5)}

# Copy every labelled image into its class folder; rows whose label is not a
# key of `direc` are skipped.
for grade, src_path in zip(labels['label'], labels['filename']):
    dest_dir = direc.get(grade)
    if dest_dir is None:
        continue
    shutil.copy(src_path, dest_dir)

In [None]:
# Count the files ending in "jpg" that were copied into the class-0 folder.
class0_pattern = '/content/sample_data/KneeXray/train_labels/0' + "/*jpg"
class0_files = glob.glob(class0_pattern)
len(class0_files)

* ImageDataGenerator is a class of "tensorflow.keras.preprocessing.image"

In [60]:
# Generator that multiplies every pixel value by 1/255 as images are loaded.
train = ImageDataGenerator(rescale=1.0 / 255)

* `flow_from_directory` prepares the dataset in the required format.
* The `subset` argument must be added if the validation data is created along with the training data in `ImageDataGenerator`. `subset` selects which dataset is produced (i.e. training data or validation data).

In [61]:
# Stream shuffled batches of 200 images, resized to 224x224, from the
# per-class folders. class_mode='categorical' yields one-hot labels and the
# fixed seed keeps the shuffle order repeatable.
train_dat = train.flow_from_directory(
    directory='/content/sample_data/KneeXray/train_labels/',
    target_size=(224, 224),
    class_mode='categorical',
    classes=None,
    batch_size=200,
    shuffle=True,
    seed=42,
    # subset="training"
)

Found 7828 images belonging to 5 classes.


In [None]:
# Number of training images per class index.
class_index_per_image = pd.Series(train_dat.classes)
class_index_per_image.value_counts()

* To use a validation dataset, pass `validation_split` (a fraction between 0 and 1) to `ImageDataGenerator`.



In [49]:
# Disabled: optional validation generator. To enable it, construct the
# ImageDataGenerator with validation_split=<fraction>, uncomment this cell,
# and pass subset="training" to the training generator as well.
# val_dat = train.flow_from_directory(
#                         directory = '/content/sample_data/KneeXray/train_labels',
#                         target_size=(224, 224),
#                         classes=None,
#                         class_mode='categorical',
#                         seed = 42,
#                         shuffle = True,
#                         batch_size=200,
#                         subset="validation")


Found 1956 images belonging to 5 classes.


# Model Building 

* Multiple changes can be made to the architecture in the future for better performance.

In [63]:

# Small CNN baseline: three Conv/MaxPool stages followed by a dense classifier.
# The output layer needs one unit per class (grades 0-4) so that its softmax
# output lines up with the one-hot labels produced by class_mode='categorical'
# and with the categorical_crossentropy loss. A single softmax unit always
# outputs 1.0 and cannot represent five classes.
NUM_CLASSES = 5

basic_model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(16,(3,3), activation = 'relu', input_shape = (224,224,3)),
                                     tf.keras.layers.MaxPool2D(2,2),
                                     tf.keras.layers.Conv2D(32,(3,3), activation = 'relu'),
                                     tf.keras.layers.MaxPool2D(2,2),
                                     tf.keras.layers.Conv2D(64,(3,3), activation = 'relu'),
                                     tf.keras.layers.MaxPool2D(2,2),
                                     tf.keras.layers.Flatten(),
                                     tf.keras.layers.Dense(512, activation = 'relu'),
                                     tf.keras.layers.Dense(NUM_CLASSES, activation = 'softmax')
                                     ])


In [64]:
from tensorflow.keras.optimizers import RMSprop,SGD

# Multi-class setup: categorical cross-entropy loss, RMSprop optimizer,
# accuracy reported during training.
rmsprop_optimizer = RMSprop(learning_rate=0.001)
basic_model.compile(
    optimizer=rmsprop_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 30 epochs. steps_per_epoch is taken from the generator
# (len(train_dat) is its number of batches per pass) rather than hard-coded,
# so every epoch still covers the whole training set if the batch size or the
# number of images changes.
basic_model_fit = basic_model.fit(train_dat,
                        steps_per_epoch = len(train_dat),
                        epochs = 30,
                        # validation_data=val_dat,
                        # validation_batch_size=150,
                        # validation_steps=20
                        )

In [67]:
# Relative paths used from here on resolve inside this Drive folder.
drive_notebook_dir = '/content/drive/MyDrive/Colab Notebooks'
os.chdir(drive_notebook_dir)

In [None]:
# Save the trained model under the current working directory, replacing any
# existing save of the same name.
saved_model_name = 'xrknee30052219.tf'
basic_model.save(saved_model_name, overwrite=True)

In [None]:
# Metric values recorded by fit(), displayed as the cell output.
training_history = basic_model_fit.history
training_history

# Testing 




* Testing data are provided in two forms.
    * 1) A .csv file with the image names in alphanumeric order, intended as a reference for the submission format
    * 2) The actual images, in a folder named `test`

In [70]:
# Test metadata; its 'filename' column lists the images to predict.
test_csv_path = "/content/sample_data/KneeXray/Test.csv"
test_labels = pd.read_csv(test_csv_path)

* A user-defined function prepares each image to the required specs in order to feed it into the model for predictions.

* Image names and their predicted classes are stored in a list (in the format [['Image_1.jpg', 2], ['Image_2.jpg', 0], ...]) so that it can easily be converted into a dataframe for smooth processing.

In [132]:
def predict():
  """Predict a class index for every image listed in `test_labels`.

  Each image is loaded as RGB at 224x224, scaled by 1/255 (the same rescale
  the training generator applies) and passed to `basic_model` as a batch of
  one.

  Returns:
    A list of [filename, predicted_class_index] pairs, in the order of
    test_labels['filename'].
  """
  op = []
  for i in test_labels['filename']:
    val = '/content/sample_data/KneeXray/test/'+str(i)
    # load_img converts to RGB by default, so grayscale files also come out
    # with 3 channels.
    pixels = img_to_array(load_img(val, target_size=(224, 224))) / 255.0
    # Add the leading batch axis expected by the model: (1, 224, 224, 3).
    batch = np.expand_dims(pixels, axis=0).astype(np.float32)
    scores = basic_model.predict(batch, verbose=0)[0]
    # NOTE(review): assumes the class indices assigned by flow_from_directory
    # (folders '0'..'4') equal the grade labels - confirm via
    # train_dat.class_indices.
    op.append([i, int(np.argmax(scores))])
  return op


In [134]:
# Run the model over every file listed in test_labels; each entry of `output`
# is [filename, predicted class index].
output = predict()

In [137]:
# Tabulate the predictions and write them to Final_output.csv in the current
# working directory.
# NOTE(review): the DataFrame index is written as an unnamed first column -
# confirm the submission format expects it.
submission = pd.DataFrame(output, columns=['filename', 'label'])
submission.to_csv('Final_output.csv', encoding='utf-8')