<a href="https://githubtocolab.com/Hitsh987/projet_IARN/blob/master/endoscopy_multiClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Classification of Anomalies in Gastrointestinal Tract through Endoscopic Imagery with Deep Learning


### We used OpenCV for preprocessing the image dataset

### We used the TensorFlow and Keras libraries for the machine learning part


In [20]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow.keras import applications
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.preprocessing.image import ImageDataGenerator

### First we download the dataset and store the path of its directory


In [21]:
def download_dataset(URL, dataset_name):
    """Fetch a zipped dataset and return the path of its expected folder.

    The archive is requested through ``tf.keras.utils.get_file`` under the
    file name ``<dataset_name>.zip`` with ``extract=True``. The returned
    path is the entry named ``dataset_name`` inside the directory that
    holds the path reported by ``get_file``.

    Args:
        URL: Address of the ``.zip`` archive.
        dataset_name: Base name of the archive and of the dataset folder.

    Returns:
        The directory of the downloaded archive joined with ``dataset_name``.
    """
    archive_name = f"{dataset_name}.zip"
    archive_path = tf.keras.utils.get_file(archive_name, origin=URL, extract=True)
    parent_dir, _ = os.path.split(archive_path)
    return os.path.join(parent_dir, dataset_name)


# Base name of the Kvasir archive; it is also the folder name that
# download_dataset() appends to the download location.
dataset_name = "kvasir-dataset"
URL = f"https://datasets.simula.no/downloads/kvasir/{dataset_name}.zip"

# Fetch the archive and keep the local dataset directory for the cells below.
dataset_dir = download_dataset(URL, dataset_name)
print(f"Dataset path: {dataset_dir}")


Dataset path: /root/.keras/datasets/kvasir-dataset


### Define the dataset categories (classes)


In [22]:
def get_dataCategories(dataset_dir):
    """List the sub-folders of ``dataset_dir`` in sorted order.

    Only directory entries are kept; plain files are ignored.

    Args:
        dataset_dir: Path of the folder whose sub-folders name the classes.

    Returns:
        A sorted list with the names (not full paths) of the sub-folders.
    """
    categories = []
    for entry in os.listdir(dataset_dir):
        if os.path.isdir(os.path.join(dataset_dir, entry)):
            categories.append(entry)
    categories.sort()
    return categories


# Class names are the dataset sub-folder names; the position of a name in
# this sorted list is the integer class id used when the labels are built.
categories = get_dataCategories(dataset_dir)
nbr_categories = len(categories)
print("number of categories: ", nbr_categories)
print("categories:")
# Print a 1-based numbered list of the class names.
for i, c in enumerate(categories):
    print(f"  {i+1}.{c}")


number of categories:  8
categories:
  1.dyed-lifted-polyps
  2.dyed-resection-margins
  3.esophagitis
  4.normal-cecum
  5.normal-pylorus
  6.normal-z-line
  7.polyps
  8.ulcerative-colitis


### Using OpenCV, we read the images from the dataset directory and resize them to 100\*100 pixels

### then insert:

- the resized image in `X`
- the class label in `y`


In [23]:
# load dataset
def create_dataset(datadir, categories, img_wid, img_high):
    """Load, resize and label every readable image of every category.

    Each category is expected to be a sub-folder of ``datadir``. Files that
    OpenCV cannot decode are skipped.

    Args:
        datadir: Root folder containing one sub-folder per category.
        categories: Category (sub-folder) names; the position of a name in
            this sequence is used as its integer class label.
        img_wid: Target image width in pixels.
        img_high: Target image height in pixels.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_images, img_high, img_wid, 3)`` and ``y`` has shape
        ``(n_images,)`` and holds the integer class labels.
    """
    X = []
    y = []
    for class_num, category in enumerate(categories):
        path = os.path.join(datadir, category)
        # Sorted so the sample order (and any seeded split built on it)
        # does not depend on the file-system listing order.
        for img in sorted(os.listdir(path)):
            img_array = cv2.imread(os.path.join(path, img))
            if img_array is None:
                # cv2.imread returns None for files it cannot decode.
                continue
            try:
                # dsize is (width, height); the result has shape
                # (height, width, channels).
                resized = cv2.resize(img_array, (img_wid, img_high))
            except cv2.error:
                # Best effort: skip images OpenCV fails to resize.
                continue
            X.append(resized)
            y.append(class_num)

    y = np.array(y, dtype=int)
    # Rows (height) come before columns (width) in the image arrays.
    X = np.array(X).reshape(y.shape[0], img_high, img_wid, 3)
    return X, y


# Size (in pixels) every image is resized to while loading.
img_wid, img_high = 100, 100
X, y = create_dataset(dataset_dir, categories, img_wid, img_high)

# Report the shapes of the image array and of the label vector.
print(f"X: {X.shape}")
print(f"y: {y.shape}")


X: (4000, 100, 100, 3)
y: (4000,)


In [None]:
# split dataset to train and test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

# The images are already 4-D colour tensors (samples, axis 1, axis 2, 3
# channels), so no reshape is needed. Axis 0 is the number of samples and
# must not be used as an image dimension.
# `width` / `height` keep the names expected by the model-definition cell and
# follow the array's own axis order (axis 1, axis 2).
width, height, channels = X_train.shape[1:]

# one hot encode target values; the number of classes is passed explicitly so
# both splits always get the same number of columns
y_train = to_categorical(y_train, num_classes=nbr_categories)
y_test = to_categorical(y_test, num_classes=nbr_categories)

# confirm scale of pixels
print('Train min=%.3f, max=%.3f' % (X_train.min(), X_train.max()))
print('Test min=%.3f, max=%.3f' % (X_test.min(), X_test.max()))

# create generator (1.0/255.0 = 0.003921568627451)
datagen = ImageDataGenerator(rescale=1.0/255.0)
# prepare iterators that scale the images batch by batch
train_iterator = datagen.flow(X_train, y_train, batch_size=64)
test_iterator = datagen.flow(X_test, y_test, batch_size=64)


In [None]:
# define model: two convolution / max-pooling stages followed by a dense head
model = Sequential()
model.add(Conv2D(32, (3, 3), activation="relu", input_shape=(width, height, channels)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation="relu"))
# One output unit per dataset class; the one-hot targets have
# `nbr_categories` columns, so the softmax layer must match.
model.add(Dense(nbr_categories, activation="softmax"))

# compile model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


In [None]:
# fit model with the batch iterator; `Model.fit` accepts iterators directly
# and replaces the deprecated `fit_generator`
model.fit(train_iterator, steps_per_epoch=len(train_iterator), epochs=5)

In [None]:
# evaluate model on the test iterator; `Model.evaluate` accepts iterators
# directly and replaces the deprecated `evaluate_generator`
_, acc = model.evaluate(test_iterator, steps=len(test_iterator), verbose=0)
print('Test Accuracy: %.3f' % (acc * 100))

In [None]:
# Disabled experiment, kept for reference: a logistic-regression baseline that
# is fitted and scored on the full dataset (no train/test split).
# NOTE(review): X is 4-D in this notebook; presumably it was flattened to 2-D
# when this ran - confirm before re-enabling.
# from sklearn.linear_model import LogisticRegression

# logistic_reg = LogisticRegression()
# logistic_reg.fit(X, y)

# # logistic_reg.score(X, y)

# y_pred = np.zeros(X.shape[0])
# for i in range(len(y_pred)):
#     p = logistic_reg.predict([X[i, :]])
#     y_pred[i] = p

# prcision = np.mean(y == y_pred) * 100
# print("pourcentage de precision: {:.2f}%".format(prcision))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


pourcentage de precision: 73.85%


In [None]:


# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

# Convert the pixels to float32 and scale them by 1/255.
X_train = X_train.astype("float32") / 255
X_test = X_test.astype("float32") / 255

# One-hot encode the integer labels of both splits.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, nbr_categories)
    for labels in (y_train, y_test)
)


In [19]:
from sklearn.metrics import log_loss
from sklearn.neural_network import MLPClassifier

# scikit-learn estimators expect 2-D input (n_samples, n_features), so every
# image is flattened into a single feature vector.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

# The previous cell one-hot encodes the labels; go back to integer class ids
# so the classifier is trained as a plain multi-class problem.
y_train_ids = y_train.argmax(axis=1) if y_train.ndim > 1 else y_train
y_test_ids = y_test.argmax(axis=1) if y_test.ndim > 1 else y_test

nn = MLPClassifier(
    # hidden_layer_sizes=(hidden_layer_size,),
    activation="logistic",
    solver="lbfgs",
    alpha=1,
    max_iter=1500,
)
# Fit on the training split only, so the test figures below are computed on
# data the model has not seen.
nn.fit(X_train_flat, y_train_ids)

# MLPClassifier has no Keras-style `evaluate`; compute the test loss from the
# predicted probabilities and the test accuracy with `score`.
score = (
    log_loss(y_test_ids, nn.predict_proba(X_test_flat), labels=nn.classes_),
    nn.score(X_test_flat, y_test_ids),
)
print("Test loss:", round(score[0], 2))
print("Test accuracy:", round(score[1], 2))


KeyboardInterrupt: ignored

### Show a random image for each category


In [None]:
# Draw one random sample per category in a grid with 4 columns.
n_cols = 4
n_rows = -(-len(categories) // n_cols)  # ceiling division
plt.figure(figsize=(12, 5))
for i, category in enumerate(categories):
    # Pick among the samples that actually carry label `i` instead of
    # assuming a fixed number of contiguous images per class.
    class_indices = np.flatnonzero(y == i)
    if class_indices.size == 0:
        continue
    idx = np.random.choice(class_indices)
    plt.subplot(n_rows, n_cols, i + 1)
    # The images were read with OpenCV (BGR channel order); reverse the
    # channel axis so matplotlib displays them as RGB.
    plt.imshow(X[idx][:, :, ::-1])
    plt.title(f"{i+1}. {category}")
    plt.axis("off")
plt.show()


### partition the data into training and testing splits using 80% of the data for training and the remaining 20% for testing


In [None]:
# 80/20 train/test split with a fixed seed. `train_test_split` is already
# imported in the import cell at the top of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)


### then preprocess dataset by scaling all pixel intensities to the range [0, 1]


In [None]:
# Convert both splits to float32 and scale the pixel values by 1/255.
X_train = X_train.astype("float32") / 255
X_test = X_test.astype("float32") / 255


### one-hot encode the labels (which are already integer class indices)


In [None]:
# One-hot encode the integer labels of both splits with the same class count.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, nbr_categories)
    for labels in (y_train, y_test)
)


### import VGG19 model (for Transfer learning)


In [None]:
# Pre-trained VGG19 convolutional base without its ImageNet classifier head.
# The image arrays are laid out as (rows, columns, channels), i.e.
# (height, width, 3), so the input shape follows that order.
model = applications.VGG19(
    weights="imagenet", include_top=False, input_shape=(img_high, img_wid, 3)
)


### first we iterate through the model and make all of its layers non-trainable

### then we add a global average pooling layer followed by dense layers with ReLU activation and dropout

### we used categorical_crossentropy loss function and Adam as the optimizer


In [None]:
# Freeze every layer of the pre-trained base so that only the new head is
# trained (an earlier variant froze just the first 20 layers:
# `for layer in model.layers[:20]:`).
for layer in model.layers:
    layer.trainable = False

# Classification head: global pooling, then dense layers with dropout.
x = model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation="relu")(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(512, activation="relu")(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(256, activation="relu")(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(128, activation="relu")(x)
# One softmax unit per dataset class, matching the width of the one-hot
# targets instead of a hard-coded 8.
predictions = tf.keras.layers.Dense(nbr_categories, activation="softmax")(x)
model_final = tf.keras.models.Model(model.input, predictions)
model_final.compile(
    loss="categorical_crossentropy", optimizer="Adam", metrics=["accuracy"]
)


### then we train the model on the training set


In [None]:
# Train the head; the test split doubles as validation data.
# (Earlier experiments used batch_size=32 and epochs=8.)
history = model_final.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=256,
    epochs=65,
    verbose=1,
)


Epoch 1/65
Epoch 2/65
Epoch 3/65
Epoch 4/65
Epoch 5/65
Epoch 6/65
Epoch 7/65
Epoch 8/65
Epoch 9/65
Epoch 10/65
Epoch 11/65
Epoch 12/65
Epoch 13/65
Epoch 14/65
Epoch 15/65


In [None]:
# Silent evaluation on the test split; `score` holds [loss, accuracy].
score = model_final.evaluate(x=X_test, y=y_test, verbose=0)


In [None]:
# Report both test metrics rounded to two decimals.
print(f"Test loss: {round(score[0], 2)}")
print(f"Test accuracy: {round(score[1], 2)}")


In [None]:
print(history.history.keys())

# One figure per metric: (training key, validation key, title, legend position).
curves = (
    ("accuracy", "val_accuracy", "model accuracy", "upper left"),
    ("loss", "val_loss", "model loss", "upper right"),
)
for train_key, val_key, title, legend_loc in curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc=legend_loc)
    plt.show()


In [None]:
# Inspect the shape of the training image array.
X_train.shape


In [None]:
# Take the training sample at index 500 and inspect its shape.
a = X_train[500]
a.shape


In [None]:
# Display training sample 500 with its last (channel) axis reversed.
plt.imshow(X_train[500][..., ::-1])


In [None]:
# Number of array dimensions of the training sample at index 500.
a = X_train[500]
a.ndim


In [None]:
# Category name of training sample 500, decoded from its one-hot label row.
b = categories[np.argmax(y_train[500])]
b


In [None]:
# Add a leading batch axis to sample 500 so the model can consume it.
a = X_train[500][np.newaxis, ...]
print(a.shape)
# Highest-scoring class per row of the prediction, mapped to its category name.
predict_x = model_final.predict(a)
classes_x = predict_x.argmax(axis=1)
print(categories[classes_x[0]])


In [None]:
def predict_categorie_img(img, model, categories):
    """Predict the category name of a single image.

    Args:
        img: One image as a 3-D array; a leading batch axis is added before
            it is passed to the model.
        model: Object exposing ``predict(batch)`` that returns one row of
            class scores per image in the batch.
        categories: Sequence of category names indexed by class id.

    Returns:
        The entry of ``categories`` whose index has the highest score.

    Raises:
        TypeError: If ``img`` does not have exactly three dimensions.
    """
    # Check the rank explicitly: indexing with `[None, :, :, :]` silently
    # accepts 4-D input and fails with an unrelated error for lower ranks.
    if np.ndim(img) != 3:
        raise TypeError("test image dimension != 3")
    batch = np.asarray(img)[None, :, :, :]
    predict = model.predict(batch)
    classes = np.argmax(predict, axis=1)
    return categories[classes[0]]


In [None]:
def cvtRGB(img):
    """Convert a copy of a BGR image to RGB channel order.

    Args:
        img: Image array in BGR order; a copy is passed to OpenCV.

    Returns:
        The result of ``cv2.cvtColor`` with ``cv2.COLOR_BGR2RGB``.
    """
    bgr_copy = img.copy()
    rgb = cv2.cvtColor(bgr_copy, cv2.COLOR_BGR2RGB)
    return rgb


# `images` was not defined by any earlier cell; build it here so the cell
# runs on a fresh kernel: one array of raw (BGR) images per category.
images = [X[y == class_id] for class_id in range(len(categories))]

plt.figure(figsize=(15, 10))
for i, imgs in enumerate(images):
    if len(imgs) == 0:
        # Nothing to draw for a category without samples.
        continue
    plt.subplot(3, 5, i + 1)
    idx = np.random.randint(len(imgs))
    plt.imshow(cvtRGB(imgs[idx]))
    # Pass a real boolean; the string "off" is not a boolean and does not
    # reliably hide the grid.
    plt.grid(False)
    plt.title(categories[i] + " " + str(idx))
plt.show()
