In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import imageio
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import optimizers

In [3]:
df = pd.read_csv('/content/drive/My Drive/LeWagon_ODR/full_df_cleaned_v3.csv')

### **New Dataframe avec uniquement les observations Normal & Cataracte**

In [4]:
df_n = df[df['tarstr']=='N']

In [5]:
df_c = df[df['tarstr']=='C']

In [6]:
df_N_C = pd.concat([df_n, df_c], ignore_index=True)

**New Columns with the image PATH**

In [7]:
IMAGE_PATH = '/content/drive/My Drive/LeWagon_ODR/preprocessed_images2/'

In [8]:
df_N_C['filepath'] = IMAGE_PATH + df_N_C['filename']

**Add every image to the img_data array**

In [9]:
img_data = []
number_id_nofile = []

for i in range(len(df_N_C)):
  try:
    img_data.append(imageio.imread(df_N_C['filepath'][i]))
  except:
    number_id_nofile.append(df_N_C.index[i])

In [10]:
img_data_array = np.array(img_data)

**Create our X and y**

In [11]:
X = img_data_array

In [12]:
y = df_N_C['C']

**Train Test Split (en stratifiant sur y)**

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, stratify=y)

**Normalize our images**

In [14]:
X_train = X_train / 255
X_test = X_test / 255

**Baseline model**

In [20]:
def initialize_model():
  model = models.Sequential()

  model.add(layers.Conv2D(32, (3,3), input_shape=(256, 256, 3), activation='relu', padding='same'))
  model.add(layers.MaxPool2D(pool_size=(2,2)))
  model.add(layers.Conv2D(64, (3,3), activation='relu', padding='same'))
  model.add(layers.MaxPool2D(pool_size=(2,2)))
  model.add(layers.Conv2D(128, (3,3), activation='relu', padding='same'))
  model.add(layers.MaxPool2D(pool_size=(3,3)))
  model.add(layers.Conv2D(256, (3,3), activation='relu', padding='same'))
  model.add(layers.MaxPool2D(pool_size=(3,3)))
  ### Flattening
  model.add(layers.Flatten())
  ### One fully connected
  model.add(layers.Dense(120, activation='relu'))
  model.add(layers.Dropout(rate=0.5))
  model.add(layers.Dense(60, activation='relu'))
  model.add(layers.Dropout(rate=0.5))
  model.add(layers.Dense(1, activation='sigmoid'))

  model.compile(loss='binary_crossentropy', 
        optimizer='adam',
        metrics=['accuracy'])

  return model

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 21, 21, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 21, 21, 256)       2

In [21]:
model = initialize_model()
es = EarlyStopping(patience=20, restore_best_weights=True)

history = model.fit(X_train, y_train,
                    validation_split=0.3,
                    epochs=100,
                    batch_size=16, 
                    verbose=1,
                    callbacks=[es])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100


**Evaluate on X_test & y_test**

In [22]:
model.evaluate(X_test, y_test, verbose=0)

[0.24723882973194122, 0.9529540538787842]

**Save the Baseline model**

In [23]:
import joblib

In [24]:
filename = '/content/drive/My Drive/LeWagon_ODR/baseline_model_N_C.joblib'
joblib.dump(model, filename)

TypeError: ignored