In [None]:
import pandas as pd
from pathlib import Path

In [None]:
# Load the competition metadata and attach to every row the path of its image.
# This Python 3 environment is defined by the kaggle/python docker image:
# https://github.com/kaggle/docker-python
# Input data files are available in the "../input/" directory.

import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

INPUT_DIR = "../input"


def _list_files(image_dir):
    """Return the path of every file found under ``image_dir``, searched recursively."""
    return [
        os.path.join(dir_path, file_name)
        for dir_path, _, file_names in os.walk(image_dir)
        for file_name in file_names
    ]


def _index_images(image_files):
    """Return a DataFrame with columns ``files`` and ``id_code`` for the given paths.

    ``id_code`` is the file name without its directory and without its final
    extension. It is derived from the base name instead of a fixed position in
    the path, so it does not depend on how deep the image folder is nested or
    on the platform's path separator.
    """
    return pd.DataFrame({
        'files': image_files,
        'id_code': [os.path.splitext(os.path.basename(path))[0] for path in image_files],
    })


print(os.listdir(INPUT_DIR))

train_df = pd.read_csv(os.path.join(INPUT_DIR, "train.csv"))
print("Shape of train data: {0}".format(train_df.shape))
test_df = pd.read_csv(os.path.join(INPUT_DIR, "test.csv"))
print("Shape of test data: {0}".format(test_df.shape))

# Human-readable name for each numeric diagnosis grade.
diagnosis_df = pd.DataFrame({
    'diagnosis': [0, 1, 2, 3, 4],
    'diagnosis_label': ['No DR', 'Mild', 'Moderate', 'Severe', 'Proliferative DR']
})

train_df = train_df.merge(diagnosis_df, how="left", on="diagnosis")

# Left merges keep every CSV row; a row whose image is not on disk ends up
# with a missing value in "files".
train_image_files = _list_files(os.path.join(INPUT_DIR, "train_images"))
train_df = train_df.merge(_index_images(train_image_files), how="left", on="id_code")
print("Shape of train data: {0}".format(train_df.shape))

test_image_files = _list_files(os.path.join(INPUT_DIR, "test_images"))
test_df = test_df.merge(_index_images(test_image_files), how="left", on="id_code")
print("Shape of test data: {0}".format(test_df.shape))

# Any results you write to the current directory are saved as output.


In [None]:
# Preview the first rows of the training frame.
train_df.head()


In [None]:
# Preview the first rows of the test frame.
test_df.head()


In [None]:
# Configuration shared by the data generators, the model and the training call.
IMG_SIZE = 150      # side length used for the square target_size / input_shape
BATCH_SIZE = 32
EPOCHS = 1
EPOCH_STEPS = 10    # number of batches drawn per training epoch

# One class per distinct diagnosis value; class names are the strings "0".."N-1".
N_CLASSES = train_df["diagnosis"].nunique()
CLASSES = [str(class_index) for class_index in range(N_CLASSES)]


In [None]:
import tensorflow as tf
print(tf.__version__)

# Take ImageDataGenerator from tf.keras rather than from the standalone
# `keras` package: the model below is built with tf.keras, and using a single
# Keras implementation avoids depending on a second, possibly mismatched one.
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# CLASSES holds string labels, so the label column is cast to str to match.
train_df["diagnosis"] = train_df["diagnosis"].astype(str)

# Pixel values are rescaled by 1/255; 30% of train_df is held out for validation.
train_data_gen = ImageDataGenerator(rescale=1.0/255, validation_split=0.3)


def _labelled_flow(subset):
    """Return the batch iterator over one subset ("training" or "validation") of train_df.

    Both subsets share every setting except ``subset``; labels are produced in
    "sparse" mode for the classes listed in CLASSES.
    """
    return train_data_gen.flow_from_dataframe(
        dataframe=train_df,
        x_col="files",
        y_col="diagnosis",
        batch_size=BATCH_SIZE,
        shuffle=True,
        classes=CLASSES,
        class_mode="sparse",
        target_size=(IMG_SIZE, IMG_SIZE),
        subset=subset,
    )


train_data = _labelled_flow("training")
validation_data = _labelled_flow("validation")

# Test images carry no labels (class_mode=None). One image per batch and no
# shuffling, so the iteration order follows the iterator's `filenames`.
test_data_gen = ImageDataGenerator(rescale=1./255)
test_data = test_data_gen.flow_from_dataframe(
    dataframe=test_df,
    x_col="files",
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=1,
    shuffle=False,
    class_mode=None
)


In [None]:
# Small CNN classifier: two Conv2D/MaxPooling2D stages, then three fully
# connected layers and an N_CLASSES-way softmax output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(N_CLASSES, activation='softmax')
])

# `learning_rate` is the documented argument name; `lr` is a deprecated alias
# that newer Keras versions no longer accept.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=1e-6)

# The sparse loss takes integer class indices as targets rather than one-hot vectors.
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
# Train for EPOCHS epochs of EPOCH_STEPS batches each, evaluating on 10
# validation batches at the end of every epoch.
model.fit(
    train_data,
    steps_per_epoch=EPOCH_STEPS,  # keyword is singular; "steps_per_epochs" raises TypeError
    epochs=EPOCHS,
    validation_data=validation_data,
    validation_steps=10
)


In [None]:
# Predict class probabilities for every test image.
filenames = test_data.filenames

# Restart the iterator at its first file so that, even when this cell is
# re-run, row i of the predictions corresponds to filenames[i].
test_data.reset()

# Model.predict accepts the generator directly (predict_generator is the
# deprecated spelling); this matches the model.fit call on a generator above.
# One step per file is used when the iterator yields one image per batch.
classifications = model.predict(test_data, steps=len(filenames))


In [None]:
# Build the submission table: one row per test image with its most probable class.
# The id is the file's base name without its extension, so it does not rely on
# a fixed directory depth or on the extension being exactly three characters.
results = pd.DataFrame({
    "id_code": [os.path.splitext(os.path.basename(str(name)))[0] for name in filenames],
    "diagnosis": np.argmax(classifications, axis=1)
})
results.head()


In [None]:
# Write the predictions to submission.csv in the current directory, without the index column.
results.to_csv("submission.csv", index=False)
