In [11]:
import numpy as np
from matplotlib.image import imread


class ImageDatasetBuilder:
    """
    Given a DataFrame whose index is a set of image IDs (as with {train, test}.csv), returns featurized images.

    Every image is read from ``<source>/<image id>.png``, cropped to its leading ``x_dim`` by ``y_dim``
    entries, and flattened into one feature row.
    """
    def __init__(self, x_dim=100, y_dim=100, source='../data/train/images/', mask=False):
        """
        Builds the featurized image transform.

        x_dim: int
            The X dimension to crop the images to. Applied to the first axis of the loaded array.
        y_dim: int
            The Y dimension to crop the images to. Applied to the second axis of the loaded array.
        source: str
            Path to the folder containing the image files.
        mask: boolean
            If true, the underlying data is a mask and every value of the cropped array is kept.
            If false, the underlying data is RGB. If the data is RGB, we take just the R component
            and skip the GB, because the images are grayscale anyway.
        """
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.source = source
        self.mask = mask

    def fit(self, X, y=None):
        """
        No-op: the transform keeps no fitted state.

        ``y`` is optional so the builder can be fitted with or without targets.
        Returns ``self``.
        """
        return self

    def _featurize(self, img_id):
        """Loads one image by ID, crops it, and returns its features as a flat 1-D array."""
        img = imread(f'{self.source}/{img_id}.png')[:self.x_dim, :self.y_dim]
        if not self.mask and img.ndim == 3:
            # Select the first (R) channel explicitly rather than striding the flattened
            # data by 3, which is only correct when the image has exactly three channels.
            img = img[..., 0]
        return np.ravel(img)

    def transform(self, X):
        """
        Featurizes the images whose IDs form ``X.index``.

        Returns an array with one flattened feature row per ID, in index order. When the
        index is empty the result is an empty array.
        """
        return np.asarray([self._featurize(img_id) for img_id in X.index.values])


from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier


def buildClassifier(dim=50, hidden_nodes=100, lr=0.1, output_activation='softmax',
                    loss='categorical_crossentropy'):
    """
    Returns a zero-argument factory that builds and compiles the Keras model.

    The model is a Sequential network with one hidden ReLU layer. Its input and output
    widths are both ``dim**2``.

    dim: int
        Side length used to size the input and output layers (``dim**2`` units each).
    hidden_nodes: int
        Number of units in the hidden layer.
    lr: float
        Learning rate passed to the SGD optimizer.
    output_activation: str
        Activation of the output layer.
    loss: str
        Loss the model is compiled with.

    NOTE(review): the defaults reproduce the original softmax / categorical cross-entropy
    setup. If the targets are independent per-pixel binary masks, 'sigmoid' with
    'binary_crossentropy' is probably the better pairing -- confirm before switching.
    """
    n_units = dim ** 2

    def ret():
        # A fresh model is built on every call, so the factory can be invoked repeatedly.
        clf = Sequential()
        clf.add(Dense(hidden_nodes, activation='relu', input_dim=n_units))
        clf.add(Dense(n_units, activation=output_activation))
        clf.compile(loss=loss, optimizer=SGD(lr=lr))
        return clf
    return ret

In [5]:
import pandas as pd
# Depth table for every image ID; only needed while building the train/test frames.
_depths = pd.read_csv("../data/depths.csv", index_col="id")

# Training frame: the IDs listed in train.csv (first column only), joined with their depths.
train = pd.read_csv("../data/train.csv", index_col="id", usecols=[0]).join(_depths)

# Test frame: every ID in the depth table that is not a training ID.
test = _depths.loc[~_depths.index.isin(train.index)]
del _depths

# Featurize the training images (X) and their masks (y) in the same ID order.
X = ImageDatasetBuilder(source='../data/train/images/').transform(train)
y = ImageDatasetBuilder(source='../data/train/masks/', mask=True).transform(train)

In [14]:
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression

# Single-step pipeline: the Keras model wrapped as a scikit-learn estimator.
clf = make_pipeline(
    KerasClassifier(
        build_fn=buildClassifier(dim=100, hidden_nodes=100),
        epochs=10,
        batch_size=20,
    ),
)

In [15]:
# Train the pipeline on the featurized images (X) against the featurized masks (y).
# As the cell's last expression, the fitted Pipeline is also displayed.
clf.fit(X, y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Pipeline(memory=None,
     steps=[('kerasclassifier', <keras.wrappers.scikit_learn.KerasClassifier object at 0x7feab2ccca90>)])