In [101]:
from tensorflow.keras import models
from tensorflow.keras import layers
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from pathlib import Path
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image
import tifffile as tiff
from tensorflow.keras.utils import load_img, img_to_array

In [2]:
MODEL_TARGET = os.environ.get('MODEL_TARGET')
GCP_PROJECT = os.environ.get('GCP_PROJECT')
GCP_REGION = os.environ.get('GCP_REGION')

BUCKET_NAME = os.environ.get('BUCKET_NAME')
BQ_REGION = os.environ.get('BQ_REGION')
BQ_DATASET = os.environ.get('BQ_DATASET')
PLATE_NUMBER = os.environ.get('PLATE_NUMBER')

LOCAL_DATA_PATH = os.path.join(os.path.expanduser('~'), ".morpho_minds_data")

In [6]:
data_df = pd.read_csv(os.path.join(LOCAL_DATA_PATH, PLATE_NUMBER, 'processed', f'{PLATE_NUMBER}_small.csv'))
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2304 entries, 0 to 2303
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PhGolgi      2304 non-null   object 
 1   Hoechst      2304 non-null   object 
 2   ERSyto       2304 non-null   object 
 3   Mito         2304 non-null   object 
 4   ERSytoBleed  2304 non-null   object 
 5   CellCount    2304 non-null   int64  
 6   Well         2304 non-null   object 
 7   PhotoNumber  2304 non-null   int64  
 8   Role         2304 non-null   object 
 9   MMoles       2304 non-null   float64
 10  Plate        2304 non-null   int64  
 11  MeanArea     2304 non-null   float64
dtypes: float64(2), int64(3), object(7)
memory usage: 216.1+ KB


In [47]:
paths_df = data_df['PhGolgi'].map(lambda x: str(Path(LOCAL_DATA_PATH).joinpath(PLATE_NUMBER, 'raw', 'pictures', f'{PLATE_NUMBER}-Ph_golgi', x.split('/')[-1])))

In [147]:
channel_image_files = {'channel' : []}
channel_images = {'channel' : []}

for path in paths_df:

    channel_image_files['channel'].append(path)

    img = load_img(paths_df[0], target_size=(224, 224), color_mode='grayscale')
    img_array = img_to_array(img)
    img_array_expanded = np.expand_dims(img_array, axis=0)

    channel_images['channel'].append(img_array_expanded)

images_batch = np.vstack(channel_images['channel'])
images_batch/=65535

In [148]:
X_train, X_test, y_train, y_test = train_test_split(images_batch, data_df['CellCount'], test_size=0.2, random_state=42)

In [150]:
base_model_score = np.sum((data_df['CellCount'] - data_df['CellCount'].mean())**2)/data_df.shape[0]
base_model_score

771.4876030815972

In [151]:
def initialize_model():

    model = models.Sequential()
    model.add(layers.Conv2D(16, kernel_size=(4,4), activation = 'relu', input_shape = (224,224,1)))
    model.add(layers.MaxPool2D(pool_size=(2,2)))

    model.add(layers.Conv2D(64, kernel_size=(3,3), activation = 'relu'))
    model.add(layers.MaxPool2D(pool_size=(2,2)))

    model.add(layers.Conv2D(64, kernel_size=(2,2), activation = 'relu'))
    model.add(layers.MaxPool2D(pool_size=(2,2)))

    model.add(layers.Conv2D(32, kernel_size=(2,2), activation = 'relu'))
    model.add(layers.MaxPool2D(pool_size=(2,2)))

    model.add(layers.Flatten())

    model.add(layers.Dense(10, activation='relu'))

    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='adam')

    return model

In [152]:
model = initialize_model()
es = EarlyStopping(patience = 15, restore_best_weights = True)

history = model.fit(X_train,
                    y_train,
                    validation_split = 0.3,
                    batch_size = 32,
                    epochs = 100,
                    callbacks = [es],
                    verbose = 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


In [153]:
model.evaluate(X_test, y_test)



759.3409423828125

In [30]:
batch_size = 32
img_height = 180
img_width = 180
data_dir = os.path.join(LOCAL_DATA_PATH, PLATE_NUMBER, 'raw', 'pictures', f'{PLATE_NUMBER}-Ph_golgi')

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size
)


Found 0 files belonging to 0 classes.
Using 0 files for training.


ValueError: No images found in directory /Users/pepe/.morpho_minds_data/24585/raw/pictures/24585-Ph_golgi/. Allowed formats: ('.bmp', '.gif', '.jpeg', '.jpg', '.png')