# Cellpose program. 

This is a program used to generate a new cellpose model based on a series of representative images and their corresponding labelled images. 

In [1]:
from cellpose import core, models, io, metrics
import os

import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import tqdm 
import tifffile as tf

import tqdm
import tkinter as tk
from tkinter import filedialog


from PIL import Image

import datetime

-------
### Write in cell type

In [8]:
cell_type = 'KP1080'

### set the Folder path for the test images

In [2]:
root = tk.Tk()
root.withdraw() # Stops a second window opening
image_folder = filedialog.askdirectory(title = 'Select image Folder')

### Set the folder path for the user defined masks

In [3]:
root = tk.Tk()
root.withdraw() # Stops a second window opening
mask_folder = filedialog.askdirectory(title = 'Select Masks Folder')

--------------
## Create a method to extract all the filenames from a folder. 

In [4]:
def get_files_from_folder(folder_path): 
    '''A method to extract all files from the image.'''

    file_list = os.listdir(folder_path)
    image_files = []
    
    for i in range( len(file_list) ): 
        if file_list[i][-4:] == '.tif' or file_list[i][-4:] == '.png':
            image_files.append(file_list[i])
        
    return(image_files)

----
## Create a method to download in the image data from the image file name. 

In [5]:
def get_image_data(image_file):
    '''Get the image data from the file using Pillow.
    Convert the PILLOW image to a numpy array'''

    image_data = tf.imread(image_file)
    
    # print(image_data.getexif())
    
    np_image_data = np.array(image_data)

    return(np_image_data)

-----
## Collect the image files from folder. 

In [6]:
image_file_list = get_files_from_folder(image_folder)
mask_file_list = get_files_from_folder(mask_folder)


print(image_folder)
print(mask_folder)
print( len(image_file_list) )
print( len(mask_file_list) )


//data.beatson.gla.ac.uk/data/RCORBYN/User_Data/Current Projects/20240108_Ximena_cell_tracking/incucyte/Testing/Test 11/Dataset/XR175/QuPath/ground_truth/_images
//data.beatson.gla.ac.uk/data/RCORBYN/User_Data/Current Projects/20240108_Ximena_cell_tracking/incucyte/Testing/Test 11/Dataset/XR175/QuPath/ground_truth/_masks
20
20


----
## extract datasets for testing/training

In [7]:
training_image_index = np.random.choice(len(image_file_list)-1, size = int(0.8*(len(image_file_list)-1)), replace = False)

print('Training image Files :' + str(np.sort(training_image_index)) )
print('Number of training Images: ' + str(len(training_image_index)))

# Create test image index. 
test_image_index = []

for i in range(len(image_file_list)):
    if len(np.where(training_image_index == i)[0]) == 0:
        test_image_index.append(i)

print('Validation image files :' + str(test_image_index) )
print('Number of Validation Images: ' + str(len(test_image_index)) )

Training image Files :[ 1  2  4  5  6  9 10 11 12 13 14 15 16 17 18]
Number of training Images: 15
Validation image files :[0, 3, 7, 8, 19]
Number of Validation Images: 5


--------
## Get test images and user_masks into a format for cellpose model Training. 

In [8]:
# Initialise 
ground_truth_training = []
training_images = []

for i in range(len(training_image_index)):
    # get the image data
    image_file_name = image_file_list[training_image_index[i]]
    individual_image = get_image_data(image_folder + '/'+ image_file_name)
    training_images.append(individual_image)
    # get the corresponding user_defined_mask
    mask_file_name = image_file_name[0:-4] + '.tif'
    # print(image_file_name)
    # print(mask_file_name)
    user_mask = get_image_data(mask_folder + '/'+ mask_file_name)
    ground_truth_training.append(user_mask)

    
# ground_truth = np.array(ground_truth)
print(training_images[0].shape)
print(ground_truth_training[0].shape)
print(len(training_images))

(300, 300)
(300, 300)
15


-----
## Get test images and user_masks into a format for cellpose model evaluation. 

In [9]:
ground_truth_test = []
test_images = []

for i in range(len(test_image_index)):
    # get the image data
    image_file_name = image_file_list[test_image_index[i]]
    individual_image = get_image_data(image_folder + '/'+ image_file_name)
    test_images.append(individual_image)
    # get the corresponding user_defined_mask
    mask_file_name = image_file_name[0:-4] + '.tif'
    # print(image_file_name)
    # print(mask_file_name)
    user_mask = get_image_data(mask_folder + '/'+ mask_file_name)
    ground_truth_test.append(user_mask)

    
# ground_truth = np.array(ground_truth)
print(test_images[i].shape)
print(ground_truth_test[i].shape)
print(len(test_images))

(300, 300)
(300, 300)
5


---------
### Create a meta_data file

In [7]:
def save_metadata():
    '''Save the training parameters for the deep learning model.  '''

    meta_data = pd.DataFrame({'model_name': [model_name], 
                              'Number of Training Images': [num_training_ims], 
                              'Number of Test Images': [num_testing_ims], 
                              'Number of Epochs': [n_epochs],  
                              'Learning Rate for training': [learning_rate], 
                              'Weight Decay for training': [weight_decay], 
                              'Training data split': [training_data_split], 
                              'Validation data split': [1 - training_data_split], 
                              'Number of images per epoch': [ims_per_epoch], 
                              'Model from the zoo': [start_model], 
                              'Model Accuracy': [ap]})
    
    meta_data.to_csv(os.path.dirname(
                    os.path.dirname(image_folder)) + '/models/' + model_name + '_meatadata.csv')

2024-04-30 09:31:40.028148
2024_04_30
09_31_40


--- 

---
## I will attempt to train a model on the training datasets. 

In [17]:
# This cell is taken directly from the Colab notebook and then 
# modified for my needs. 

# Get the time and date for the mask name. 
date_time_vals = str(datetime.datetime.now())

date = date_time_vals[0:10].replace('-', '_')

point_find = date_time_vals.find('.')
time = date_time_vals[11:point_find].replace(':', '_')

start_model = 'cyto'

# start logger (to see training across epochs)
logger = io.logger_setup()

# DEFINE CELLPOSE MODEL (without size model)
model = models.CellposeModel(gpu=True, model_type=start_model)

# set channels
channels = [0, 0]

n_epochs = 100

learning_rate = 0.1

weight_decay = 0.0001

ims_per_epoch = 6

training_data_split = 0.8

num_training_ims = int(training_data_split * len(training_images)) 

num_testing_ims = len(training_images) - int(training_data_split * len(training_images)) 

model_name =  date + '_' + time + '_' + cell_type 


test_files2 = list(test_images)
test_masks2 = list(ground_truth_test)

# # get files
# output = io.load_train_test_data(train_dir, test_dir, mask_filter='_seg.npy')
# # train_data, train_labels, _, test_data, test_labels, _ = output

new_model_path = model.train(training_images[0 : num_training_ims ], ground_truth_training[0 : num_training_ims ], 
                              test_data=training_images[num_training_ims:],
                              test_labels=ground_truth_training[num_training_ims:],
                              channels=channels, 
                              save_path=os.path.dirname(image_folder), 
                              n_epochs=n_epochs,
                              learning_rate=learning_rate, 
                              weight_decay=weight_decay, 
                              nimg_per_epoch=ims_per_epoch,
                              model_name = model_name )

# diameter of labels in training images
diam_labels = model.diam_labels.copy()

print(test_images[0].shape)

creating new log file
2024-02-16 13:06:41,216 [INFO] WRITING LOG OUTPUT TO C:\Users\rcorbyn\.cellpose\run.log
2024-02-16 13:06:41,217 [INFO] 
cellpose version: 	2.2.2 
platform:       	win32 
python version: 	3.8.13 
torch version:  	1.13.1
2024-02-16 13:06:41,220 [INFO] >> cyto << model set to be used
2024-02-16 13:06:41,222 [INFO] ** TORCH CUDA version installed and working. **
2024-02-16 13:06:41,223 [INFO] >>>> using GPU
2024-02-16 13:06:41,399 [INFO] >>>> model diam_mean =  30.000 (ROIs rescaled to this size during training)
2024-02-16 13:06:41,438 [INFO] computing flows for labels


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 76.42it/s]

2024-02-16 13:06:41,626 [INFO] computing flows for labels



100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 75.02it/s]

2024-02-16 13:06:41,685 [INFO] >>>> median diameter set to = 30
2024-02-16 13:06:41,686 [INFO] >>>> mean of training label mask diameters (saved to model) 6.548
2024-02-16 13:06:41,687 [INFO] >>>> training network with 2 channel input <<<<
2024-02-16 13:06:41,688 [INFO] >>>> LR: 0.10000, batch_size: 8, weight_decay: 0.00010
2024-02-16 13:06:41,689 [INFO] >>>> ntrain = 7, ntest = 3
2024-02-16 13:06:41,692 [INFO] >>>> nimg_per_epoch = 7





2024-02-16 13:06:42,150 [INFO] Epoch 0, Time  0.5s, Loss 1.2746, Loss Test 0.2689, LR 0.0000
2024-02-16 13:06:42,442 [INFO] saving network parameters to //data.beatson.gla.ac.uk/data/RCORBYN/User_Data/Current Projects/20240108_Ximena_cell_tracking/incucyte/Testing/Test 11/Dataset/XR175/QuPath/ground_truth\models/20240216_dead_cell_segment_fluor_2
2024-02-16 13:06:43,785 [INFO] Epoch 5, Time  2.1s, Loss 0.7654, Loss Test 0.1793, LR 0.0556
2024-02-16 13:06:45,261 [INFO] Epoch 10, Time  3.6s, Loss 0.0810, Loss Test 10.0485, LR 0.1000
2024-02-16 13:06:48,078 [INFO] Epoch 20, Time  6.4s, Loss 0.0730, Loss Test 1.1373, LR 0.1000
2024-02-16 13:06:50,924 [INFO] Epoch 30, Time  9.2s, Loss 0.0341, Loss Test 0.1690, LR 0.1000
2024-02-16 13:06:53,776 [INFO] Epoch 40, Time 12.1s, Loss 0.0238, Loss Test 0.1589, LR 0.1000
2024-02-16 13:06:56,587 [INFO] Epoch 50, Time 14.9s, Loss 0.0219, Loss Test 0.1066, LR 0.1000
2024-02-16 13:06:59,405 [INFO] Epoch 60, Time 17.7s, Loss 0.0338, Loss Test 0.1064, LR 

In [18]:
retrained_masks = []

model_path = '20240216_dead_cell_segment_fluor_2'

# declare model
model = models.CellposeModel(gpu=True, model_type=model_path)

for i in tqdm.tqdm(range(len(test_images) )):
    masks = model.eval(test_images[i], channels = [0, 0], diameter = None)[0]
    retrained_masks.append(masks)

# Check the performance of the model using IoU metric. 
ap = metrics.average_precision(ground_truth_test, retrained_masks)[0]
print(ap[:,0].mean()) 

2024-02-16 13:07:31,922 [INFO] >> 20240216_dead_cell_segment_fluor_2 << model set to be used
2024-02-16 13:07:31,925 [INFO] ** TORCH CUDA version installed and working. **
2024-02-16 13:07:31,927 [INFO] >>>> using GPU
2024-02-16 13:07:32,102 [INFO] >>>> model diam_mean =  30.000 (ROIs rescaled to this size during training)
2024-02-16 13:07:32,103 [INFO] >>>> model diam_labels =  6.548 (mean diameter of training ROIs)


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:04<00:00,  1.10it/s]

0.0





In [14]:
for i in range( len(test_image_index) ):
    print(image_file_list[test_image_index[i]])

Training_image_XR175_G11_00.tif
Training_image_XR175_G11_03.tif
Training_image_XR175_G11_07.tif
Training_image_XR175_G11_08.tif
Training_image_XR175_H12_19.tif


In [19]:
im = 0
fig1, ax1 = plt.subplots()
ax1.imshow(test_images[im][ :, :], vmin = 50)
ax1.imshow(retrained_masks[im], alpha = 0.25, cmap = 'inferno_r', vmax = 1)
ax1.set_title('Cellpose Masks')

fig3, ax3 = plt.subplots()
ax3.imshow(test_images[im][ :, :], vmin = 50)
ax3.imshow(ground_truth_test[im], alpha = 0.25, cmap = 'inferno_r', vmax = 1)
ax3.set_title('Ground truth')

Text(0.5, 1.0, 'Ground truth')

Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x000002816CA90E50> (for post_execute):


ValueError: minvalue must be less than or equal to maxvalue

ValueError: minvalue must be less than or equal to maxvalue

<Figure size 640x480 with 1 Axes>

ValueError: minvalue must be less than or equal to maxvalue

<Figure size 640x480 with 1 Axes>

In [None]:
print(test_images[im].shape)
print(len(retrained_masks))
# %matplotlib
# plt.figure(figsize=(40,16))
fig, ax = plt.subplots()
ax.imshow(test_images[im][ :, :])
fig2, ax2 = plt.subplots()
ax2.imshow(retrained_masks[im], alpha = 0.25, cmap = 'inferno_r', vmax = 1)
# print(np.max(retrained_masks[2]))
