In [1]:
import numpy as np
import pandas as pd

# Example for solving Course Challenge using GitHub Copilot.

In the following cells, I provide an example of several propmts in the form of comments that can aid in solving the UQ-Bio Summer School Image Processing Challenge.  This notebook will only sketch out a solution -- you will need to make adjustments to achieve a full solution.

In [2]:
# Part One -- Processing Image Data

# In the first part of the challenge, we load a simulated video file and process the images.

# To accomplish this, we will write a routine that takes the file name of a video file as input 
# and returns a list of features extracted from the images.

# Each video has N_times=11 time points: 0, 100, 200, ..., 1000

# For each video, the processor finds N_cells. This number will change for each video depending
# on how many cells are detected.

# The image processor will return a list with one entry for each detected cell. 
# Within each list will be a dictionary of 1xN_times numpy arrays with the following keys:
# 	x_position  -- float  -- the x position of the cell
# 	y_position  -- float -- the y position of the cell
# 	size_cell  -- float -- the size of the cell in pixels
# 	size_nucleus  -- float -- the size of the nucleus in pixels
# 	protein_level  -- float -- the protein level in the cell in arbitrary units
# 	number_mRNA_nucleus  -- int -- the number of mRNA in the nucleus
# 	number_mRNA_cytoplasm  -- int -- the number of mRNA in the cytoplasm
# 	number_TS   -- int -- the number of transcription sites
# 	intensity_1st_brightest_TS  -- float -- the intensity of the brightest transcription site
# 	intensity_2nd_brightest_TS  -- float -- the intensity of the second brightest transcription site 
# 	is_cell_vital  -- bool -- whether the cell is alive or dead

# For example, a list of features for a two cell video might look like this:
exampleOutput = [
    {
        'x_position': 1.0*np.ones(11),
        'y_position': 2.0*np.ones(11),
        'size_cell': 3.0*np.ones(11),
        'size_nucleus': 4.0*np.ones(11),
        'protein_level': 5.0*np.ones(11),
        'number_mRNA_nucleus': 6*np.ones(11),
        'number_mRNA_cytoplasm': 7*np.ones(11),
        'number_TS': 8*np.ones(11),
        'intensity_1st_brightest_TS': 9.0*np.ones(11),
        'intensity_2nd_brightest_TS': 10.0*np.ones(11),
        'is_cell_vital': [True, False, True, False, True, False, True, False, True, False, True]
    },
    {
        'x_position': 11.0*np.ones(11),
        'y_position': 12.0*np.ones(11),
        'size_cell': 13.0*np.ones(11),
        'size_nucleus': 14.0*np.ones(11),
        'protein_level': 15.0*np.ones(11),
        'number_mRNA_nucleus': 16*np.ones(11),
        'number_mRNA_cytoplasm': 17*np.ones(11),
        'number_TS': 18*np.ones(11),
        'intensity_1st_brightest_TS': 19.0*np.ones(11),
        'intensity_2nd_brightest_TS': 20.0*np.ones(11),
        'is_cell_vital': [False, True, False, True, False, True, False, True, False, True, False]
    }
]

print(exampleOutput)

[{'x_position': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]), 'y_position': array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]), 'size_cell': array([3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.]), 'size_nucleus': array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.]), 'protein_level': array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.]), 'number_mRNA_nucleus': array([6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.]), 'number_mRNA_cytoplasm': array([7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 7.]), 'number_TS': array([8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.]), 'intensity_1st_brightest_TS': array([9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.]), 'intensity_2nd_brightest_TS': array([10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.]), 'is_cell_vital': [True, False, True, False, True, False, True, False, True, False, True]}, {'x_position': array([11., 11., 11., 11., 11., 11., 11., 11., 11., 11., 11.]), 'y_position': array([12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.]), 'size_cell': a

In [9]:
# Next, the data from the image processor will be stored in a Pandas DataFrame. 

# The names of the columns in the DataFrame will include "rep_num", "fov_num", "cell_num", "time"
# and all of the previously defined feature names.

# The "rep_num" column will be an integer that indicates the replicate number of the video.
# The "fov_num" column will be an integer that indicates the field of view number of the video.
# The "cell_num" column will be an integer that indicates the cell number of the video.
# The "time" column will be an float that indicates the time of the video.

# To create this dataframe, we will need to write a routine that takes sorts through the list
# and for each combination of cell and time, records the feature value in the appropriate 
# columns.

# First, create a list of the feature names

# Next, create a data frame with the columns "rep_num", "fov_num", "cell_num", "time" and all of the feature names

# Loop through each cell in the list
# Loop through the times
# Loop through the feature names in the dictionary
# Record the feature value in the appropriate column

featureNames = list(exampleOutput[0].keys())
# Note that this might change the order -- so let's not rely on orders.

# Create a data frame with the columns "rep_num", "fov_num", "cell_num", "time" and all of the feature names.
df = pd.DataFrame(columns=["rep_num", "fov_num", "cell_num", "time"] + featureNames)

# Loop through each cell in the list
for cell_num, cell in enumerate(exampleOutput):
    # Loop through the times
    for time, time_val in enumerate(range(0, 1100, 100)):
        # Loop through the feature names in the dictionary
        df.loc[len(df)] = [0, 0, cell_num, time_val] + [cell[featureName][time] for featureName in featureNames]

print(df)

    rep_num  fov_num  cell_num  time  x_position  y_position  size_cell  \
0         0        0         0     0         1.0         2.0        3.0   
1         0        0         0   100         1.0         2.0        3.0   
2         0        0         0   200         1.0         2.0        3.0   
3         0        0         0   300         1.0         2.0        3.0   
4         0        0         0   400         1.0         2.0        3.0   
5         0        0         0   500         1.0         2.0        3.0   
6         0        0         0   600         1.0         2.0        3.0   
7         0        0         0   700         1.0         2.0        3.0   
8         0        0         0   800         1.0         2.0        3.0   
9         0        0         0   900         1.0         2.0        3.0   
10        0        0         0  1000         1.0         2.0        3.0   
11        0        0         1     0        11.0        12.0       13.0   
12        0        0     