In [1]:
# Imports first, then display/renderer configuration for the whole notebook.
import os 
import pandas as pd
import altair as alt
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Show up to 100 characters per cell when pandas prints text columns.
pd.options.display.max_colwidth = 100
alt.renderers.enable('html')
alt.data_transformers.disable_max_rows() # altair now handles larger datasets

# Widen the notebook container to 80% of the browser window.
display(HTML("<style>.container { width:80% !important; }</style>"))

In [2]:
# Data import (Already filtered data!)
# The master table is read from a fixed location on an external volume.
df_Path = (
    "/Volumes/TOB_WD2/Image_Analysis/EB1/Dataframes"
    "/MasterDataFrame_EB1.csv"
)
df = pd.read_csv(df_Path)

# Report the table size right after loading.
print("The shape of the df after import: " + str(df.shape))

The shape of the df after import: (376, 35)


In [3]:
# Filter datasets
# Experiments that are retained for the analysis.
Keep = ["20220916", "20220921", "20220922", "20221021", "20221027", "20221103"]

# Keep a row when it belongs to a retained experiment AND its cell volume is
# below 4500 (or missing) AND its track velocity is above 0.15 (or missing).
df = df.loc[
    df["Experiment"].isin(Keep)
    & (df["Cell_Volume_um3"].lt(4500) | df["Cell_Volume_um3"].isna())
    & (df["Average_Track_Velocity"].gt(0.15) | df["Average_Track_Velocity"].isna())
]

print("The shape of the df after dataset filtering: " + str(df.shape))

The shape of the df after dataset filtering: (70, 35)


In [71]:
# Number of distinct Cell_ID values per condition.
df.groupby("Condition")["Cell_ID"].nunique()

Condition
FBS+LIF     19
N2B27+RA    51
Name: Cell_ID, dtype: int64

In [72]:
# Mean of the per-row average track velocity, per condition.
df.groupby("Condition")["Average_Track_Velocity"].mean()

Condition
FBS+LIF     0.264460
N2B27+RA    0.253579
Name: Average_Track_Velocity, dtype: float64

In [9]:
# Standard deviation of the per-row average track velocity, per condition.
df.groupby("Condition")["Average_Track_Velocity"].std()

Condition
FBS+LIF     0.020574
N2B27+RA    0.039272
Name: Average_Track_Velocity, dtype: float64

In [10]:
# Mean total comet count, per condition.
df.groupby("Condition")["Number_Comets_Total"].mean()

Condition
FBS+LIF     61.083333
N2B27+RA    53.882353
Name: Number_Comets_Total, dtype: float64

In [11]:
# Standard deviation of the total comet count, per condition.
df.groupby("Condition")["Number_Comets_Total"].std()

Condition
FBS+LIF     14.874423
N2B27+RA    10.975156
Name: Number_Comets_Total, dtype: float64

In [12]:
# Mean spindle-bulk comet count, per condition.
df.groupby("Condition")["Number_Comets_SpindleBulk"].mean()

Condition
FBS+LIF     51.333333
N2B27+RA    40.823529
Name: Number_Comets_SpindleBulk, dtype: float64

In [13]:
# Standard deviation of the spindle-bulk comet count, per condition.
df.groupby("Condition")["Number_Comets_SpindleBulk"].std()

Condition
FBS+LIF     12.752148
N2B27+RA     8.998162
Name: Number_Comets_SpindleBulk, dtype: float64

In [14]:
# Mean astral comet count, per condition.
df.groupby("Condition")["Number_Comets_Astral"].mean()

Condition
FBS+LIF      9.750000
N2B27+RA    13.058824
Name: Number_Comets_Astral, dtype: float64

In [15]:
# Standard deviation of the astral comet count, per condition.
df.groupby("Condition")["Number_Comets_Astral"].std()

Condition
FBS+LIF     2.909382
N2B27+RA    3.431847
Name: Number_Comets_Astral, dtype: float64

In [16]:
# Name of the colour scheme shared by all plots: stripbox, Scatter and
# Scatterbin read this module-level value via alt.Scale(scheme = colourscheme).
colourscheme = "accent"

In [17]:
def stripbox(data, x, y, y_title, colour):
    """Build a faceted chart that layers a box plot over a jittered strip plot.

    Parameters
    ----------
    data : data source handed to ``alt.layer`` (the notebook passes a DataFrame).
    x : field that defines the facet columns (one panel per value).
    y : field drawn on the y axis of both layers.
    y_title : title of the y axis.
    colour : field that colours the strip-plot points, using the module-level
        ``colourscheme``; no legend is drawn.

    Returns
    -------
    The faceted, configured Altair chart.
    """
    # White box with black outline; whiskers span the full min-max range.
    box_layer = (
        alt.Chart()
        .mark_boxplot(extent = 'min-max', size = 12)
        .encode(
            y = alt.Y(y, title = y_title),
            opacity = alt.value(1),
            stroke = alt.value('black'),
            color = alt.value('white'),
        )
        .properties(width = 30, height = 200)
    )

    # Horizontal position of every point is a value calculated in Vega from
    # two random() draws, so points with similar y values do not overlap.
    jitter_expression = '(sqrt(-2 * log(random() / 2)) * cos(2 * PI * random() / 2))'
    point_layer = (
        alt.Chart()
        .mark_circle(size = 20, opacity = 1)
        .encode(
            x = alt.X(
                'jitter:Q',
                title = None,
                axis = alt.Axis(values = [0], grid = False, labels = False, ticks = True),
            ),
            y = alt.Y(
                y,
                title = y_title,
                axis = alt.Axis(grid = False, labels = True, ticks = True),
            ),
            color = alt.Color(colour, scale = alt.Scale(scheme = colourscheme), legend = None),
        )
        .transform_calculate(jitter = jitter_expression)
        .properties(width = 15, height = 200)
    )

    # Points are listed first, the box plot second; both share `data`.
    layered = alt.layer(point_layer, box_layer, data = data)

    # Facet labels are rotated and placed below each panel.
    facet_header = alt.Header(
        labelAngle = -90,
        titleOrient = 'top',
        labelOrient = 'bottom',
        labelAlign = 'right',
        labelPadding = 5,
    )
    faceted = layered.facet(column = alt.Column(x, header = facet_header))

    return (
        faceted
        .configure_facet(spacing = 18)
        .configure_axis(grid = True, ticks = True, labelPadding = 5)
        .configure_header(labelOrient = 'bottom', title = None)
        .configure_view(stroke = 'transparent', strokeWidth = 0.5)
    )

In [18]:
def Scatter(dataframe, x, y, color, x_title, y_title,
            Circlesize = 20, 
            CircleOpacity = 0.75,  
            Scatter_width = 200, 
            Scatter_height = 200):
    """Build a plain scatter plot coloured by a grouping field.

    Parameters
    ----------
    dataframe : data source handed to ``alt.Chart``.
    x, y : fields for the x and y axes.
    color : field that colours the points, using the module-level
        ``colourscheme``.
    x_title, y_title : axis titles.
    Circlesize : size of the point marks.
    CircleOpacity : opacity of the point marks.
    Scatter_width, Scatter_height : chart width and height.

    Returns
    -------
    The Altair chart.
    """
    base = alt.Chart(
        data = dataframe, 
        width = Scatter_width, 
        height = Scatter_height
    )

    # `Circlesize` used to be accepted but never forwarded, so it had no
    # effect on the plot; it is now applied to the marks.
    points = base.mark_circle(
        opacity = CircleOpacity,
        size = Circlesize
    )

    SCATTER = points.encode(
        alt.X(x, title = x_title),
        alt.Y(y, title = y_title),
        color = alt.Color(color, scale = alt.Scale(scheme = colourscheme))
    )
    
    return SCATTER

In [20]:
def Scatterbin(dataframe, x, y, color, x_title, y_title, binextent, binstep, x_max,
               Circlesize = 10,
               CircleOpacity = 0.4,
               Scatter_width = 150,
               Scatter_height = 150):
    """Layer raw points, per-bin medians and per-bin error bars in one chart.

    Parameters
    ----------
    dataframe : data source handed to each ``alt.Chart``.
    x, y : fields for the x and y axes.
    color : field that colours every layer, using the module-level
        ``colourscheme``.
    x_title, y_title : axis titles.
    binextent, binstep : ``extent`` and ``step`` of the x-axis binning used
        for the median and error-bar layers.
    x_max : upper end of the x-axis domain of the raw-point layer (lower end is 0).
    Circlesize, CircleOpacity : size and opacity of the raw points.
    Scatter_width, Scatter_height : width and height of the point layers.

    Returns
    -------
    The layered chart: raw points, then error bars, then median points.
    """
    def group_colour():
        # Fresh colour encoding for each layer.
        return alt.Color(color, scale = alt.Scale(scheme = colourscheme))

    def binned_x():
        # x axis cut into bins of width `binstep` over `binextent`.
        return alt.X(x, title = x_title, bin = alt.Bin(extent = binextent, step = binstep))

    def median_y():
        # Median of `y` inside each x bin.
        return alt.Y("median(" + y + ")", title = y_title, bin = False)

    # Layer 1: every observation as a small, semi-transparent point.
    raw_points = alt.Chart(
        data = dataframe, width = Scatter_width, height = Scatter_height
    ).mark_circle(
        opacity = CircleOpacity, size = Circlesize
    ).encode(
        alt.X(x, title = x_title, scale = alt.Scale(domain = [0, x_max])),
        alt.Y(y, title = y_title),
        color = group_colour(),
    )

    # Layer 2: one large opaque point per bin at the median.
    bin_medians = alt.Chart(
        data = dataframe, width = Scatter_width, height = Scatter_height
    ).mark_circle(
        opacity = 1, size = 100
    ).encode(
        binned_x(),
        median_y(),
        color = group_colour(),
    )

    # Layer 3: error bars per bin with extent "iqr".
    # NOTE(review): this mark receives an already aggregated y ("median(...)");
    # confirm that Vega-Lite's error bar treats this as intended.
    bin_spread = alt.Chart(
        data = dataframe
    ).mark_errorbar(extent = "iqr").encode(
        binned_x(),
        median_y(),
        color = group_colour(),
    )

    return raw_points + bin_spread + bin_medians

In [21]:
# List the column names available in the filtered table.
df.columns

Index(['Unnamed: 0', 'Cell_ID', 'Experiment', 'Average_Track_Velocity',
       'Average_Track_Velocity_Stdev', 'Average_Track_Quality',
       'Average_Track_Duration', 'Number_of_Tracks',
       'Number_Comets_SpindleBulk', 'Number_Comets_Astral',
       'Number_Comets_Total', 'Astral_per_Total', 'Spindle_per_Total',
       'Cell_Volume_um3', 'Cell_SurfaceArea_um2', 'Cell_Sphericity',
       'Spindle_Angle_Degrees', 'Spindle_Aspect_Ratio', 'Spindle_Length_um',
       'Spindle_Volume_um3', 'Spindle_Width_Avg_um',
       'Tubulin_Spindle_Average_Intensity', 'Chromatin_Volume_um3',
       'MetaphasePlate_Length_um', 'MetaphasePlate_Width_um',
       'Spindle_Length_um_manual', 'Condition', 'Incubation_h',
       'Spindle_Occupancy', 'Spindle_Length_per_Cell_Volume',
       'CometsTotal_per_Cell_Volume', 'CometsAstral_per_Cell_Volume',
       'CometsBulk_per_Cell_Volume', 'CometsBulk_per_Spindle_Volume',
       'Astral:Bulk_Comets'],
      dtype='object')

In [22]:
# Cell volume: one panel per condition, points coloured by experiment.
STRIP_CV = stripbox(
    data=df,
    x="Condition",
    y="Cell_Volume_um3",
    y_title="Cell volume (µm3)",
    colour="Experiment",
)
STRIP_CV

In [23]:
# Spindle volume: one panel per condition, points coloured by experiment.
STRIP_SV = stripbox(
    data=df,
    x="Condition",
    y="Spindle_Volume_um3",
    y_title="Spindle volume (µm3)",
    colour="Experiment",
)
STRIP_SV

In [24]:
# Manually measured spindle length: one panel per condition, coloured by experiment.
STRIP_SLman = stripbox(
    data=df,
    x="Condition",
    y="Spindle_Length_um_manual",
    y_title="Spindle length manual (µm)",
    colour="Experiment",
)
STRIP_SLman

In [25]:
# Spindle length per cell volume: one panel per condition, coloured by experiment.
STRIP_SLperCV = stripbox(
    data=df,
    x="Condition",
    y="Spindle_Length_per_Cell_Volume",
    y_title="Spindle length per cell volume (µm-2)",
    colour="Experiment",
)
STRIP_SLperCV

In [26]:
# Summary statistics of spindle length per cell volume, per condition.
(
    df
    .groupby("Condition")["Spindle_Length_per_Cell_Volume"]
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
FBS+LIF,19.0,0.005166,0.000535,0.004316,0.004811,0.004967,0.00567,0.005945
N2B27+RA,51.0,0.004356,0.000538,0.003446,0.003986,0.004293,0.00461,0.006174


In [27]:
# Mean spindle occupancy per condition, printed above the plot.
print(df.groupby("Condition")["Spindle_Occupancy"].mean())

# Spindle occupancy: one panel per condition, points coloured by experiment.
STRIP_SSR = stripbox(
    data=df,
    x="Condition",
    y="Spindle_Occupancy",
    y_title="Spindle occupancy",
    colour="Experiment",
)
STRIP_SSR

Condition
FBS+LIF     12.854146
N2B27+RA    10.664268
Name: Spindle_Occupancy, dtype: float64


In [28]:
# Average track velocity: one panel per condition, points coloured by condition.
STRIP_Velocity = stripbox(
    data=df,
    x="Condition",
    y="Average_Track_Velocity",
    y_title="Growth velocity (µm/s)",
    colour="Condition",
)
STRIP_Velocity

In [29]:
# Mean of the per-row average track velocity, per condition.
df.groupby("Condition")["Average_Track_Velocity"].mean()

Condition
FBS+LIF     0.264460
N2B27+RA    0.253579
Name: Average_Track_Velocity, dtype: float64

In [30]:
# Standard deviation of the per-row average track velocity, per condition.
df.groupby("Condition")["Average_Track_Velocity"].std()

Condition
FBS+LIF     0.020574
N2B27+RA    0.039272
Name: Average_Track_Velocity, dtype: float64

In [31]:
# Rows with a positive velocity (missing values compare False and drop out),
# counted per condition.
df_growth = df.loc[df["Average_Track_Velocity"].gt(0)]
print(df_growth["Condition"].value_counts())

N2B27+RA    25
FBS+LIF     10
Name: Condition, dtype: int64


In [32]:
# Total comet count: one panel per condition, points coloured by experiment.
STRIP_Comets_Total = stripbox(
    data=df,
    x="Condition",
    y="Number_Comets_Total",
    y_title="Nr of total comets",
    colour="Experiment",
)
STRIP_Comets_Total

In [33]:
# Spindle-bulk comet count: one panel per condition, points coloured by condition.
STRIP_Comets_Bulk = stripbox(
    data=df,
    x="Condition",
    y="Number_Comets_SpindleBulk",
    y_title="Nr of spindle bulk comets",
    colour="Condition",
)
STRIP_Comets_Bulk

In [34]:
# Rows with a positive spindle-bulk comet count (missing values drop out),
# counted per condition.
df_bulk = df.loc[df["Number_Comets_SpindleBulk"].gt(0)]
print(df_bulk["Condition"].value_counts())

N2B27+RA    17
FBS+LIF     10
Name: Condition, dtype: int64


In [35]:
# Astral comet count: one panel per condition, points coloured by experiment.
STRIP_Comets_Astral = stripbox(
    data=df,
    x="Condition",
    y="Number_Comets_Astral",
    y_title="Nr of astral comets",
    colour="Experiment",
)
STRIP_Comets_Astral

In [36]:
# No extra filtering is applied here: df_astral is the same object as df
# (a former cut on Astral_per_Total < 0.4 is disabled).
df_astral = df

# Astral share of all comets: one panel per condition, coloured by condition.
STRIP_Comets_AstralPerTotal = stripbox(
    data=df_astral,
    x="Condition",
    y="Astral_per_Total",
    y_title="Nr of Astral Comets per Total Comets",
    colour="Condition",
)
STRIP_Comets_AstralPerTotal

In [37]:
# Astral-to-bulk comet ratio: one panel per condition, coloured by condition.
STRIP_Astral_Bulk_Comets = stripbox(
    data=df_astral,
    x="Condition",
    y="Astral:Bulk_Comets",
    y_title="Nr. of Astral Comets : Nr. of Bulk Comets",
    colour="Condition",
)
STRIP_Astral_Bulk_Comets

In [38]:
# Rows with a positive astral-to-bulk ratio (missing values drop out),
# counted per condition.
df_astral_query = df_astral.loc[df_astral["Astral:Bulk_Comets"].gt(0)]
print(df_astral_query["Condition"].value_counts())

N2B27+RA    17
FBS+LIF     10
Name: Condition, dtype: int64


In [39]:
# Total comets per cell volume: one panel per condition, coloured by condition.
STRIP_CometsTotalperCV = stripbox(
    data=df_astral,
    x="Condition",
    y="CometsTotal_per_Cell_Volume",
    y_title="Nr of Total comets per Cell Volume",
    colour="Condition",
)
STRIP_CometsTotalperCV

In [40]:
# Rows with a positive total-comets-per-cell-volume value (missing values
# drop out), counted per condition.
df_totcomets = df_astral.loc[df_astral["CometsTotal_per_Cell_Volume"].gt(0)]
print(df_totcomets["Condition"].value_counts())

N2B27+RA    17
FBS+LIF     10
Name: Condition, dtype: int64


In [41]:
# Astral comets per cell volume: one panel per condition, coloured by condition.
STRIP_CometsAstralperCV = stripbox(
    data=df_astral,
    x="Condition",
    y="CometsAstral_per_Cell_Volume",
    y_title="Nr of astral comets per Cell Volume",
    colour="Condition",
)
STRIP_CometsAstralperCV

In [42]:
# Spindle-bulk comets per cell volume: one panel per condition, coloured by condition.
STRIP_CometsBulkperCV = stripbox(
    data=df_astral,
    x="Condition",
    y="CometsBulk_per_Cell_Volume",
    y_title="Nr of spindle bulk comets per Cell Volume",
    colour="Condition",
)
STRIP_CometsBulkperCV

In [43]:
# Spindle-bulk comets per spindle volume: one panel per condition, coloured by condition.
STRIP_CometsBulkperSV = stripbox(
    data=df_astral,
    x="Condition",
    y="CometsBulk_per_Spindle_Volume",
    y_title="Nr of spindle bulk comets per Spindle Volume",
    colour="Condition",
)
STRIP_CometsBulkperSV

In [44]:
# Spindle_Length_um (y) against the manually measured length (x),
# coloured by condition.
SCATTER_SL_SLman = Scatter(
    dataframe=df,
    x="Spindle_Length_um_manual",
    y="Spindle_Length_um",
    color="Condition",
    x_title="Spindle_Length_um_manual",
    y_title="Spindle_Length_um",
)
SCATTER_SL_SLman

In [45]:
# Average track velocity against cell volume, coloured by condition.
SCATTER_CV_Velocity = Scatter(
    dataframe = df, 
    x = "Cell_Volume_um3", 
    y = "Average_Track_Velocity", 
    color = "Condition", 
    x_title = "Cell volume (um3)", 
    y_title = "Mean velocity per cell"
)

# A call to `Scatter_bin(...)` used to follow here. No function of that name
# is defined in this notebook (only `Scatter` and `Scatterbin`), so the cell
# raised NameError on a fresh kernel; its result was only referenced in
# commented-out code, so the call was removed.
# NOTE(review): restore it if `Scatter_bin` is meant to be defined elsewhere.

# Overlay one regression line fitted over all rows (no grouping).
SCATTER_CV_Velocity = SCATTER_CV_Velocity + SCATTER_CV_Velocity.transform_regression('Cell_Volume_um3', 'Average_Track_Velocity').mark_line()

SCATTER_CV_Velocity

In [46]:
# Rows where both cell volume and velocity are positive (missing values
# drop out), counted per condition.
df_CV_velo = df.loc[df["Cell_Volume_um3"].gt(0) & df["Average_Track_Velocity"].gt(0)]
print(df_CV_velo["Condition"].value_counts())

N2B27+RA    25
FBS+LIF     10
Name: Condition, dtype: int64


In [47]:
# Rows with a positive spindle volume (missing values drop out),
# counted per condition.
df_Spindle = df.loc[df["Spindle_Volume_um3"].gt(0)]
print(df_Spindle["Condition"].value_counts())

N2B27+RA    15
FBS+LIF      8
Name: Condition, dtype: int64


In [48]:
# Average track velocity against spindle volume, coloured by condition.
SCATTER_SV_Velocity = Scatter(
    dataframe = df, 
    x = "Spindle_Volume_um3", 
    y = "Average_Track_Velocity", 
    color = "Condition", 
    x_title = "Spindle volume (um3)", 
    y_title = "Mean velocity per cell"
)

# A call to `Scatter_bin(...)` used to follow here. No function of that name
# is defined in this notebook (only `Scatter` and `Scatterbin`), so the cell
# raised NameError on a fresh kernel; its result was only referenced in
# commented-out code, so the call was removed.
# NOTE(review): restore it if `Scatter_bin` is meant to be defined elsewhere.

# Average track velocity against manually measured spindle length.
SCATTER_SLman_Velocity = Scatter(
    dataframe = df, 
    x = "Spindle_Length_um_manual", 
    y = "Average_Track_Velocity", 
    color = "Condition", 
    x_title = "Spindle length (µm) manual", 
    y_title = "Mean velocity per cell"
)

#SCATTER_SV_Velocity = SCATTER_SV_Velocity + SCATTER_SV_Velocity.transform_regression('Spindle_Volume_um3', 'Average_Track_Velocity', groupby = ['Condition']).mark_line()
# One regression line per condition. The dependent field must match the column
# name exactly; it previously carried a leading space (' Average_Track_Velocity'),
# which does not name any column of the table.
SCATTER_SLman_Velocity = SCATTER_SLman_Velocity + SCATTER_SLman_Velocity.transform_regression('Spindle_Length_um_manual', 'Average_Track_Velocity', groupby = ['Condition']).mark_line()

#SCATTER_SV_Velocity|SCATTER_SLman_Velocity
SCATTER_SLman_Velocity

In [49]:
# Velocity against manual spindle length: raw points plus per-bin medians and
# error bars, with bins of width 2 over [0, 15].
SCATTERBIN_SLman_Velocity = Scatterbin(
    dataframe=df,
    x="Spindle_Length_um_manual",
    y="Average_Track_Velocity",
    color="Condition",
    x_title="Spindle length (µm) manual",
    y_title="Mean velocity per cell",
    binextent=[0, 15],
    binstep=2,
    x_max=16,
)
SCATTERBIN_SLman_Velocity

In [50]:
# Rows where both manual spindle length and velocity are positive (missing
# values drop out), counted per condition.
df_SL_velo = df.loc[df["Spindle_Length_um_manual"].gt(0) & df["Average_Track_Velocity"].gt(0)]
print(df_SL_velo["Condition"].value_counts())

N2B27+RA    25
FBS+LIF     10
Name: Condition, dtype: int64


In [51]:
# Velocity against cell volume: raw points plus per-bin medians and error
# bars, with bins of width 500 over [0, 4000].
SCATTERBIN_CV_Velocity = Scatterbin(
    dataframe=df,
    x="Cell_Volume_um3",
    y="Average_Track_Velocity",
    color="Condition",
    x_title="Cell volume (µm3)",
    y_title="Mean velocity per cell",
    binextent=[0, 4000],
    binstep=500,
    x_max=4500,
)
SCATTERBIN_CV_Velocity

In [52]:
# Spindle-bulk comet count against manual spindle length, with bins of
# width 2 over [0, 15].
SCATTERBIN_SLman_NrBulk = Scatterbin(
    dataframe=df,
    x="Spindle_Length_um_manual",
    y="Number_Comets_SpindleBulk",
    color="Condition",
    x_title="Spindle length (µm) manual",
    y_title="Nr Bulk",
    binextent=[0, 15],
    binstep=2,
    x_max=16,
)
SCATTERBIN_SLman_NrBulk

In [53]:
# Total comet count against manual spindle length, with bins of width 2
# over [0, 15].
SCATTERBIN_SLman_NrTotal = Scatterbin(
    dataframe=df,
    x="Spindle_Length_um_manual",
    y="Number_Comets_Total",
    color="Condition",
    x_title="Spindle length (µm) manual",
    y_title="Nr Total",
    binextent=[0, 15],
    binstep=2,
    x_max=16,
)
SCATTERBIN_SLman_NrTotal

In [54]:
# Spindle-bulk comet count against cell volume, with bins of width 500
# over [0, 4000].
SCATTERBIN_CV_NrBulk = Scatterbin(
    dataframe=df,
    x="Cell_Volume_um3",
    y="Number_Comets_SpindleBulk",
    color="Condition",
    x_title="Cell volume um3",
    y_title="Nr Bulk",
    binextent=[0, 4000],
    binstep=500,
    x_max=4500,
)
SCATTERBIN_CV_NrBulk

In [55]:
# Total comet count against cell volume, with bins of width 500 over [0, 4000].
SCATTERBIN_CV_NrTotal = Scatterbin(
    dataframe=df,
    x="Cell_Volume_um3",
    y="Number_Comets_Total",
    color="Condition",
    x_title="Cell volume um3",
    y_title="Nr Total",
    binextent=[0, 4000],
    binstep=500,
    x_max=4500,
)
SCATTERBIN_CV_NrTotal

In [56]:
# Astral comet count against cell volume, with bins of width 500 over [0, 4000].
SCATTERBIN_CV_NrAstral = Scatterbin(
    dataframe=df,
    x="Cell_Volume_um3",
    y="Number_Comets_Astral",
    color="Condition",
    x_title="Cell volume um3",
    y_title="Nr Astral",
    binextent=[0, 4000],
    binstep=500,
    x_max=4500,
)
SCATTERBIN_CV_NrAstral

In [60]:
# Chart of the mean spindle-bulk comet count against spindle volume.
# NOTE(review): `Scatter_bin` is not defined in the visible part of this
# notebook (only `Scatter` and `Scatterbin` are, and `Scatterbin` takes
# different arguments) — confirm it is defined before this cell runs,
# otherwise the call raises NameError on a fresh kernel.
SCATTER_bin_SCATTER_SV_NrCometsBulk = Scatter_bin(
    dataframe = df, 
    x = "Spindle_Volume_um3", 
    y = "mean(Number_Comets_SpindleBulk)", 
    color = "Condition", 
    x_title = "Spindle volume (µm3)", 
    y_title = "Number_Comets_SpindleBulk",
    Circlesize = 50
)