In [2]:
import os 
import pandas as pd
import altair as alt
# Select Altair's 'html' renderer for chart output in this notebook.
alt.renderers.enable('html')
alt.data_transformers.disable_max_rows() # altair now will allow larger datasets
pd.set_option('display.max_colwidth', None) # show entire cell in dataframe

# `display` and `HTML` come from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
# Inject a CSS rule that sets elements with class "container" to 80% width.
display(HTML("<style>.container { width:80% !important; }</style>"))

In [3]:
# Data import: read the master table from the DataFrames folder and report its shape.
root = "/Volumes/TOB_WD2/Image_Analysis/Osmosis/DataFrames/"
df_Path = "{}MasterDataFrame_Osmosis_Mitosis.csv".format(root)

df = pd.read_csv(df_Path)

print("The shape of the df after import: {}".format(df.shape))

The shape of the df after import: (580, 31)


In [5]:
# Number of distinct Cell_ID values in each Condition.
df.groupby("Condition")["Cell_ID"].nunique()

Condition
Control_6633            35
Control_7525ON_Short    39
Control_8020            27
Control_8020ON_Short    34
Control_8020ON_long     24
Hypo_6633               27
Hypo_7525ON_Short       37
Hypo_8020               29
Hypo_8020ON             18
Hypo_8020ON_Short       28
Hypo_8020ON_long        20
Name: Cell_ID, dtype: int64

In [6]:
keep_datasets = ["20230919", "20230926", "20230928", "20231013"] #75/25 replicates

# Keep rows from the listed experiments, then keep rows whose Volume is
# below 3200 or missing.
df = (
    df[df["Experiment"].isin(keep_datasets)]
    .loc[lambda d: d["Volume"].lt(3200) | d["Volume"].isna()]
)

df.shape

(206, 31)

In [7]:
# `root` already ends with "/", so the file name is appended directly
# (adding another "/" produced a doubled separator in the saved path).
final_out = root + "MasterDataFrame_ODT_Osmosis_Mitosis_for_stats.csv"
df.to_csv(final_out)
print("Successfully saved dataframe to {}".format(final_out))

Successfully saved dataframe to /Volumes/TOB_WD2/Image_Analysis/Osmosis/DataFrames//MasterDataFrame_ODT_Osmosis_Mitosis_for_stats.csv


In [15]:
# Colour scheme name read by the plotting helpers below, which pass it to
# alt.Scale(scheme = colourscheme).
colourscheme = "cividis"

In [16]:
def stripbox(data, x, y, y_title, colour, width = 15, height = 200):
    """Jittered strip plot with a min-max box plot layered on top, faceted by `x`.

    Parameters
    ----------
    data : data handed to `alt.layer` (the notebook passes a pandas DataFrame).
    x : field used for the facet columns.
    y : field plotted on the y axis of both layers.
    y_title : title of the y axis.
    colour : field colouring the strip-plot circles through the module-level
        `colourscheme`; the legend is suppressed.
    width, height : size in pixels of each facet's plotting area.

    Returns
    -------
    The faceted, configured chart.

    Notes
    -----
    The two layers used to request different widths (30 for the box, 15 for
    the strip). Layers share one plotting area, and recent Altair versions
    refuse inconsistent layer sizes, so the size is now set once on the layer.
    The default width matches the first layer (the strip plot).
    """
    # Box layer: white box with black outline, whiskers spanning min to max.
    boxplot = alt.Chart().mark_boxplot(
        extent = 'min-max',
        size = 12
    ).encode(
        y = alt.Y(y, title = y_title),
        opacity = alt.value(1),
        stroke = alt.value('black'),
        color = alt.value('white')
    )

    # Strip layer: circles spread horizontally by a random `jitter` value
    # computed per row; the x axis only shows a tick at 0.
    stripplot = alt.Chart().mark_circle(
        size = 20, opacity = 1
    ).encode(
        x = alt.X(
            'jitter:Q',
            title = None,
            axis = alt.Axis(values = [0], grid = False, labels = False, ticks = True),
        ),
        y = alt.Y(y, title = y_title,
            axis = alt.Axis(grid = False, labels = True, ticks = True)),
        color = alt.Color(colour, scale = alt.Scale(scheme = colourscheme), legend = None)
    ).transform_calculate(
        jitter = '(sqrt(-2 * log(random() / 2)) * cos(2 * PI * random() / 2))'
    )

    # One size for the whole layer, then one facet column per value of `x`.
    FACETCHART = alt.layer(
        stripplot, boxplot, data = data
        ).properties(
            width = width,
            height = height
        ).facet(
            column = alt.Column(x, header = alt.Header(
                labelAngle = -90,
                titleOrient = 'top',
                labelOrient = 'bottom',
                labelAlign = 'right',
                labelPadding = 5)
                )
        ).configure_facet(
            spacing = 18
        ).configure_axis(
                grid = True, ticks = True, labelPadding = 5
        ).configure_header(
            labelOrient = 'bottom', title = None
        ).configure_view(
            stroke = 'transparent',
            strokeWidth = 0.5
        )
    return FACETCHART

In [17]:
def Scatterbin(dataframe, x, y, color, x_title, y_title, binextent, binstep, x_min, x_max, y_min, y_max,
            Circlesize = 20, 
            CircleOpacity = 0.4,  
            Scatter_width = 200, 
            Scatter_height = 200
               ):
    """Layer a raw scatter plot with per-bin medians and inter-quartile error bars.

    Parameters
    ----------
    dataframe : data passed to every layer.
    x, y : fields for the x and y axes.
    color : field colouring all layers through the module-level `colourscheme`.
    x_title, y_title : axis titles.
    binextent, binstep : `extent` and `step` of the x binning used by the
        median and error-bar layers.
    x_min, x_max, y_min, y_max : axis domains of the raw scatter layer.
    Circlesize, CircleOpacity : size and opacity of the raw scatter circles.
    Scatter_width, Scatter_height : chart size in pixels.

    Returns
    -------
    The layered chart: raw scatter + IQR error bars + median circles.
    """
    # Raw observations, drawn on fixed axis domains.
    SCATTER = alt.Chart(
        data = dataframe, 
        width = Scatter_width, 
        height = Scatter_height
    ).mark_circle(
        opacity = CircleOpacity,
        size = Circlesize
    ).encode(
        alt.X(x, title = x_title, scale = alt.Scale(domain = [x_min, x_max])),
        alt.Y(y, title = y_title, scale = alt.Scale(domain = [y_min, y_max])),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)
        ) 
    )
    
    # One large circle per x bin at the median of y.
    SCATTER_bin = alt.Chart(
        data = dataframe, 
        width = Scatter_width, 
        height = Scatter_height
    ).mark_circle(
        opacity = 1,
        size = 200
    ).encode(
        alt.X(x, title = x_title, bin = alt.Bin(extent = binextent, step = binstep)), 
        alt.Y("median(" + y + ")", title = y_title, bin = False), 
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)
        ) 
    )
    
    # Inter-quartile range of y per x bin. The error bar summarises the raw
    # y field, so it must be encoded here; without it the bar has no value
    # axis to compute the IQR on.
    ERROR_Scatterbin = alt.Chart(
            data = dataframe
    ).mark_errorbar(extent = "iqr").encode(
        alt.X(x, title = x_title, bin = alt.Bin(extent = binextent, step = binstep)),
        alt.Y(y, title = y_title),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)
        ) 
    ) 

    # Layer order: raw points at the back, medians on top.
    SCATTERBIN = SCATTER + ERROR_Scatterbin + SCATTER_bin 
    return SCATTERBIN

In [18]:
def Scatter(dataframe, x, y, color, x_title, y_title, log = False):
    """Plain scatter plot of `y` against `x`, coloured by `color`, without a legend.

    When `log == False` the chart is 450x450 with linear axes and colours
    taken from the module-level `colourscheme`. For any other value of `log`
    the chart is 200x200, both axes use a log scale, and the colour channel
    is given `scale = None`.
    """
    # Same comparison as before: only values equal to False pick the linear layout.
    linear_axes = (log == False)

    if linear_axes:
        side, dot_opacity, dot_size = 450, 0.9, 40
        x_encoding = alt.X(x, title = x_title, bin = False)
        y_encoding = alt.Y(y, title = y_title)
        colour_encoding = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme), legend = None
        )
    else:
        side, dot_opacity, dot_size = 200, 0.6, 25
        x_encoding = alt.X(x, title = x_title, scale = alt.Scale(type = 'log'))
        y_encoding = alt.Y(y, title = y_title, scale = alt.Scale(type = 'log'))
        colour_encoding = alt.Color(color, legend = None, scale = None)

    base = alt.Chart(data = dataframe, width = side, height = side)
    points = base.mark_circle(opacity = dot_opacity, size = dot_size)
    return points.encode(x_encoding, y_encoding, color = colour_encoding)

In [19]:
def Binned_Boxplot(
    data, x, y, x_title, y_title, color, column, 
    Boxsize = 7,  
    Bin_Boxplot_width = 100, 
    Bin_Boxplot_height = 250):
    """Min-max box plots of `y` per `x` category, one facet column per `column` value.

    Parameters
    ----------
    data : data passed to `alt.Chart`.
    x, y : fields for the x and y axes.
    x_title : x-axis title; an empty string (or None) hides the title.
    y_title : y-axis title.
    color : field colouring the boxes through the module-level `colourscheme`.
    column : field (or channel definition) used for the facet columns.
    Boxsize : box width passed to `mark_boxplot`.
    Bin_Boxplot_width, Bin_Boxplot_height : size in pixels of each facet.

    Returns
    -------
    The faceted box-plot chart.

    Notes
    -----
    The size parameters and `x_title` used to be accepted but ignored (the
    chart was hard-coded to 100x250 with no x title). They are now honoured;
    the width default is 100 so calls that rely on the defaults render at
    the same size as before.
    """
    BINNED_BOX = alt.Chart(
        data = data, 
        width = Bin_Boxplot_width, 
        height = Bin_Boxplot_height
    ).mark_boxplot(
        size = Boxsize, 
        extent = "min-max"
    ).encode(
        # "" falls back to None so that no x title is drawn.
        alt.X(x, title = x_title or None),
        alt.Y(y, title = y_title),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)
        ),
        column = column
    ).configure_facet(
        spacing = 10
    )
    return BINNED_BOX

In [20]:
def Binned_Mean(
    dataframe, x, y, x_title, y_title, color, column, 
    Circlesize = 150):
    """Mean of `y` per `x` category with standard-deviation error bars, faceted by `column`.

    Parameters
    ----------
    dataframe : data handed to `alt.layer`.
    x, y : fields for the x and y axes.
    x_title : x-axis title; an empty string (or None) hides the title.
        It used to be ignored.
    y_title : y-axis title.
    color : field colouring the mean circles through the module-level
        `colourscheme`; the legend is suppressed.
    column : field used for the facet columns.
    Circlesize : size of the mean circles.

    Returns
    -------
    The faceted chart with error bars behind the mean circles.
    """
    # "" falls back to None so that no x title is drawn.
    shared_x_title = x_title or None

    # Circle at the mean of y for each x category.
    BINNED_Circle = alt.Chart().mark_circle(
        size = Circlesize, 
    ).encode(
        alt.X(x, title = shared_x_title, axis = alt.Axis(grid = False)),
        alt.Y("mean(" + y + ")",  title = y_title, axis = alt.Axis(grid = True)),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme), legend = None
        ),
    ).properties(
        width = 55,
        height = 200
    )
    
    # The error bar derives the standard deviation from the raw y values,
    # so it is given the raw field rather than an already-aggregated mean.
    ERROR_Meanbin = alt.Chart().mark_errorbar(extent = "stdev").encode(
        alt.X(x, title = shared_x_title, axis = alt.Axis(grid = False)),
        alt.Y(y, title = y_title, axis = alt.Axis(grid = True)),
    ).properties(
        width = 55,
        height = 200
    ) 
    
    # Error bars first so the circles are drawn on top of them.
    FACETCHART = alt.layer( 
        ERROR_Meanbin, 
        BINNED_Circle,
        data = dataframe
        ).facet(
            column = alt.Column(column, header = alt.Header(
                labelAngle = -90,
                titleOrient = 'top',
                labelOrient = 'bottom',
                labelAlign = 'right',
                labelPadding = 5)
                )
        ).configure_facet(
            spacing = 1
        )
    
    return FACETCHART

In [21]:
# Strip/box plot of cell volume, one facet per condition.
STRIP_CV = stripbox(
    data=df,
    x="Condition",
    y="Volume",
    y_title="Cell volume (µm3)",
    colour="Condition",
)
STRIP_CV

In [22]:
# Strip/box plot of cell dry mass, one facet per condition.
STRIP_DryMass = stripbox(
    data=df,
    x="Condition",
    y="Dry_mass",
    y_title="Cell Dry mass (pg)",
    colour="Condition",
)
STRIP_DryMass

In [23]:
# Strip/box plot of the Mass_density_cytop_mgml column, one facet per condition.
STRIP_MD_2d = stripbox(
    data=df,
    x="Condition",
    y="Mass_density_cytop_mgml",
    y_title="Mass density 2D (mg/ml)",
    colour="Condition",
)
STRIP_MD_2d

In [24]:
# Strip/box plot of the "Cell mass density (mg/ml)" column, one facet per condition.
STRIP_MD_3d = stripbox(
    data=df,
    x="Condition",
    y="Cell mass density (mg/ml)",
    y_title="Mass density 3D (mg/ml)",
    colour="Condition",
)
STRIP_MD_3d

In [25]:
# Strip/box plot of the Area column (spindle area), one facet per condition.
STRIP_SpindleArea = stripbox(
    data=df,
    x="Condition",
    y="Area",
    y_title="Spindle area (µm2)",
    colour="Condition",
)
STRIP_SpindleArea

In [26]:
# Strip/box plot of spindle occupancy, one facet per condition.
STRIP_SpindleOccupancy = stripbox(
    data=df,
    x="Condition",
    y="Spindle_occupancy",
    y_title="Spindle occupancy",
    colour="Condition",
)
STRIP_SpindleOccupancy

In [27]:
# Strip/box plot of the SpindleArea_DryMass_Ratio column, one facet per condition.
STRIP_SpindleDryMassRatio = stripbox(
    data=df,
    x="Condition",
    y="SpindleArea_DryMass_Ratio",
    y_title="SpindleArea:DryMass",
    colour="Condition",
)
STRIP_SpindleDryMassRatio

In [28]:
# Spindle area against cell volume: raw scatter plus binned summary
# (volume bins of 500 over 0-4000), coloured by condition.
SCATTERBIN_SA_CV = Scatterbin(
    dataframe=df,
    x="Volume",
    y="Area",
    color="Condition",
    x_title="Cell volume (µm3)",
    y_title="Spindle area (µm2)",
    binextent=[0, 4000],
    binstep=500,
    x_min=0,
    x_max=3500,
    y_min=0,
    y_max=35,
)
SCATTERBIN_SA_CV

In [29]:
# Spindle area against dry mass: raw scatter plus binned summary
# (dry-mass bins of 50 over 0-600), coloured by condition.
SCATTERBIN_SA_DM = Scatterbin(
    dataframe=df,
    x="Dry_mass",
    y="Area",
    color="Condition",
    x_title="Dry mass (pg)",
    y_title="Spindle area (µm2)",
    binextent=[0, 600],
    binstep=50,
    x_min=0,
    x_max=400,
    y_min=0,
    y_max=35,
)
SCATTERBIN_SA_DM

In [30]:
# Spindle area against cell mass density: raw scatter plus binned summary
# (density bins of 10 over 0-150), coloured by the Frame column.
SCATTERBIN_SA_MD = Scatterbin(
    dataframe=df,
    x="Cell mass density (mg/ml)",
    y="Area",
    color="Frame",
    x_title="Cell mass density (mg/ml)",
    y_title="Spindle Area",
    binextent=[0, 150],
    binstep=10,
    x_min=0,
    x_max=140,
    y_min=0,
    y_max=35,
)
SCATTERBIN_SA_MD

In [31]:
# Spindle occupancy against cell mass density: raw scatter plus binned summary
# (density bins of 10 over 0-140), coloured by the Frame column.
SCATTERBIN_SO_MD = Scatterbin(
    dataframe=df,
    x="Cell mass density (mg/ml)",
    y="Spindle_occupancy",
    color="Frame",
    x_title="Cell mass density (mg/ml)",
    y_title="Spindle occupancy",
    binextent=[0, 140],
    binstep=10,
    x_min=80,
    x_max=140,
    y_min=0.004,
    y_max=0.013,
)
SCATTERBIN_SO_MD

In [32]:
# SpindleArea_DryMass_Ratio against cell mass density: raw scatter plus binned
# summary (density bins of 10 over 0-140), coloured by condition.
SCATTERBIN_SADM_MD = Scatterbin(
    dataframe=df,
    x="Cell mass density (mg/ml)",
    y="SpindleArea_DryMass_Ratio",
    color="Condition",
    x_title="Cell mass density (mg/ml)",
    y_title="SpindleArea_DryMass_Ratio",
    binextent=[0, 140],
    binstep=10,
    x_min=80,
    x_max=140,
    y_min=0.04,
    y_max=0.13,
)
SCATTERBIN_SADM_MD

In [33]:
# Mass_density_cytop_mgml (axis titled "2D") against "Cell mass density (mg/ml)"
# (axis titled "3D"): raw scatter plus binned summary, coloured by condition.
SCATTERBIN_2D_3D = Scatterbin(
    dataframe=df,
    x="Cell mass density (mg/ml)",
    y="Mass_density_cytop_mgml",
    color="Condition",
    x_title="3D",
    y_title="2D",
    binextent=[0, 160],
    binstep=10,
    x_min=80,
    x_max=160,
    y_min=80,
    y_max=175,
)
SCATTERBIN_2D_3D

In [34]:
# Dry mass against cell volume: raw scatter plus binned summary, coloured by condition.
# The bin extent now spans 0-4000, as in the other volume-binned chart, so it
# covers the whole plotted x range (up to 3500) and every volume kept by the
# earlier `Volume < 3200` filter. The previous extent stopped at 3000.
SCATTERBIN_DM_CV = Scatterbin(
    dataframe = df, 
    x = "Volume", 
    y = "Dry_mass", 
    color = "Condition", 
    x_title = "Cell volume (µm3)", 
    y_title = "Dry mass (pg)", 
    binextent = [0, 4000], 
    binstep = 500,
    x_min = 1500,
    x_max = 3500,
    y_min = 150,
    y_max = 400
)
SCATTERBIN_DM_CV

In [35]:
# Box plots of cell volume per condition, one facet column per experiment.
BINBOX_CV = Binned_Boxplot(
    data=df,
    x="Condition:N",
    y="Volume",
    x_title="",
    y_title="Cell volume",
    color="Condition",
    column="Experiment",
)
BINBOX_CV

In [36]:
# Box plots of "Cell mass density (mg/ml)" per condition, one facet column per experiment.
BINBOX_MD3d = Binned_Boxplot(
    data=df,
    x="Condition",
    y="Cell mass density (mg/ml)",
    x_title="",
    y_title="Mass density 3D (mg/ml)",
    color="Condition",
    column="Experiment",
)
BINBOX_MD3d

In [37]:
# Box plots of Mass_density_cytop_mgml per condition, one facet column per experiment.
BINBOX_MD = Binned_Boxplot(
    data=df,
    x="Condition",
    y="Mass_density_cytop_mgml",
    x_title="",
    y_title="Mass density 2D (mg/ml)",
    color="Condition",
    column="Experiment",
)
BINBOX_MD