In [69]:
import os 
import pandas as pd
import altair as alt
# Notebook-wide display configuration for Altair and pandas.
alt.renderers.enable('html')  # render Altair charts with the 'html' renderer
alt.data_transformers.disable_max_rows() # lift Altair's row limit so larger datasets can be plotted
pd.set_option('display.max_colwidth', None) # show the entire content of each dataframe cell


import scipy
from scipy import stats

# Widen the notebook container to 80% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [70]:
# Data import: load the full table (df_all) and the averaged table (df_avg)
# from the same folder and report their shapes.

root = "/Volumes/TOB_WD2/Image_Analysis/Mitosis/Dataframes/ODT_FBS_vs_D2" + "/"
df_Path = os.path.join(root, "MasterDataFrame_ODT_Mitosis.csv")
df_Path_Avg = os.path.join(root, "MasterDataFrame_ODT_Mitosis_Avg.csv")

df_all = pd.read_csv(df_Path)
print(f"The shape of the df after import: {df_all.shape}")

df_avg = pd.read_csv(df_Path_Avg)
print(f"The shape of the df avg after import: {df_avg.shape}")


The shape of the df after import: (131, 27)
The shape of the df avg after import: (130, 9)


In [71]:
# Restrict both tables to the datasets used in this analysis.
# NOTE(review): if these IDs are YYYYMMDD dates, "20229714" is not a valid date
# (month 97) and may be a typo — confirm against the values in the Dataset column.
# NOTE(review): the IDs are strings, so this assumes the Dataset column was read
# as strings rather than integers — TODO confirm.
keep_datasets = ["20220801", "20229714", "20221121"]
df_all = df_all[df_all['Dataset'].isin(keep_datasets)]
# Only df_all and df_avg are filtered: no dataframe named `df` is created in this
# notebook, so filtering it raised NameError on a fresh kernel.
df_avg = df_avg[df_avg['Dataset'].isin(keep_datasets)]

In [72]:
# Number of rows per Condition remaining in df_all.
df_all["Condition"].value_counts()

D0_FBS        33
D2_N2B27RA    22
Name: Condition, dtype: int64

In [73]:
# Colour scheme name passed to alt.Scale(scheme = ...) by the plotting helpers in this notebook.
colourscheme = "cividis"

In [74]:
def stripbox(data, x, y, y_title, colour):
    """Build a faceted strip plot with a min-max box plot layered on top.

    Parameters
    ----------
    data : data source handed to ``alt.layer`` and shared by both layers.
    x : field used as the facet column (one panel per value).
    y : field plotted on the y axis of both layers.
    y_title : y-axis title.
    colour : field used to colour the strip points, using the module-level
        ``colourscheme``.

    Returns
    -------
    The configured faceted Altair chart.
    """
    panel_height = 200

    # Strip layer: each point gets a random horizontal offset computed by a
    # Vega expression, so overlapping observations spread out within a panel.
    jitter_axis = alt.Axis(values = [0], grid = False, labels = False, ticks = True)
    value_axis = alt.Axis(grid = False, labels = True, ticks = True)
    point_colour = alt.Color(
        colour, scale = alt.Scale(scheme = colourscheme), legend = None
    )
    points = alt.Chart().mark_circle(size = 20, opacity = 1).encode(
        x = alt.X('jitter:Q', title = None, axis = jitter_axis),
        y = alt.Y(y, title = y_title, axis = value_axis),
        color = point_colour,
    ).transform_calculate(
        jitter = '(sqrt(-2 * log(random() / 2)) * cos(2 * PI * random() / 2))'
    ).properties(width = 15, height = panel_height)

    # Box layer: white box with black stroke, whiskers spanning min to max.
    # NOTE(review): this layer asks for width 30 while the strip layer asks for
    # 15; newer Altair releases may reject inconsistent layer sizes — confirm.
    box = alt.Chart().mark_boxplot(extent = 'min-max', size = 12).encode(
        y = alt.Y(y, title = y_title),
        opacity = alt.value(1),
        stroke = alt.value('black'),
        color = alt.value('white'),
    ).properties(width = 30, height = panel_height)

    # Facet labels are rotated and placed below each panel.
    column_header = alt.Header(
        labelAngle = -90,
        titleOrient = 'top',
        labelOrient = 'bottom',
        labelAlign = 'right',
        labelPadding = 5,
    )
    faceted = alt.layer(points, box, data = data).facet(
        column = alt.Column(x, header = column_header)
    )

    return faceted.configure_facet(
        spacing = 18
    ).configure_axis(
        grid = True, ticks = True, labelPadding = 5
    ).configure_header(
        labelOrient = 'bottom', title = None
    ).configure_view(
        stroke = 'transparent', strokeWidth = 0.5
    )

In [75]:
def Scatterbin(dataframe, x, y, color, x_title, y_title, binextent, binstep,
            Circlesize = 15,
            CircleOpacity = 0.4,
            Scatter_width = 300,
            Scatter_height = 300
               ):
    """Scatter plot overlaid with per-bin medians and IQR error bars.

    Three layers are built from ``dataframe`` and added together:
    the raw points, an error bar per x bin (``extent = "iqr"``), and a large
    circle at the median of ``y`` per x bin.

    Parameters
    ----------
    dataframe : data source for all three layers.
    x, y : field names for the axes; ``y`` is wrapped as ``median(<y>)`` for
        the binned layers.
    color : field used for colouring, with the module-level ``colourscheme``.
    x_title, y_title : axis titles.
    binextent, binstep : passed to ``alt.Bin(extent = ..., step = ...)`` for x.
    Circlesize, CircleOpacity : size and opacity of the raw points.
    Scatter_width, Scatter_height : chart size of the point layers.

    Returns
    -------
    The layered Altair chart.
    """
    # Small factories so every layer receives its own encoding objects.
    def group_colour():
        return alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)
        )

    def binned_x():
        return alt.X(x, title = x_title, bin = alt.Bin(extent = binextent, step = binstep))

    def median_y():
        return alt.Y("median(" + y + ")", title = y_title, bin = False)

    # Raw observations.
    raw_points = alt.Chart(
        data = dataframe, width = Scatter_width, height = Scatter_height
    ).mark_circle(
        opacity = CircleOpacity, size = Circlesize
    ).encode(
        alt.X(x, title = x_title),
        alt.Y(y, title = y_title),
        color = group_colour(),
    )

    # One large, opaque circle per x bin.
    bin_medians = alt.Chart(
        data = dataframe, width = Scatter_width, height = Scatter_height
    ).mark_circle(
        opacity = 1, size = 150
    ).encode(
        binned_x(),
        median_y(),
        color = group_colour(),
    )

    # Error bars per x bin.
    bin_spread = alt.Chart(
        data = dataframe
    ).mark_errorbar(extent = "iqr").encode(
        binned_x(),
        median_y(),
        color = group_colour(),
    )

    return raw_points + bin_spread + bin_medians

In [76]:
def Scatter(dataframe, x, y, color, x_title, y_title, log = False):
    """Plain scatter plot, on linear axes (default) or log-log axes.

    Parameters
    ----------
    dataframe : data source for the chart.
    x, y : field names for the axes.
    color : field used for colouring the points.
    x_title, y_title : axis titles.
    log : when equal to ``False`` a 450x450 linear chart coloured with the
        module-level ``colourscheme`` is built; otherwise a 200x200 chart with
        log-scaled x and y axes and ``scale = None`` on the colour channel.

    Returns
    -------
    The Altair chart.
    """
    # Choose the per-variant settings first, then assemble the chart once.
    if log == False:
        side, dot_opacity, dot_size = 450, 0.9, 40
        x_channel = alt.X(x, title = x_title, bin = False)
        y_channel = alt.Y(y, title = y_title)
        colour_channel = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme), legend = None
        )
    else:
        side, dot_opacity, dot_size = 200, 0.6, 25
        x_channel = alt.X(x, title = x_title, scale = alt.Scale(type = 'log'))
        y_channel = alt.Y(y, title = y_title, scale = alt.Scale(type = 'log'))
        colour_channel = alt.Color(
            color, legend = None, scale = None
        )

    canvas = alt.Chart(data = dataframe, width = side, height = side)
    dots = canvas.mark_circle(opacity = dot_opacity, size = dot_size)
    return dots.encode(x_channel, y_channel, color = colour_channel)

In [77]:
def strip_mean(data, x, y, y_title, colour):
    """Layer a large mean marker over small jittered observation points.

    Parameters
    ----------
    data : data source for both layers.
    x : field used on the x axis of the mean layer.
    y : field plotted on the y axis; the mean layer uses ``mean(<y>)``.
    y_title : y-axis title.
    colour : field used for colouring, with the "accent" scheme.

    Returns
    -------
    The layered Altair chart (mean layer + strip layer).
    """
    side = 250

    def accent_colour():
        # Fresh colour encoding per layer; no legend is drawn.
        return alt.Color(colour, scale = alt.Scale(scheme = "accent"), legend = None)

    # Mean of y per x value, drawn as one large opaque circle.
    mean_layer = alt.Chart(data).mark_circle(size = 200, opacity = 1).encode(
        x = alt.X(x, title = None),
        y = alt.Y("mean(" + y + ")", title = y_title),
        color = accent_colour(),
    ).properties(width = side, height = side)

    # Individual observations with a random horizontal offset.
    # NOTE(review): this layer's x channel is the quantitative 'jitter' field
    # while the mean layer uses `x`; confirm the two x encodings are meant to
    # share one axis when layered.
    jitter_axis = alt.Axis(values = [0], grid = False, labels = False, ticks = True)
    strip_layer = alt.Chart(data).mark_circle(size = 10, opacity = 0.3).encode(
        x = alt.X('jitter:Q', title = None, axis = jitter_axis),
        y = alt.Y(y, title = y_title),
        color = accent_colour(),
    ).transform_calculate(
        jitter = '(sqrt(-2 * log(random() / 2)) * cos(2 * PI * random() / 2))'
    ).properties(width = side, height = side)

    return mean_layer + strip_layer

In [78]:
def Binned_Mean(
    dataframe, x, y, x_title, y_title, color, column,
    Circlesize = 150):
    """Faceted plot of the mean of ``y`` per ``x`` value with stdev error bars.

    Parameters
    ----------
    dataframe : data source shared by both layers.
    x : field on the x axis.
    y : field whose mean is plotted (as ``mean(<y>)``).
    x_title : accepted for signature compatibility; the x axis is drawn
        without a title and this argument is not used.
    y_title : y-axis title.
    color : field used to colour the mean circles, with the module-level
        ``colourscheme``.
    column : field used as the facet column.
    Circlesize : size of the mean circles.

    Returns
    -------
    The faceted Altair chart.
    """
    panel_size = {"width": 55, "height": 200}

    def x_channel():
        return alt.X(x, title = None, axis = alt.Axis(grid = False))

    def mean_y_channel():
        return alt.Y("mean(" + y + ")", title = y_title, axis = alt.Axis(grid = True))

    # Mean marker per x value.
    mean_circles = alt.Chart().mark_circle(size = Circlesize).encode(
        x_channel(),
        mean_y_channel(),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme), legend = None
        ),
    ).properties(**panel_size)

    # Error bars with extent "stdev" around the mean.
    spread_bars = alt.Chart().mark_errorbar(extent = "stdev").encode(
        x_channel(),
        mean_y_channel(),
    ).properties(**panel_size)

    column_header = alt.Header(
        labelAngle = -90,
        titleOrient = 'top',
        labelOrient = 'bottom',
        labelAlign = 'right',
        labelPadding = 5,
    )

    # Error bars are listed first so the circles are layered after them.
    layered = alt.layer(spread_bars, mean_circles, data = dataframe)
    return layered.facet(
        column = alt.Column(column, header = column_header)
    ).configure_facet(spacing = 1)

In [79]:
# Cytoplasmic mass density from df_all, one strip/box panel per Condition.
STRIPBOX_CytopMassDensity = stripbox(
    df_all,
    "Condition",
    "Mass_density_cytop_mgml",
    y_title = "Cytoplasmic mass density (mg/mL)",
    colour = "Condition",
)
STRIPBOX_CytopMassDensity

In [81]:
# Cytoplasmic mass density from the averaged table df_avg, one panel per Condition.
STRIPBOX_CytopMassDensity_avg = stripbox(
    df_avg,
    "Condition",
    "Mass_density_cytop_mgml",
    y_title = "Cytoplasmic mass density (mg/mL)",
    colour = "Condition",
)
STRIPBOX_CytopMassDensity_avg

In [83]:
# Chromatin (DNA) mass density from the averaged table df_avg, one panel per Condition.
STRIPBOX_DNAMassDensity_avg = stripbox(
    df_avg,
    "Condition",
    "Mass_density_DNA_mgml",
    y_title = "Chromatin mass density (mg/mL)",
    colour = "Condition",
)
STRIPBOX_DNAMassDensity_avg

In [84]:
# Chromatin-to-cytoplasm density ratio from df_avg, one panel per Condition.
STRIPBOX_DNACytopRatio = stripbox(
    df_avg,
    "Condition",
    "DNA_cytop_Density_Ratio",
    y_title = "Chromatin:cytoplasm Density ratio",
    colour = "Condition",
)
STRIPBOX_DNACytopRatio

In [85]:
# Whole-cell mass density from df_all, one strip/box panel per Condition.
STRIPBOX_CellMassDensity = stripbox(
    df_all,
    "Condition",
    "Cell mass density (mg/ml)",
    y_title = "Cell mass density (mg/ml)",
    colour = "Condition",
)
STRIPBOX_CellMassDensity

In [90]:
# Keep rows whose Volume is below 3200 or missing, then plot Volume per Condition.
df_all_filt = df_all[df_all["Volume"].lt(3200) | df_all["Volume"].isna()]
STRIPBOX_CV = stripbox(
    df_all_filt,
    "Condition",
    "Volume",
    y_title = "Cell volume",
    colour = "Condition",
)
STRIPBOX_CV

In [91]:
# Dry mass per Condition, using the volume-filtered table df_all_filt.
STRIPBOX_DM = stripbox(
    df_all_filt,
    "Condition",
    "Dry_mass",
    y_title = "Cell Dry Mass (pg)",
    colour = "Condition",
)
STRIPBOX_DM

In [93]:
# Three small scatter plots from df_all, coloured by Condition and shown side by side.

# Cytoplasmic mass density against whole-cell mass density.
SCATTER_MassDensity = alt.Chart(df_all, width = 200, height = 200).mark_circle().encode(
    x = alt.X('Cell mass density (mg/ml)'),
    y = alt.Y('Mass_density_cytop_mgml'),
    color = "Condition"
)

# DNA mass density against cytoplasmic mass density.
SCATTER_CytopDNA = alt.Chart(df_all, width = 200, height = 200).mark_circle().encode(
    x = alt.X('Mass_density_cytop_mgml'),
    y = alt.Y('Mass_density_DNA_mgml'),
    color = "Condition"
)

# Mean cell mass density against mean volume (both channels are aggregated).
# Each channel now uses its matching class: the x channel is built with alt.X and
# the y channel with alt.Y. The plotted fields and axes are unchanged.
SCATTER_MassDensity_CV = alt.Chart(df_all, width = 200, height = 200).mark_circle().encode(
    x = alt.X('mean(Volume)'),
    y = alt.Y('mean(Cell mass density (mg/ml))'),
    color = "Condition"
)

SCATTER_MassDensity | SCATTER_MassDensity_CV | SCATTER_CytopDNA

In [94]:
# Dry mass against cell volume, with medians and IQR bars in volume bins
# of width 500 over the extent [0, 3000].
SCATTERBIN_DM_CV = Scatterbin(
    df_all,
    "Volume",
    "Dry_mass",
    "Condition",
    x_title = "Cell volume (µm3)",
    y_title = "Dry mass (pg)",
    binextent = [0, 3000],
    binstep = 500,
)
SCATTERBIN_DM_CV

In [95]:
# Cell mass density against cell volume, with medians and IQR bars in volume
# bins of width 500 over the extent [0, 3000].
SCATTERBIN_MD_CV = Scatterbin(
    df_all,
    "Volume",
    "Cell mass density (mg/ml)",
    "Condition",
    x_title = "Cell volume (µm3)",
    y_title = "Cell mass density (mg/ml)",
    binextent = [0, 3000],
    binstep = 500,
)
SCATTERBIN_MD_CV

In [99]:
# Split df_all into the two Condition groups compared by the t-test.
# (To compare the averaged table instead, build both groups from df_avg.)
df_p1 = df_all[df_all["Condition"].eq("D0_FBS")]
df_p2 = df_all[df_all["Condition"].eq("D2_N2B27RA")]

def ttest(measurement, group_a = None, group_b = None):
    """Compare one measurement between two groups with an unequal-variance t-test.

    Runs ``scipy.stats.ttest_ind`` with ``equal_var = False`` and
    ``nan_policy = 'omit'``, prints the p-value and returns the result so it
    can be reused instead of being lost.

    Parameters
    ----------
    measurement : column name to compare.
    group_a, group_b : tables indexable by ``measurement``. When left as
        ``None`` the module-level ``df_p1`` / ``df_p2`` are used, which is the
        behaviour of calling ``ttest(measurement)`` with one argument.

    Returns
    -------
    (statistic, pvalue) as returned by ``scipy.stats.ttest_ind``.
    """
    # Fall back to the notebook's two condition groups when none are given.
    if group_a is None:
        group_a = df_p1
    if group_b is None:
        group_b = df_p2

    statistic, pvalue = scipy.stats.ttest_ind(
        group_a[measurement],
        group_b[measurement],
        axis = 0,
        equal_var = False,
        nan_policy = 'omit'
    )
    print ("The p-value for {} is: ".format(measurement) + str(pvalue))
    return statistic, pvalue

# Keep the call's return value under its own name instead of rebinding `ttest`,
# so the function stays callable for further measurements in later cells.
ttest_cell_mass_density = ttest("Cell mass density (mg/ml)")

The p-value for Cell mass density (mg/ml) is: 0.00021131026370192573
