In [1]:
import os 
import pandas as pd
pd.options.display.max_colwidth = 100
import altair as alt
alt.renderers.enable('html')
alt.data_transformers.disable_max_rows() # altair now handles larger datasets

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [2]:
# Data import

root = "/Volumes/TOB_WD2/Image_Analysis/CCB02/Dataframes" + "/"
df_Path = root + "MasterDataFrame_CCB02_gTub.csv"

df = pd.read_csv(df_Path)

print("The shape of the df after import: " + str(df.shape))

The shape of the df after import: (890, 36)


In [4]:
# Filter datasets
df = df[(df.Cell_Volume_um3 < 4000) | (df.Cell_Volume_um3.isna())]

Keep = ["20230623", "20230721", "20230913"] 
df = df[df['Experiment'].isin(Keep)]

Keep_condition = ["1_DMSO", "5_10µM", "7_N2B27+RA"] 
df = df[df['Condition'].isin(Keep_condition)]

print("The shape of the df after filtering: " + str(df.shape))

The shape of the df after filtering: (287, 36)


In [5]:
df.groupby(["Condition"]).Cell_ID.nunique()

Condition
1_DMSO        109
5_10µM        121
7_N2B27+RA     57
Name: Cell_ID, dtype: int64

In [6]:
colourscheme = "cividis"

In [7]:
def stripbox(data, x, y, y_title, colour):
    boxplot = alt.Chart().mark_boxplot(
        extent = 'min-max', 
        size = 12
    ).encode(
        y = alt.Y(y, title = y_title),
        opacity = alt.value(1),
        stroke = alt.value('black'),
        color = alt.value('white')
    ).properties(
        width = 30,
        height = 200
    )

    stripplot = alt.Chart().mark_circle(
        size = 20, opacity = 1
    ).encode(
        x = alt.X(
            'jitter:Q',
            title = None,
            axis = alt.Axis(values = [0], grid = False, labels = False, ticks = True),
        ),
        y = alt.Y(y, title = y_title, 
            axis = alt.Axis(grid = False, labels = True, ticks = True)),
        color = alt.Color(colour, scale = alt.Scale(scheme = colourscheme), legend = None)
        ).transform_calculate(
            jitter = '(sqrt(-2 * log(random() / 2)) * cos(2 * PI * random() / 2))'
    ).properties(
        width = 15,
        height = 200
    )
    
    FACETCHART = alt.layer(
        stripplot, boxplot, data = data
        ).facet(
            column = alt.Column(x, header = alt.Header(
                labelAngle = -90,
                titleOrient = 'top',
                labelOrient = 'bottom',
                labelAlign = 'right',
                labelPadding = 5)
                )
        ).configure_facet(
            spacing = 18
        ).configure_axis(
                grid = True, ticks = True, labelPadding = 5
        ).configure_header(
            labelOrient = 'bottom', title = None
        ).configure_view(
            stroke = 'transparent', 
            strokeWidth = 0.5
        )
    return FACETCHART

In [8]:
def Group(dataframe, x, y, y_title, color, column, 
          BoxOpacity = 1, 
          Group_width = 100, 
          Group_height = 200, 
          size = 12):
    
    GROUP = alt.Chart(
        data = dataframe, 
        width = Group_width, 
        height = Group_height
    ).mark_boxplot(
        opacity = BoxOpacity,
        extent = "min-max",
        size = size
    ).encode(
        alt.X(x, title = None),
        alt.Y(y, title = y_title),
        color = alt.Color(color, scale = alt.Scale(scheme = colourscheme)),
        column = alt.Column(column)
    )
    return GROUP

In [9]:
def Scatter_bin(dataframe, x, y, color, x_title, y_title, binextent, binstep,
            Circlesize = 15, 
            CircleOpacity = 0.2,  
            Scatter_width = 250, 
            Scatter_height = 250
               ):
    # Standard scatter plot 
    SCATTER = alt.Chart(
        data = dataframe, 
        width = Scatter_width, 
        height = Scatter_height
    ).mark_circle(
        opacity = CircleOpacity,
        size = Circlesize
    ).encode(
        alt.X(x, title = x_title),
        alt.Y(y, title = y_title),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)#, legend = None#, scale = None
        ) 
    )
    
    SCATTER_bin = alt.Chart(
        data = dataframe, 
        width = Scatter_width, 
        height = Scatter_height
    ).mark_circle(
        opacity = 1,
        size = 150
    ).encode(
        alt.X(x, title = x_title, bin = alt.Bin(extent = binextent, step = binstep)),
        alt.Y("median(" + y + ")", title = y_title, bin = False),
        color = alt.Color(
            color, scale = alt.Scale(scheme = colourscheme)#, legend = None#, scale = None
        ) 
    )
    
    ERROR_Scatterbin = alt.Chart(
            data = dataframe
    ).mark_errorbar(extent = "iqr").encode(
        alt.X(x, title = x_title, bin = alt.Bin(extent = binextent, step = binstep)),
        alt.Y("median(" + y + ")", title = y_title, bin = False),
        #color = alt.Color(
        #    color, scale = alt.Scale(scheme = colourscheme)#, legend = None#, scale = None
        #) 
    ) 

    
    SCATTERBIN = SCATTER + ERROR_Scatterbin + SCATTER_bin 
    return SCATTERBIN

In [11]:
STRIP_Cell_Volumes = stripbox(
    data = df, 
    x = "Condition:N", 
    y = "Cell_Volume_um3", 
    y_title = "Cell volume (µm3)",
    colour = "Target:N"
)
STRIP_Cell_Volumes

In [29]:
STRIP_Centrosome_Partitioning = stripbox(
    data = df, 
    x = "Condition:N", 
    y = "gTub_mass_at_poles_percent", 
    y_title = "% gTub total mass in centrosome",
    colour = "Target:N"
)
STRIP_Centrosome_Partitioning

In [30]:
GROUP_Centrosome_Partitioning =  Group(
    dataframe = df, 
    x = "Experiment:N", 
    y = "gTub_mass_at_poles_percent", 
    y_title = "% gTub total mass in centrosome", 
    color = "Target:N", 
    column = "Condition:N"
)
GROUP_Centrosome_Partitioning

In [37]:
SCATTERBIN_SV_Percent = Scatter_bin(
    dataframe = df, 
    y = "Spindle_Volume_um3", 
    y_title = "Spindle Volume (µm3)",
    x = "gTub_mass_at_poles_percent",
    x_title = "% of total g-Tubulin mass at centrosomes",
    color = "Condition",
    binextent = [0, 10],
    binstep = 2
   # Circlesize = 5,
   # CircleOpacity = 1
)
SCATTERBIN_SV_Percent

In [38]:
SCATTERBIN_SO_Percent = Scatter_bin(
    dataframe = df, 
    y = "Spindle_Occupancy", 
    y_title = "Spindle occupancy (%)",
    x = "gTub_mass_at_poles_percent",
    x_title = "% g-Tubulin mass at centrosomes",
    color = "Condition",
    binextent = [0, 10],
    binstep = 2
   # Circlesize = 5,
   # CircleOpacity = 1
)
SCATTERBIN_SO_Percent

In [39]:
SCATTERBIN_CV_Partitioning = Scatter_bin(
    dataframe = df, 
    y = "gTub_mass_at_poles_percent", 
    y_title = "gTub_mass_at_poles_percent",
    x = "Cell_Volume_um3",
    x_title = "Cell volume (µm3)",
    color = "Condition",
    binextent = [0, 4500],
    binstep = 500
   # Circlesize = 5,
   # CircleOpacity = 1
)
SCATTERBIN_CV_Partitioning

In [40]:
SCATTERBIN_CV_CentV = Scatter_bin(
    dataframe = df, 
    y = "Centrosome_Volume_um3", 
    y_title = "Centrosome volume (µm3)",
    x = "Cell_Volume_um3",
    x_title = "Cell volume (µm3)",
    color = "Condition",
    binextent = [0, 4500],
    binstep = 500
   # Circlesize = 5,
   # CircleOpacity = 1
)
SCATTERBIN_CV_CentV

In [41]:
# Export Dataframe

destination = "/Volumes/TOB_WD2/Image_Analysis/CCB02/Dataframes" + "/MasterDataFrame_CCB02_gTub_for_stats.csv"
df.to_csv(destination)
print("Successfully saved dataframe at {}".format(destination))

Successfully saved dataframe at /Volumes/TOB_WD2/Image_Analysis/CCB02/Dataframes/MasterDataFrame_CCB02_gTub_for_stats.csv


In [42]:
def normalise_to_ctrl(measurement, data = df):
    
    dataframes = []
    experimentlist = data["Experiment"].unique().tolist()

    for experiment in experimentlist:
        data_experiment = data[data["Experiment"] == experiment]
        mean_d0 = data_experiment[
                data_experiment["Condition"] == "1_DMSO"
        ][measurement].mean()

        
        data_experiment[measurement + "_norm"] = data_experiment[measurement] / mean_d0
        data_experiment = data_experiment[["Cell_ID", "Condition", measurement + "_norm"]]
        dataframes.append(data_experiment)
    
    df = pd.concat(dataframes)
    return df



df_norm_CO = normalise_to_ctrl(measurement = "Centrosome_Occupancy")
df_norm_centmass = normalise_to_ctrl(measurement = "gTub_mass_at_poles_percent")
df_norm_CentV = normalise_to_ctrl(measurement = "Centrosome_Volume_um3")
df_norm_AverageFl = normalise_to_ctrl(measurement = "Average_gTub_Poles_norm")
df_norm_AverageFlunorm = normalise_to_ctrl(measurement = "Average_Pole_gTub")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

In [43]:
STRIP_CO_D2 = stripbox(
    data = df_norm_CO,#[df_norm_CO["Condition"] == "5_10µM"], 
    x = "Condition", 
    y = "Centrosome_Occupancy_norm", 
    y_title = "Centrosome occupancy (normalised to ctrl)", 
    colour = "Condition"
)
STRIP_CO_D2

In [44]:
STRIP_Cmass_D2 = stripbox(
    data = df_norm_centmass,
    x = "Condition", 
    y = "gTub_mass_at_poles_percent_norm", 
    y_title = "% gTub mass at poles (normalised to ctrl)", 
    colour = "Condition"
)
STRIP_Cmass_D2

In [45]:
STRIP_CentVol_D2 = stripbox(
    data = df_norm_CentV,#[df_norm_centmass["Condition"] == "5_10µM"], 
    x = "Condition", 
    y = "Centrosome_Volume_um3_norm", 
    y_title = "Centrosome volume (normalised to ctrl)", 
    colour = "Condition"
)
STRIP_CentVol_D2

In [46]:
STRIP_AverageFl_D2 = stripbox(
    data = df_norm_AverageFl,#[df_norm_centmass["Condition"] == "5_10µM"], 
    x = "Condition", 
    y = "Average_gTub_Poles_norm_norm", 
    y_title = "Average gTub fluorescence (normalised to ctrl)", 
    colour = "Condition"
)
STRIP_AverageFl_D2

In [47]:
df_norm_AverageFl.groupby("Condition").Average_gTub_Poles_norm_norm.mean()

Condition
1_DMSO        1.000000
5_10µM        1.131166
7_N2B27+RA    0.957336
Name: Average_gTub_Poles_norm_norm, dtype: float64

In [48]:
STRIP_AverageFlunnorm_D2 = stripbox(
    data = df_norm_AverageFlunorm,#[df_norm_centmass["Condition"] == "5_10µM"], 
    x = "Condition", 
    y = "Average_Pole_gTub_norm", 
    y_title = "Average gTub fluorescence (normalised to ctrl)", 
    colour = "Condition"
)
STRIP_AverageFlunnorm_D2