In [2]:
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

import scipy
from scipy import stats

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [3]:
# Data import (Already filtered data!)

df_Path = "/Volumes/TOB_WD2/Image_Analysis/EB1/Dataframes" + "/MasterDataFrame_EB1.csv"
df = pd.read_csv(df_Path)

print("The shape of the df after import: " + str(df.shape))

The shape of the df after import: (376, 35)


In [4]:
# Data import of maxi dataframe

maxdf_Path = "/Volumes/TOB_WD2/Image_Analysis/EB1/Dataframes" + "/" + "MaximumDataFrame.csv"
max_df = pd.read_csv(maxdf_Path)

print("The shape of the df after import: " + str(max_df.shape))

The shape of the df after import: (346, 7)


In [5]:
# Filter datasets
Keep = ["20220916", "20220921", "20220922", "20221021", "20221027", "20221103"] 
df = df[df['Experiment'].isin(Keep)]

df = df[(df.Cell_Volume_um3 < 4500) | (df.Cell_Volume_um3.isna())]
df = df[(df.Average_Track_Velocity > 0.15) | (df.Average_Track_Velocity.isna())]

print("The shape of the df after dataset filtering: " + str(df.shape))

The shape of the df after dataset filtering: (245, 35)


In [7]:
df.groupby(["Condition"]).Number_Comets_Total.mean()

Condition
FBS+LIF     65.86385
N2B27+RA    56.71875
Name: Number_Comets_Total, dtype: float64

In [30]:
df.groupby(["Condition"]).Number_Comets_Astral.mean()

Condition
FBS+LIF     12.126761
N2B27+RA    13.395833
Name: Number_Comets_Astral, dtype: float64

In [29]:
df.groupby(["Condition"]).Number_Comets_SpindleBulk.mean()


Condition
FBS+LIF     53.737089
N2B27+RA    43.322917
Name: Number_Comets_SpindleBulk, dtype: float64

In [8]:
df_p1 = df[df["Condition"] == "FBS+LIF"]
df_p2 = df[df["Condition"] == "N2B27+RA"]

def ttest(measurement):
    statistic, pvalue = scipy.stats.ttest_ind(
        df_p1[measurement], 
        df_p2[measurement], 
        axis = 0, 
        equal_var = False, 
        nan_policy = 'omit'
    )
    print ("The p-value for {} is: ".format(measurement) + str(pvalue))

In [9]:
ttest("Average_Track_Velocity")

The p-value for Average_Track_Velocity is: 0.4148841870364083


In [10]:
ttest("Number_Comets_SpindleBulk")

The p-value for Number_Comets_SpindleBulk is: 5.543802665483474e-08


In [11]:
ttest("CometsBulk_per_Cell_Volume")

The p-value for CometsBulk_per_Cell_Volume is: 0.027685365797216067


In [12]:
ttest("CometsTotal_per_Cell_Volume")

The p-value for CometsTotal_per_Cell_Volume is: 0.47088004068443967


In [13]:
maxdf_p1 = max_df[max_df["Condition"] == "FBS+LIF"]
maxdf_p2 = max_df[max_df["Condition"] == "N2B27+RA"]

def ttest2(measurement):
    statistic, pvalue = scipy.stats.ttest_ind(
        maxdf_p1[measurement], 
        maxdf_p2[measurement], 
        axis = 0, 
        equal_var = False, 
        nan_policy = 'omit'
    )
    print ("The p-value for {} is: ".format(measurement) + str(pvalue))

In [14]:
ttest2("Distance_EB1_maximum")

The p-value for Distance_EB1_maximum is: 2.090909937031538e-05


In [15]:
ttest("Astral:Bulk_Comets")

The p-value for Astral:Bulk_Comets is: 1.1069819071612568e-07


In [16]:
ttest("CometsAstral_per_Cell_Volume")


The p-value for CometsAstral_per_Cell_Volume is: 0.00074000557569353


In [17]:
# Linear Regression

def linear_fit(dataframe, independent_column, dependent_column): 
    dataframe = dataframe[dataframe[[independent_column, dependent_column]].notnull().all(1)]
    length = dataframe.shape[0]

    X = dataframe[independent_column].values.reshape(length, 1)
    y = dataframe[dependent_column].values.reshape(length, 1)

    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    y_predicted = regr.predict(X)

    # model evaluation
    rmse = mean_squared_error(y, y_predicted)
    R2 = r2_score(y, y_predicted)
    slope = regr.coef_
    interc = regr.intercept_
    
    return rmse, R2, slope, interc

In [18]:
print("Cell Volume vs Average Track Velocity")
rmse, R2, slope, interc = linear_fit(df, "Cell_Volume_um3", "Average_Track_Velocity")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Cell Volume vs Average Track Velocity
Slope:  [[2.93388812e-06]]
Intercept:  [0.25378591]
Root mean squared error:  0.0010689795011869887
R2 score:  0.0016114241080937841


In [19]:
print("Spindle volume vs Average Track Velocity")
rmse, R2, slope, interc = linear_fit(df, "Spindle_Volume_um3", "Average_Track_Velocity")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle volume vs Average Track Velocity
Slope:  [[0.00010174]]
Intercept:  [0.22413332]
Root mean squared error:  0.0014193205775326096
R2 score:  0.04309413196212486


In [20]:
print("Cell Volume vs Number of tracks")
rmse, R2, slope, interc = linear_fit(df, "Cell_Volume_um3", "Number_Comets_SpindleBulk")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Cell Volume vs Number of tracks
Slope:  [[0.01063846]]
Intercept:  [20.43917048]
Root mean squared error:  111.76858645663197
R2 score:  0.1776858027173469


In [21]:
print("Spindle Volume vs Number of tracks")
rmse, R2, slope, interc = linear_fit(df, "Spindle_Volume_um3", "Number_Comets_SpindleBulk")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Volume vs Number of tracks
Slope:  [[0.06606693]]
Intercept:  [25.83161476]
Root mean squared error:  49.010144695697385
R2 score:  0.39843568980117516


In [22]:
print("Spindle Length vs Number of tracks")
rmse, R2, slope, interc = linear_fit(df, "Spindle_Length_um", "Number_Comets_SpindleBulk")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length vs Number of tracks
Slope:  [[4.16404196]]
Intercept:  [0.00115013]
Root mean squared error:  54.150080184727145
R2 score:  0.3353466749426638


In [23]:
print("Spindle Length man vs Velocity ALL")
rmse, R2, slope, interc = linear_fit(df, "Spindle_Length_um_manual", "Average_Track_Velocity")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length man vs Velocity ALL
Slope:  [[0.00061343]]
Intercept:  [0.25545226]
Root mean squared error:  0.0010759585463515173
R2 score:  0.0007353712744595509


In [24]:
df_P1 = df[df["Condition"] == "FBS+LIF"]
df_P2 = df[df["Condition"] == "N2B27+RA"]

print("Spindle Length man vs Velocity FBS+LIF")
rmse, R2, slope, interc = linear_fit(df_P1, "Spindle_Length_um_manual", "Average_Track_Velocity")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length man vs Velocity FBS+LIF
Slope:  [[0.00050187]]
Intercept:  [0.25481229]
Root mean squared error:  0.001173721826492044
R2 score:  0.0003277248547737255


In [25]:
print("Spindle Length man vs Velocity N2B27+RA")
rmse, R2, slope, interc = linear_fit(df_P2, "Spindle_Length_um_manual", "Average_Track_Velocity")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length man vs Velocity N2B27+RA
Slope:  [[0.0050857]]
Intercept:  [0.21252664]
Root mean squared error:  0.0009203254882094655
R2 score:  0.02854618417375565


In [26]:
print("Spindle Length manual vs Number of tracks")
rmse, R2, slope, interc = linear_fit(df, "Spindle_Length_um_manual", "Number_Comets_SpindleBulk")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length manual vs Number of tracks
Slope:  [[4.75746221]]
Intercept:  [-3.74105154]
Root mean squared error:  91.35738216439917
R2 score:  0.32785700560405684


In [27]:
print("Spindle Length manual vs Number of tracks")
rmse, R2, slope, interc = linear_fit(df_p1, "Spindle_Length_um_manual", "Number_Comets_SpindleBulk")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length manual vs Number of tracks
Slope:  [[3.73380177]]
Intercept:  [9.76816236]
Root mean squared error:  118.06876905436344
R2 score:  0.1518963106385044


In [28]:
print("Spindle Length manual vs Number of tracks")
rmse, R2, slope, interc = linear_fit(df_p2, "Spindle_Length_um_manual", "Number_Comets_SpindleBulk")  
print('Slope: ', slope)
print('Intercept: ', interc)
print('Root mean squared error: ', rmse)
print('R2 score: ', R2)

Spindle Length manual vs Number of tracks
Slope:  [[3.96055026]]
Intercept:  [2.65531038]
Root mean squared error:  46.01028313870642
R2 score:  0.25783092213031833
