In [2]:
import pandas as pd
import numpy as np

import statsmodels.api as sm
from statsmodels.formula.api import ols
from pathlib import Path
%load_ext watermark

### Set the location of the results data and where to save the figures
You can find the location of the results data on [line 36 of analyse_data.py](https://github.com/Jalink-lab/pde-screen-2021/blob/main/analyse_data.py#L36)

In [3]:
# Root folder of the screening results; the per-experiment all_results.csv
# files are read from sub-folders of this location.
WRITE_PATH = Path('D:\\') / 'Temp' / 'Screening_Result'
# [save flag, output folder]: when the flag is True, the pairwise t-test
# table is written as a CSV file into the output folder.
SAVE_FIGURE = [True, Path('D:\\') / 'Temp' / 'Screening_Figures']

In [4]:
def _read_results(year, month, day, assay):
    """Read WRITE_PATH/<year>/<month>/<day>/<assay>/results/all_results.csv into a DataFrame."""
    return pd.read_csv(Path(WRITE_PATH, year, month, day, assay, 'results', 'all_results.csv'))


# One DataFrame per experiment, identified by its date folder and assay type.
data1 = _read_results('2019', '11', '07', 'chemical')
data2 = _read_results('2020', '01', '07', 'chemical')
data3 = _read_results('2020', '02', '06', 'chemical')
data4 = _read_results('2019', '12', '05', 'caged')
data5 = _read_results('2020', '02', '06', 'caged')

In [5]:
def _tidy_results(frame):
    """Return a new frame with +/-inf replaced by NaN and the
    'breakdown_time(s)' column renamed to 'breakdown_time'.

    The shorter column name is the one the filtering step and the OLS
    formula ('breakdown_time ~ condition') refer to.
    """
    return (
        frame
        .replace([np.inf, -np.inf], np.nan)
        .rename(columns={"breakdown_time(s)": "breakdown_time"})
    )


# Apply the same clean-up to every experiment.
data1 = _tidy_results(data1)
data2 = _tidy_results(data2)
data3 = _tidy_results(data3)
data4 = _tidy_results(data4)
data5 = _tidy_results(data5)

In [6]:
# Choose the dataset to analyse with the pairwise t-test by assigning one of
# data1 ... data5 here.
data = data1

# error == 0: measurements that passed all checks.
fit_noerrors = data.loc[data['error'].eq(0)]
# error == 8: measurements that were corrected for a "dark frame" but
# otherwise passed all checks.
fit_hasdarkframe = data.loc[data['error'].eq(8)]

# Only the error-free measurements are analysed further
# (error 0 rows first, followed by the error 8 rows).
errorfree_data = pd.concat([fit_noerrors, fit_hasdarkframe])
# Keep only reasonable breakdown times (< 600 s); this eliminates a couple of
# clear outliers. Rows with a NaN breakdown time are dropped as well, because
# the comparison is False for NaN.
errorfree_data = errorfree_data.loc[errorfree_data['breakdown_time'].lt(600)]

In [9]:
# Fit an OLS model of breakdown time against condition, run pairwise t-tests
# between all conditions and (optionally) write the result table to a CSV file.
mod = ols('breakdown_time ~ condition', data=errorfree_data).fit()
pair_t = mod.t_test_pairwise('condition')
pairwise_table = pair_t.result_frame

if SAVE_FIGURE[0]:
    # Create the output folder on first use; an existing folder is fine.
    output_dir = SAVE_FIGURE[1]
    output_dir.mkdir(parents=True, exist_ok=True)
    pairwise_table.to_csv(output_dir / "Pairvise t test.csv")

# Show the last 20 pairwise comparisons as the cell output.
pairwise_table.tail(20)

Unnamed: 0,coef,std err,t,P>|t|,Conf. Int. Low,Conf. Int. Upp.,pvalue-hs,reject-hs
PDE7B-PDE6G,-1.727822,1.208077,-1.430225,0.15267,-4.09577,0.640126,0.999999,False
PDE8A-PDE6G,-1.556361,1.244191,-1.250902,0.210987,-3.995097,0.882374,1.0,False
PDE8B-PDE6G,-1.433646,1.365271,-1.050081,0.293695,-4.10971,1.242418,1.0,False
RNAiMAX_reagent_only-PDE6G,-4.226221,1.190202,-3.550844,0.000385,-6.559132,-1.893309,0.061559,False
Untransfected_cells-PDE6G,-2.280944,1.395098,-1.63497,0.102073,-5.015472,0.453584,0.999931,False
PDE7B-PDE7A,-0.064151,1.304095,-0.049192,0.960767,-2.620305,2.492003,1.0,False
PDE8A-PDE7A,0.10731,1.33762,0.080224,0.93606,-2.514555,2.729174,1.0,False
PDE8B-PDE7A,0.230025,1.450924,0.158537,0.874035,-2.613927,3.073977,1.0,False
RNAiMAX_reagent_only-PDE7A,-2.56255,1.287554,-1.990246,0.046579,-5.086281,-0.038818,0.992649,False
Untransfected_cells-PDE7A,-0.617273,1.479025,-0.417351,0.676426,-3.516306,2.281759,1.0,False


In [None]:
# Record the versions of the imported packages so the analysis can be reproduced.
# The %watermark magic is available because the imports cell runs `%load_ext watermark`.
print("Package versions used to generate this jupyter notebook:")
%watermark --iversions