Adapting files for MATLAB modeling

In [1]:
import functions_paper_1
import importlib
from functions_paper_1 import *
exec(open("functions_paper_1.py").read())
# Local path to Sharepoint (on your computer, change if you are a different user)
datadir = Path(Path('../datadir_yb.txt').read_text())
data_folder = './datafiles/modeling_'

In [2]:
# Bioreplicates for ZF37 plasmid titration
# 20240304flow_exp20240301_p3_BioC
# 20240309flow_exp20240306_p1 ('Yunbeen' attune folder)
# 20240309flow_exp20240306_p2 ('Yunbeen' attune folder)
# 20240316flow_exp20240313_p1_ZFtitr ('Sneha' attune folder)
# 20240315flow_exp20240312_p3_ZFtitr

In [3]:
# Import Data - bioreplicate 1 
folder = '20240304flow_exp20240301_p3_BioC'
file_path = datadir/'instruments'/'data'/'attune'/'Sneha'/folder/'export_singlets' #Assign file paths
yaml_path = datadir/'instruments'/'data'/'attune'/'Sneha'/folder/'well_metadata.yaml' #Assign yaml paths 
data1 = rd.flow.load_csv_with_metadata(file_path, yaml_path) #Pull data
data1['bioreplicate'] = folder
data1['mCherry-A'] = data1['mRuby2-A']

  data = pd.concat(data_list, ignore_index=True).replace(np.NaN, pd.NA)  # type: ignore


In [4]:
# Import Data - bioreplicate 2
folder = '20240309flow_exp20240306_p1'
file_path = datadir/'instruments'/'data'/'attune'/'Yunbeen'/folder/'export_singlets' #Assign file paths
yaml_path = datadir/'instruments'/'data'/'attune'/'Yunbeen'/folder/'well_metadata.yaml' #Assign yaml paths 
data2 = rd.flow.load_csv_with_metadata(file_path, yaml_path) #Pull data
data2['bioreplicate'] = folder 

In [5]:
# Import Data - bioreplicate 3
folder = '20240309flow_exp20240306_p2'
file_path = datadir/'instruments'/'data'/'attune'/'Yunbeen'/folder/'export_singlets' #Assign file paths
yaml_path = datadir/'instruments'/'data'/'attune'/'Yunbeen'/folder/'well_metadata.yaml' #Assign yaml paths 
data3 = rd.flow.load_csv_with_metadata(file_path, yaml_path) #Pull data
data3['bioreplicate'] = folder 

In [6]:
# Import Data - bioreplicate 4 
folder = '20240316flow_exp20240313_p1_ZFtitr'
file_path = datadir/'instruments'/'data'/'attune'/'Sneha'/folder/'export_singlets' #Assign file paths
yaml_path = datadir/'instruments'/'data'/'attune'/'Sneha'/folder/'well_metadata.yaml' #Assign yaml paths 
data4 = rd.flow.load_csv_with_metadata(file_path, yaml_path) #Pull data
data4['bioreplicate'] = folder

  data = pd.concat(data_list, ignore_index=True).replace(np.NaN, pd.NA)  # type: ignore


In [7]:
# Import Data - bioreplicate 5 
folder = '20240315flow_exp20240312_p3_ZFtitr'
file_path = datadir/'instruments'/'data'/'attune'/'Sneha'/folder/'export_singlets' #Assign file paths
yaml_path = datadir/'instruments'/'data'/'attune'/'Sneha'/folder/'well_metadata.yaml' #Assign yaml paths 
data5 = rd.flow.load_csv_with_metadata(file_path, yaml_path) #Pull data
data5['bioreplicate'] = folder

  data = pd.concat(data_list, ignore_index=True).replace(np.NaN, pd.NA)  # type: ignore


In [8]:
# simplify df with only necessary stats
col_keep = ['reporter', 'ZF37', 'well', 'mGL-A', 'mCherry-A', 'iRFP670-A', 'bioreplicate']
data1_subset = data1[col_keep]
data2_subset = data2[col_keep]
data3_subset = data3[col_keep]
data4_subset = data4[col_keep]
data5_subset = data5[col_keep]

In [9]:
# combine all 5 bioreplicate for further trimming
data_subset = pd.concat( [data1_subset, data2_subset, data3_subset, data4_subset, data5_subset], ignore_index=True,join='inner')

In [10]:
data_subset['ZF37'].unique()


array([<NA>, 0.00781, 0.03125, 0.25, 0.0, 0.01563, 0.5, 0.125, 1.0,
       0.0625], dtype=object)

In [11]:
data = data_subset.dropna()

In [12]:
data['ZF37'].unique()

array([0.00781, 0.03125, 0.25, 0.0, 0.01563, 0.5, 0.125, 1.0, 0.0625],
      dtype=object)

In [13]:
data['reporter'].unique()

array(['pTET021_203bp 0.1xCre', 'pTET021_203bp 0xCre'], dtype=object)

In [14]:
data.head()

Unnamed: 0,reporter,ZF37,well,mGL-A,mCherry-A,iRFP670-A,bioreplicate
15989,pTET021_203bp 0.1xCre,0.00781,H7,-22,18,12,20240304flow_exp20240301_p3_BioC
15990,pTET021_203bp 0.1xCre,0.00781,H7,62,-7,173,20240304flow_exp20240301_p3_BioC
15991,pTET021_203bp 0.1xCre,0.00781,H7,18,68,61,20240304flow_exp20240301_p3_BioC
15992,pTET021_203bp 0.1xCre,0.00781,H7,0,119,24,20240304flow_exp20240301_p3_BioC
15993,pTET021_203bp 0.1xCre,0.00781,H7,79,105,61,20240304flow_exp20240301_p3_BioC


In [15]:
# gate by positive values
data = data[ data['mCherry-A'] >0]
data = data[ data['mGL-A'] >0]
data = data[ data['iRFP670-A'] >0]
data['log10 mGL-A'] = np.log10(data['mGL-A'])

In [16]:
# gate by iRFP670 values
iRFP670_gate = 10000 
data_iRFP670gated = data[ data['iRFP670-A'] > iRFP670_gate]

In [17]:
df = data_iRFP670gated #choose dataframe

#Decide how to group. The last parameter will be the condition that it is group by by last ('well' or 'bioreplicate')
by = ['reporter', 'ZF37','bioreplicate'] #how to filter results, columns 

x = ['mGL-A', 'mCherry-A'] #which parameter to calculate results on
stat = [sp.stats.gmean] #statistics to calculate
s_bioreplicate = calc_stats(df,by,x,stat) #calculate statistics 

In [18]:
display(s_bioreplicate)

Unnamed: 0,reporter,ZF37,bioreplicate,mGL-A_gmean,mCherry-A_gmean,Fraction,Count
0,pTET021_203bp 0.1xCre,0.0,20240304flow_exp20240301_p3_BioC,62.851683,40.812411,0.043098,1300
1,pTET021_203bp 0.1xCre,0.0,20240309flow_exp20240306_p1,60.212777,37.926050,0.178988,5399
2,pTET021_203bp 0.1xCre,0.0,20240309flow_exp20240306_p2,59.184813,39.355992,0.399848,12061
3,pTET021_203bp 0.1xCre,0.0,20240315flow_exp20240312_p3_ZFtitr,99.573291,39.927361,0.106816,3222
4,pTET021_203bp 0.1xCre,0.0,20240316flow_exp20240313_p1_ZFtitr,66.793929,40.743150,0.271250,8182
...,...,...,...,...,...,...,...
83,pTET021_203bp 0xCre,1.0,20240304flow_exp20240301_p3_BioC,268.451003,2026.370856,0.033546,2058
84,pTET021_203bp 0xCre,1.0,20240309flow_exp20240306_p1,214.216743,2097.058555,0.145041,8898
85,pTET021_203bp 0xCre,1.0,20240309flow_exp20240306_p2,190.113120,1805.417439,0.338642,20775
86,pTET021_203bp 0xCre,1.0,20240315flow_exp20240312_p3_ZFtitr,553.113514,2744.031970,0.064240,3941


In [19]:
s_125x_noCre = s_bioreplicate[(s_bioreplicate['ZF37']==0.125)&(s_bioreplicate['reporter']=='pTET021_203bp 0xCre')]
display(s_125x_noCre)

Unnamed: 0,reporter,ZF37,bioreplicate,mGL-A_gmean,mCherry-A_gmean,Fraction,Count
68,pTET021_203bp 0xCre,0.125,20240304flow_exp20240301_p3_BioC,284.434091,392.773937,0.049121,2504
69,pTET021_203bp 0xCre,0.125,20240309flow_exp20240306_p1,241.568346,338.738721,0.197034,10044
70,pTET021_203bp 0xCre,0.125,20240309flow_exp20240306_p2,215.293458,286.955465,0.445975,22734
71,pTET021_203bp 0xCre,0.125,20240315flow_exp20240312_p3_ZFtitr,597.003682,457.403146,0.051377,2619
72,pTET021_203bp 0xCre,0.125,20240316flow_exp20240313_p1_ZFtitr,487.810182,229.954029,0.256493,13075


In [20]:
bioreps_plot = pd.array(['20240304flow_exp20240301_p3_BioC',
                         '20240309flow_exp20240306_p1',
                         '20240309flow_exp20240306_p2', 
                         '20240316flow_exp20240313_p1_ZFtitr',
                         '20240315flow_exp20240312_p3_ZFtitr'
                         ])

# initialization for all datapoints
# data_iRFP670gated['Normalized_mGL-A_gmean'] = 0
# data_iRFP670gated['Normalized_mCherry-A_gmean'] = 0

for biorep in bioreps_plot:

    mGL_125x = s_bioreplicate[(s_bioreplicate['bioreplicate']==biorep)&(s_bioreplicate['reporter'] == 'pTET021_203bp 0xCre') & (s_bioreplicate['ZF37'] == 0.125)]['mGL-A_gmean'].iloc[0]
    display(mGL_125x)

    mCherry_125x = s_bioreplicate[(s_bioreplicate['bioreplicate']==biorep)&(s_bioreplicate['reporter'] == 'pTET021_203bp 0xCre') & (s_bioreplicate['ZF37'] == 0.125)]['mCherry-A_gmean'].iloc[0]
    display(mCherry_125x)
    
    # for every data point
    # condition = data_iRFP670gated['bioreplicate'] == biorep
    # data_iRFP670gated.loc[condition, 'Normalized_mGL-A_gmean'] = data_iRFP670gated.loc[condition, 'mGL-A'] / mGL_125x
    # data_iRFP670gated.loc[condition, 'Normalized_mCherry-A_gmean'] = data_iRFP670gated.loc[condition, 'mCherry-A'] / mCherry_125x

    condition = s_bioreplicate['bioreplicate'] == biorep
    s_bioreplicate.loc[condition, 'Normalized_mGL-A_gmean'] = s_bioreplicate.loc[condition, 'mGL-A_gmean'] / mGL_125x
    s_bioreplicate.loc[condition, 'Normalized_mCherry-A_gmean'] = s_bioreplicate.loc[condition, 'mCherry-A_gmean'] / mCherry_125x

284.4340908415765

392.773936536853

241.56834561903875

338.73872144408915

215.29345828750553

286.9554646874573

487.81018171206273

229.95402873297434

597.0036822597077

457.40314580259275

In [21]:
display(s_bioreplicate)

Unnamed: 0,reporter,ZF37,bioreplicate,mGL-A_gmean,mCherry-A_gmean,Fraction,Count,Normalized_mGL-A_gmean,Normalized_mCherry-A_gmean
0,pTET021_203bp 0.1xCre,0.0,20240304flow_exp20240301_p3_BioC,62.851683,40.812411,0.043098,1300,0.220971,0.103908
1,pTET021_203bp 0.1xCre,0.0,20240309flow_exp20240306_p1,60.212777,37.926050,0.178988,5399,0.249258,0.111963
2,pTET021_203bp 0.1xCre,0.0,20240309flow_exp20240306_p2,59.184813,39.355992,0.399848,12061,0.274903,0.137150
3,pTET021_203bp 0.1xCre,0.0,20240315flow_exp20240312_p3_ZFtitr,99.573291,39.927361,0.106816,3222,0.166788,0.087291
4,pTET021_203bp 0.1xCre,0.0,20240316flow_exp20240313_p1_ZFtitr,66.793929,40.743150,0.271250,8182,0.136926,0.177180
...,...,...,...,...,...,...,...,...,...
83,pTET021_203bp 0xCre,1.0,20240304flow_exp20240301_p3_BioC,268.451003,2026.370856,0.033546,2058,0.943807,5.159128
84,pTET021_203bp 0xCre,1.0,20240309flow_exp20240306_p1,214.216743,2097.058555,0.145041,8898,0.886775,6.190785
85,pTET021_203bp 0xCre,1.0,20240309flow_exp20240306_p2,190.113120,1805.417439,0.338642,20775,0.883042,6.291629
86,pTET021_203bp 0xCre,1.0,20240315flow_exp20240312_p3_ZFtitr,553.113514,2744.031970,0.064240,3941,0.926483,5.999154


In [22]:
data_subset_noCre = s_bioreplicate[s_bioreplicate['reporter']== 'pTET021_203bp 0xCre']
data_subset_Cre = s_bioreplicate[s_bioreplicate['reporter']== 'pTET021_203bp 0.1xCre']

In [23]:
data_subset_noCre['ZF37'].unique()

array([0.     , 0.00781, 0.01563, 0.03125, 0.0625 , 0.125  , 0.25   ,
       0.5    , 1.     ])

In [24]:
data_subset_noCre['ZF37'] = data_subset_noCre['ZF37'].astype(str)
data_subset_Cre['ZF37'] = data_subset_Cre['ZF37'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset_noCre['ZF37'] = data_subset_noCre['ZF37'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset_Cre['ZF37'] = data_subset_Cre['ZF37'].astype(str)


In [25]:
mean_mGL_noCre_0zf = data_subset_noCre[data_subset_noCre['ZF37']==0]['Normalized_mGL-A_gmean'].mean()
mean_mCherry_noCre_0zf = data_subset_noCre[data_subset_noCre['ZF37']==0]['Normalized_mCherry-A_gmean'].mean()

In [26]:
zf_target_conc = pd.array(['0.0', '0.00781', '0.01563', '0.03125', '0.0625', '0.125', '0.25', '0.5', '1.0'])

# initialization for mean of gmeans
noCre_mean = pd.DataFrame(columns=['zf_conc', 'mean_mGL_noCre', 'mean_mCherry_noCre'])
Cre_mean = pd.DataFrame(columns=['zf_conc', 'mean_mGL_Cre', 'mean_mCherry_Cre'])

for zf_conc in zf_target_conc:

    mean_mGL_noCre = data_subset_noCre[data_subset_noCre['ZF37']==zf_conc]['Normalized_mGL-A_gmean'].mean()
    mean_mCherry_noCre = data_subset_noCre[data_subset_noCre['ZF37']==zf_conc]['Normalized_mCherry-A_gmean'].mean()

    mean_mGL_Cre = data_subset_Cre[data_subset_Cre['ZF37']==zf_conc]['Normalized_mGL-A_gmean'].mean()
    mean_mCherry_Cre = data_subset_Cre[data_subset_Cre['ZF37']==zf_conc]['Normalized_mCherry-A_gmean'].mean()

    noCre_mean.loc[len(noCre_mean)] = [zf_conc, mean_mGL_noCre, mean_mCherry_noCre]

    Cre_mean.loc[len(Cre_mean)] = [zf_conc, mean_mGL_Cre, mean_mCherry_Cre]

In [27]:
display(noCre_mean)

Unnamed: 0,zf_conc,mean_mGL_noCre,mean_mCherry_noCre
0,0.0,0.123609,0.115317
1,0.00781,0.42386,0.213728
2,0.01563,0.622388,0.297223
3,0.03125,0.744024,0.408911
4,0.0625,0.868208,0.598726
5,0.125,1.0,1.0
6,0.25,1.11218,1.899059
7,0.5,1.053172,3.486967
8,1.0,0.964752,6.122361


In [29]:
noCre_file_path = data_folder + 'noCre_ZF37titration.xlsx'
data_subset_noCre.to_excel(noCre_file_path, index=False)

Cre_file_path = data_folder + 'Cre_ZF37titration.xlsx'
data_subset_Cre.to_excel(Cre_file_path, index=False)

In [30]:
mean_noCre_file_path = data_folder + 'MEAN_noCre_ZF37titration.xlsx'
noCre_mean.to_excel(mean_noCre_file_path, index=False)

mean_Cre_file_path = data_folder + 'MEAN_Cre_ZF37titration.xlsx'
Cre_mean.to_excel(mean_Cre_file_path, index=False)

Do calculations for each biorep

In [None]:
# subset data_subset_noCre and data_subset_Cre per biorep
# 20240304flow_exp20240301_p3_BioC
# 20240309flow_exp20240306_p1 ('Yunbeen' attune folder) -- missing 0.0625xZF, 16 instead of 18
# 20240309flow_exp20240306_p2 ('Yunbeen' attune folder)
# 20240316flow_exp20240313_p1_ZFtitr ('Sneha' attune folder)
# 20240315flow_exp20240312_p3_ZFtitr

In [None]:
bioreps = pd.array(['20240304flow_exp20240301_p3_BioC',
                    '20240309flow_exp20240306_p1',
                    '20240309flow_exp20240306_p2',
                    '20240316flow_exp20240313_p1_ZFtitr',
                    '20240315flow_exp20240312_p3_ZFtitr'])

# Iterate over bioreps with index
for i, biorep in enumerate(bioreps):
    # Subset based on condition for noCre
    globals()[f"data_noCre_{i}"] = data_subset_noCre[data_subset_noCre['bioreplicate'] == biorep].copy()
    # Subset based on condition for Cre
    globals()[f"data_Cre_{i}"] = data_subset_Cre[data_subset_Cre['bioreplicate'] == biorep].copy()

In [None]:
display(data_noCre_0)
display(data_Cre_0)

In [None]:
biorep0_noCre_file_path = data_folder + 'noCre_ZF37tit_biorep0.xlsx'
data_noCre_0.to_excel(biorep0_noCre_file_path, index=False)

biorep0_Cre_file_path = data_folder + 'Cre_ZF37tit_biorep0.xlsx'
data_Cre_0.to_excel(biorep0_Cre_file_path, index=False)

In [None]:
biorep1_noCre_file_path = data_folder + 'noCre_ZF37tit_biorep1.xlsx'
data_noCre_1.to_excel(biorep1_noCre_file_path, index=False)

biorep1_Cre_file_path = data_folder + 'Cre_ZF37tit_biorep1.xlsx'
data_Cre_1.to_excel(biorep1_Cre_file_path, index=False)

In [None]:
biorep2_noCre_file_path = data_folder + 'noCre_ZF37tit_biorep2.xlsx'
data_noCre_2.to_excel(biorep2_noCre_file_path, index=False)

biorep2_Cre_file_path = data_folder + 'Cre_ZF37tit_biorep2.xlsx'
data_Cre_2.to_excel(biorep2_Cre_file_path, index=False)

In [None]:
biorep3_noCre_file_path = data_folder + 'noCre_ZF37tit_biorep3.xlsx'
data_noCre_3.to_excel(biorep3_noCre_file_path, index=False)

biorep3_Cre_file_path = data_folder + 'Cre_ZF37tit_biorep3.xlsx'
data_Cre_3.to_excel(biorep3_Cre_file_path, index=False)

In [None]:
biorep4_noCre_file_path = data_folder + 'noCre_ZF37tit_biorep4.xlsx'
data_noCre_4.to_excel(biorep4_noCre_file_path, index=False)

biorep4_Cre_file_path = data_folder + 'Cre_ZF37tit_biorep4.xlsx'
data_Cre_4.to_excel(biorep4_Cre_file_path, index=False)