### The following notebook was used to unblind and compile all of the S1 data for the NP Manuscript

In [1]:
# Importing the packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pathlib as plb
import seaborn as sns
from scipy import stats
import statistics
import dabest as db
import math
from scipy.stats import norm
import scipy
from math import log
from collections import Counter

#### Reading in and cleaning up blinded metadata and compound key

In [2]:
# Load the blinded plate metadata sheet.
md = pd.read_csv('C:/Users/Emily/Documents/S1/metadata/S1_metadata.csv', delimiter=',', encoding='utf-8-sig')
# Keep only the last character of the library ID so it can be matched against
# compound_key['Plate Number'] (assumes single-character plate numbers — TODO confirm).
md['Compound library ID'] = md['Compound library ID'].str[-1]
#md.drop(md.columns[[3, 4, 12, 13, 16, 17, 18, 19, 20, 21]], axis = 1, inplace = True)
# Drop the first row of the sheet.
# NOTE(review): presumably a template/example row — confirm before reuse.
md = md.drop(md.index[0], axis=0)
md.head()

Unnamed: 0,Date:,Recorder,Plate ID,Different strains in each well of a single assay plate? (Y/N),Different compounds in each well of a single assay plate? (Y/N),Compound library ID,Compound Well A,Compound Well B,Compound Well C,Compound Well D,...,Chemotaxis Start (24 hrs format),Chemotaxis End (24 hrs format),Image ID,Scanner Slot:,Scanner Slot,Scanner #,Strain Well A,Strain Well B,Strain Well C,Strain Well D
1,8/23/21,Hodan,S1_R1_1,,,1,B2,B3,B4,B5,...,11:10,12:10,S1_001,1,,3,N2,N2,N2,N2
2,8/23/21,Hodan,S1_R1_2,,,1,C2,C3,C4,C5,...,11:10,12:10,S1_001,2,,3,N2,N2,N2,N2
3,8/23/21,Hodan,S1_R1_3,,,1,D2,D3,D4,D5,...,11:10,12:10,S1_001,3,,3,N2,N2,N2,N2
4,8/23/21,Hodan,S1_R1_4,,,1,E2,E3,E4,E5,...,11:10,12:10,S1_001,4,,3,N2,N2,N2,N2
5,8/23/21,Hodan,S1_R1_5,,,1,F2,F3,F4,F5,...,11:10,12:10,S1_002,1,,4,N2,N2,N2,N2


In [3]:
# Load the randomization key that maps each library plate/well to a compound.
compound_key = pd.read_csv('C:/Users/Emily/Documents/S1/metadata/S1_randomized_compounds.csv', index_col=0)
# Strip whitespace and keep only the last character of the plate label
# (assumes single-character plate numbers — TODO confirm).
compound_key['Plate Number'] =  compound_key['Plate Number'].str.strip().str[-1]
compound_key.tail()


Unnamed: 0,CAS ID,Compound,Num,Let,Compound Well,Plate Number
91,137-32-6,2-Methyl-1-butanol,5,F,F5,4
92,3387-41-5,Sabinene,2,G,G2,4
93,462-94-2,Cadaverine,3,G,G3,4
94,105-87-3,Geranyl Acetate,4,G,G4,4
95,508-02-1,Oleanolic Acid,5,G,G5,4


#### Generating functions to unblind the compounds in the metadata

In [4]:
def _lookup_compound(row, compound_map, well_column):
    """Return the compound stored in the library well named by ``row[well_column]``.

    Parameters
    ----------
    row : mapping (e.g. a metadata row)
        Must provide 'Compound library ID' and ``well_column``.
    compound_map : pandas.DataFrame
        Key with 'Plate Number', 'Compound Well' and 'Compound' columns.
    well_column : str
        Name of the metadata column holding the library well (e.g. 'Compound Well A').

    Raises
    ------
    IndexError
        If no row of ``compound_map`` matches the plate/well pair. The same
        exception type was raised before, but now with a message that names
        the pair that failed to match.
    """
    matches = compound_map.loc[
        (compound_map['Plate Number'] == row['Compound library ID']) &
        (compound_map['Compound Well'] == row[well_column]), 'Compound']
    if matches.empty:
        raise IndexError(
            'No compound found for plate {!r}, well {!r} (from column {!r})'.format(
                row['Compound library ID'], row[well_column], well_column))
    # If several key rows match, the first one is used (unchanged behaviour).
    return matches.values[0]


def add_compoundA(row, compound_map ):
    """Unblind the compound dispensed into assay well A of this metadata row."""
    return _lookup_compound(row, compound_map, 'Compound Well A')


def add_compoundB(row, compound_map ):
    """Unblind the compound dispensed into assay well B of this metadata row."""
    return _lookup_compound(row, compound_map, 'Compound Well B')


def add_compoundC(row, compound_map ):
    """Unblind the compound dispensed into assay well C of this metadata row."""
    return _lookup_compound(row, compound_map, 'Compound Well C')


def add_compoundD(row, compound_map ):
    """Unblind the compound dispensed into assay well D of this metadata row."""
    return _lookup_compound(row, compound_map, 'Compound Well D')

In [5]:
# Unblind the four assay wells of every plate: each lookup function receives
# the metadata row plus the compound key and returns the compound name.
well_lookups = {
    'Compound A': add_compoundA,
    'Compound B': add_compoundB,
    'Compound C': add_compoundC,
    'Compound D': add_compoundD,
}
for column_name, lookup in well_lookups.items():
    md[column_name] = md.apply(lookup, axis=1, args=(compound_key,))

# Store the scanner slot as text so it can be compared with string values.
md['Scanner Slot:'] = md['Scanner Slot:'].map(str)

#### Reading in the Image Analysis summary file. 
<p> The metadata sheet will be used to fill in missing fields on the Image Analysis summary file including Compound, Strain and Plate ID </p>

In [6]:
# Read the image-analysis summary and discard the 'Large Object' column.
ia_data = (
    pd.read_csv('C:/Users/Emily/Documents/S1/image_analysis/S1_All_reps.csv', index_col=0)
    .drop(columns=['Large Object'])
)

#### Calculating the coefficient of variation for the total number of worms dispensed by the liquid handler

In [7]:
# Summary statistics of worms per well, then CV = std / mean expressed in percent.
t = ia_data['Total Worms'].describe()
print(t)
cv_percent = t['std']/t['mean']*100
print('Coefficient of variation = '+str(cv_percent))

count    312.000000
mean     281.314103
std       72.736392
min       83.000000
25%      223.750000
50%      280.000000
75%      334.250000
max      467.000000
Name: Total Worms, dtype: float64
Coefficient of variation = 25.85593511051879


#### Generating a function to add the plate ID and compound name to the summary data

In [11]:
def add_PlateID(row, metadata):
    """Return the Plate ID for one image-analysis row.

    The first character of ``row['WellNo']`` is taken as the scanner slot and
    matched, together with ``row['File Name']``, against the 'Scanner Slot:'
    and 'Image ID' columns of ``metadata``. The first matching 'Plate ID' is
    returned; an IndexError is raised when nothing matches.
    """
    scanner_slot = row['WellNo'][0]
    same_image = metadata['Image ID'] == row['File Name']
    same_slot = metadata['Scanner Slot:'] == scanner_slot
    plate_ids = metadata.loc[same_image & same_slot]['Plate ID']
    return plate_ids.values[0]

In [12]:
# Attach the Plate ID to every image-analysis row via the metadata sheet.
ia_data['Plate ID'] = ia_data.apply(add_PlateID, axis=1, args=(md,))

In [13]:
def add_Compound(row, metadata):
    """Return the unblinded compound for one image-analysis row.

    The second character of ``row['WellNo']`` selects the assay well letter
    (A-D); the compound is read from the matching 'Compound <letter>' column
    of the ``metadata`` row whose 'Plate ID' equals ``row['Plate ID']``.

    Raises
    ------
    ValueError
        If the well letter is not one of A, B, C or D (previously this fell
        through every branch and failed with an UnboundLocalError).
    IndexError
        If no metadata row has the requested Plate ID.
    """
    wellID = row['WellNo'][1]
    if wellID not in ('A', 'B', 'C', 'D'):
        raise ValueError(
            'Unexpected well letter {!r} in WellNo {!r}; expected A, B, C or D'.format(
                wellID, row['WellNo']))
    compound = metadata.loc[metadata['Plate ID'] == row['Plate ID']]['Compound ' + wellID]
    return compound.values[0]

In [14]:
# Attach the unblinded compound name to every image-analysis row.
ia_data['Compound'] = ia_data.apply(add_Compound, axis=1, args=(md,))

#### Filtering the dataset to only include replicates with 150 worms or more
<p> We also want to identify any compounds that have fewer than 3 biological replicates with at least 150 worms. </p>

In [15]:
# Keep replicates with at least 150 worms. The explicit .copy() makes the
# filtered frame independent of the original, so adding a column below no
# longer triggers pandas' SettingWithCopyWarning.
ia_data = ia_data.loc[ia_data['Total Worms']>= 150].copy()
# Number of remaining replicates for each compound.
ia_data['comp_count'] = ia_data.groupby('Compound')['Compound'].transform('count')
# Display compounds left with fewer than 3 usable replicates.
ia_data.loc[ia_data['comp_count']<3]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,WellNo,Total Worms,Chemotaxis,Compound,Strain,File Name,Well width,Plate ID,comp_count
0,1A,329.0,0.212121,Paeoniflorin,,S1_005,3039.0,S1_R1_17,2
2,1C,313.0,0.828179,Isoamyl alcohol,,S1_005,3030.0,S1_R1_17,2
10,3C,268.0,0.451327,Phenylacetylene,,S1_011,3058.0,S1_R2_19,2
8,3A,227.0,0.431472,Paeoniflorin,,S1_009,3050.0,S1_R2_11,2
10,3C,189.0,0.847059,Isoamyl alcohol,,S1_009,3066.0,S1_R2_11,2
2,1C,228.0,0.215311,Phenylacetylene,,S1_021,3024.0,S1_R4_7,2
3,1D,191.0,0.58427,2-Methyl-1-butanol,,S1_017,3017.0,S1_R3_17,2
11,3D,190.0,0.780347,2-Methyl-1-butanol,,S1_012,3055.0,S1_R2_23,2


#### Reading in all of the files that contain worm positions based on the exclusion criteria above

In [17]:
# Folder that holds the per-well worm location files ('loc_<File Name>_<WellNo>.csv').
wrm_locs_fldr = plb.Path('C:/Users/Emily/Documents/S1/image_analysis/')

In [18]:
def get_worm_locs(row, wrms, result_dict):
    """Read one well's worm x-positions and pool them by compound.

    Parameters
    ----------
    row : mapping
        Must provide 'File Name', 'WellNo' and 'Compound'.
    wrms : pathlib.Path
        Folder containing the 'loc_<File Name>_<WellNo>.csv' files; each file
        must have an 'X' column.
    result_dict : dict
        Maps compound name -> pandas.Series of pooled x-positions. Updated in
        place: positions are appended to an existing entry (with a fresh
        0..n-1 index) or stored as a new entry.

    Returns
    -------
    (dict, float)
        The same ``result_dict`` object and the mean x-position of this well.
    """
    fname = row['File Name']
    wellnum = row['WellNo']
    loc_fname = wrms.joinpath('loc_' + fname + '_' + wellnum + '.csv')
    xs = pd.read_csv(loc_fname)['X']
    mean_pos = xs.mean()

    compound = row['Compound']
    if compound in result_dict:
        # Series.append was removed in pandas 2.0; pd.concat with
        # ignore_index=True gives the same pooled, re-indexed Series.
        result_dict[compound] = pd.concat([result_dict[compound], xs], ignore_index=True)
    else:
        result_dict[compound] = xs

    return result_dict, mean_pos

In [20]:
# Sort so that rows of the same compound are adjacent and ordered by scan file
# name; the pooling loop that follows relies on this adjacency to count
# replicates per compound.
ia_data = ia_data.sort_values(['Compound', 'File Name'], ascending=[True, True])

#### To perform statistical analyses we need to pool all of the worm positions from each biological replicate for each condition.

<p> We also want to capture the first 3 biological replicates performed for each condition. Some conditions were captured 4 times due to errors made during the screening process. </p>

In [21]:
# Empty frame with the summary columns plus one extra column for the mean
# worm position of each replicate.
cols = [*ia_data.columns, 'MeanPos']
means_df = pd.DataFrame(columns=cols)

In [22]:
# Pool the worm x-positions of the first three replicates of every compound
# and record one means_df row (summary columns + mean position) per pooled
# replicate. ia_data must already be sorted so that rows of the same compound
# are adjacent.
results_dict = {}
compound = ''
i = 0    # zero-based replicate counter within the current compound
idx = 0  # next row label to write in means_df

for index, row in ia_data.iterrows():
    if row['Compound'] == compound:
        i += 1
        if i >= 3:
            # Fourth (or later) replicate: neither pooled nor recorded.
            continue
    else:
        i = 0
        compound = row['Compound']
    # Read this replicate's positions BEFORE writing its means_df row.
    # Previously the row for the 2nd and 3rd replicates was written first, so
    # its MeanPos was the mean position of the previous replicate.
    pooled, m = get_worm_locs(row, wrm_locs_fldr, results_dict)
    means_df.loc[idx] = row.values.tolist() + [m]
    idx += 1

#Remember that Dabest requires a dataframe. Casting dict to df
# Built from results_dict (the object get_worm_locs updates and returns) so
# this also works when ia_data is empty and `pooled` was never assigned.
pooled_df = pd.DataFrame.from_dict(results_dict)

#### Converting the worm locations from pixels (images scanned at 1200 dots per inch) to millimeters

In [23]:
# Half of the mean well width (in pixels): used as the position of the start zone.
middle = ia_data['Well width'].mean()/2

# 1 inch = 25.4mm
mm = 25.4
# 1200 pixels per 25.4mm
px_mm = 1200/mm

# Start-zone position converted from pixels to millimeters.
middle_mm = middle/px_mm

#The following transforms the worm location data so that it is respective to the start zone
# The start zone is at the center of the image; Start Zone = 0mm
#Worms with positive values are closer to the compound, negative values are away from the compound
mm_df = pooled_df.apply(lambda x: -(x/px_mm)+middle_mm)
# NOTE(review): this per-replicate transform truncates the mean to an integer
# pixel and uses a fixed offset of 32.5 instead of middle_mm, so it is not
# identical to the mm_df transform above — confirm this difference is intended.
means_df['MeanPos'] = means_df['MeanPos'].apply(lambda x: -(int(x)/px_mm)+32.5)
#mm_df.to_csv('C:/Users/Emily/Documents/S1/S1_xs3.csv')
#means_df.to_csv('C:/Users/Emily/Documents/S1/S1_3meansDF.csv')

#### Creating an ordered list of compounds to pass to Dabest to calculate confidence intervals
<p> The control condition always needs to be the first item in the list

In [24]:
# Per-compound means of the numeric summary columns. numeric_only=True is
# required on pandas >= 2.0, where averaging the string columns (WellNo,
# File Name, Plate ID) raises a TypeError instead of silently dropping them.
sums = ia_data.groupby('Compound').mean(numeric_only=True).reset_index()
ia_sort = sums.copy()
condition = (ia_sort.Compound=='DMSO') | (ia_sort.Compound=='H2O')

# The two solvent controls go first. Their relative order (DMSO, then H2O)
# comes from groupby's sorted keys.
excluded = ia_sort[condition]
included = ia_sort[~condition]
# Remaining compounds ordered by mean chemotaxis index, lowest first.
sortd = included.sort_values(by='Chemotaxis',ascending=True)
ia_sort = pd.concat([excluded, sortd])

cmpd_ordr = ia_sort['Compound'].to_list()

#### Generating confidence intervals using DMSO as the control group

In [25]:
# NOTE(review): db_obj is created with the same arguments as pooled_mm_obj and
# is not referenced again in the visible part of the notebook.
db_obj = db.load(mm_df, idx=(cmpd_ordr))
# Load the pooled positions (mm) into dabest with the compounds in cmpd_ordr
# order; presumably dabest treats the first entry (DMSO) as the control — confirm.
pooled_mm_obj = db.load(mm_df, idx=(cmpd_ordr))
# Table of mean differences of each compound versus the control.
results_df_mm = pooled_mm_obj.mean_diff.results

In [26]:
# DMSO-control results ordered from the largest to the smallest mean difference.
results_df_mm_srt = results_df_mm.sort_values(by='difference', ascending=False)

#### Filtering the DMSO analysis for compounds whose confidence intervals do not span zero (i.e. whose mean worm position differs from the DMSO control)

In [28]:
# Compounds whose 95% CI for the mean difference spans zero are treated as
# indistinguishable from the DMSO control.
spans_zero = (results_df_mm['bca_low'] < 0) & (results_df_mm['bca_high'] > 0)
same = results_df_mm.loc[spans_zero]
same_comps = same['test'].to_list()
# Keep every compound not in that list, and always keep the H2O row. Using one
# boolean mask replaces DataFrame.append (removed in pandas 2.0) and avoids
# listing H2O twice when its interval already excludes zero.
is_h2o = results_df_mm['test'] == 'H2O'
diff = results_df_mm[~results_df_mm['test'].isin(same_comps) | is_h2o]
diff = diff.sort_values(['difference'])

Unnamed: 0,control,test,control_N,test_N,effect_size,is_paired,difference,ci,bca_low,bca_high,...,permutations,pvalue_permutation,permutation_count,permutations_var,pvalue_welch,statistic_welch,pvalue_students_t,statistic_students_t,pvalue_mann_whitney,statistic_mann_whitney
1,DMSO,1-octanol,1068,652,mean difference,,-7.446258,95,-8.944625,-5.916856,...,"[-1.1430490002910472, 1.4386948704839808, -0.8...",0.0,5000,"[0.6585255360728213, 0.6761347742103405, 0.665...",1.049841e-21,9.717362,2.703217e-20,9.347132,1.296941e-18,436169.5
2,DMSO,Phytol,1068,625,mean difference,,-6.248916,95,-7.80029,-4.622622,...,"[-0.5293550262172286, 2.1863869078651685, -0.6...",0.0,5000,"[0.7128926978311687, 0.7240487727636635, 0.718...",5.494421e-14,7.599271,1.175689e-13,7.48141,1.81106e-13,405215.0
4,DMSO,2-nonanone,1068,867,mean difference,,-4.575976,95,-6.02,-3.096992,...,"[-0.04080075727140331, -0.49626449150931573, 0...",0.0,5000,"[0.589100770913261, 0.5913009546492195, 0.5858...",1.936083e-09,6.0328,2.22725e-09,6.008853,3.639988e-09,535088.0
3,DMSO,Ellagic acid,1068,890,mean difference,,-3.744019,95,-5.184462,-2.379066,...,"[-0.7262108926342077, 0.31748414481897647, -0....",0.0,5000,"[0.5399771273594572, 0.544246602476453, 0.5378...",2.462091e-07,5.178916,3.299728e-07,5.123185,2.051909e-06,534408.5
6,DMSO,Salvinorin A Propionate,1068,926,mean difference,,-3.313405,95,-4.793185,-1.846934,...,"[-0.657435423593079, 0.82918033680901, 0.07609...",0.0,5000,"[0.5772476773733484, 0.5815060875268204, 0.574...",1.228296e-05,4.383759,1.248376e-05,4.380002,1.484407e-05,550018.5
7,DMSO,Spinosad,1068,900,mean difference,,-2.79694,95,-4.280256,-1.379054,...,"[0.35291545359966703, 1.2975428277153556, 0.31...",0.0,5000,"[0.5644856847180117, 0.5683520985588539, 0.562...",0.0001844794,3.746616,0.0001963781,3.730681,0.0003352161,525641.0
8,DMSO,Camphor,1068,941,mean difference,,-2.756356,95,-4.166159,-1.342275,...,"[-1.2371582503141, 1.0074922674035252, 0.59604...",0.0004,5000,"[0.5387701832513379, 0.543946188584805, 0.5400...",0.0001627035,3.778015,0.0001743311,3.760641,0.0003707904,548684.0
10,DMSO,Ursolic acid,1068,884,mean difference,,-2.229335,95,-3.7373,-0.736521,...,"[0.448795964885522, -0.22598834319092065, 0.05...",0.0036,5000,"[0.598257663937518, 0.603274626153733, 0.59898...",0.003954668,2.885328,0.003934319,2.886827,0.003827441,507905.5
19,DMSO,Safranal,1068,848,mean difference,,-2.081202,95,-3.541572,-0.588647,...,"[-1.1703833747026122, 0.07502875459331482, -1....",0.0068,5000,"[0.5682300846217729, 0.5737083081246381, 0.565...",0.00529848,2.791574,0.005740397,2.76537,0.009653988,483961.5
5,DMSO,Methyl palmitate,1068,935,mean difference,,-1.978422,95,-3.417955,-0.5669,...,"[-0.4353599120751469, 0.027083065619846414, -1...",0.0062,5000,"[0.5450191965007933, 0.5492620227464795, 0.545...",0.007167672,2.691734,0.007404997,2.680797,0.01117691,532054.0


#### Swapping the list order so that water is the control group

In [30]:
def swap_rows(df, i1, i2):
    """Swap the values of the rows at integer positions ``i1`` and ``i2``.

    The frame is modified in place and also returned. Only the row values are
    exchanged; the index labels stay where they are.

    Previously the rows were always read from positions 0 and 1 regardless of
    ``i1``/``i2``, so any other pair of positions produced a wrong result.
    """
    # Copy both rows first so the first assignment cannot alter the data that
    # the second assignment is about to write.
    first, second = df.iloc[i1].copy(), df.iloc[i2].copy()
    df.iloc[i1, :], df.iloc[i2, :] = second, first
    return df

# Put H2O ahead of DMSO so that water becomes the first (control) entry.
# swap_rows modifies its argument, so it is given a copy of `excluded`.
swapped = swap_rows(excluded.copy(), 0, 1)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
h20_analysis = pd.concat([swapped, included])

Unnamed: 0,Compound,Total Worms,Chemotaxis,Strain,Well width,comp_count
91,p-Anisic acid,341.333333,-0.076448,,3046.333333,3.0
92,p-Tolualdehyde,291.0,0.189825,,3039.666667,3.0
93,trans-Cinnamaldehyde,293.0,0.019462,,3051.666667,3.0
94,α-Phellandrene,259.0,0.341553,,3045.333333,3.0
95,β-Citronellol,260.75,0.122218,,3038.0,4.0


#### Calculating confidence intervals using water as the control group

In [31]:
# Compound order with water first, then the dabest mean-difference results
# computed on the same pooled positions (mm).
h20_ordr = list(h20_analysis['Compound'])
h20_obj = db.load(mm_df, idx=h20_ordr)
h20_md_results = h20_obj.mean_diff.results

In [32]:
# Water-control results ordered from the largest to the smallest mean difference.
h20_md_results_srt = h20_md_results.sort_values(by='difference', ascending=False)

In [40]:
# Stack the water-control results under the DMSO-control results.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Row labels are kept as-is, so labels repeat across the two analyses.
# NOTE: re-running this cell stacks the water results again.
results_df_mm_srt = pd.concat([results_df_mm_srt, h20_md_results_srt])
results_df_mm = pd.concat([results_df_mm, h20_md_results])
#results_df_mm.to_csv('C:/Users/Emily/Documents/S1/S1_3.csv')
#results_df_mm_srt.to_csv('/Users/Emily/Documents/S1/TableS2_wnums.csv')

190


#### Filtering the water analysis for compounds whose confidence intervals do not span zero (i.e. whose mean worm position differs from the water control)

In [34]:
# Compounds whose 95% CI for the mean difference spans zero are treated as
# indistinguishable from the water control.
h2o_spans_zero = (h20_md_results['bca_low'] < 0) & (h20_md_results['bca_high'] > 0)
h2o_same = h20_md_results.loc[h2o_spans_zero]
h2o_same_comps = h2o_same['test'].to_list()
# Keep every compound not in that list, and always keep the DMSO row. Using
# one boolean mask replaces DataFrame.append (removed in pandas 2.0) and
# avoids listing DMSO twice when its interval already excludes zero.
is_dmso = h20_md_results['test'] == 'DMSO'
h2o_diff = h20_md_results[~h20_md_results['test'].isin(h2o_same_comps) | is_dmso]
h2o_diff = h2o_diff.sort_values(['difference'], ascending=False)

#### Merging the water and DMSO analysis to reflect only compounds that are found in both analyses
<p> Compounds found in the resulting dataset will be retested using the tax-4 null, osm-9 null and tax-4::osm-9 double null mutant strains. </p>

In [35]:
# Inner join on the compound name: keeps only compounds present in both
# h2o_diff and diff. Columns shared by the two frames get pandas' default
# '_x' (from h2o_diff) and '_y' (from diff) suffixes.
inner = pd.merge(h2o_diff, diff , on=["test"])

42


In [36]:
# Add the reciprocal control rows (DMSO tested against water, water tested
# against DMSO) to the merged table. DataFrame.append was removed in pandas
# 2.0; pd.concat is the replacement. These rows carry the unsuffixed result
# columns, so they do not line up with the merged '_x'/'_y' columns.
inner = pd.concat([
    inner,
    h2o_diff.loc[h2o_diff['test']=='DMSO'],
    diff.loc[diff['test']=='H2O'],
])

In [37]:
# Names of every compound kept in `inner`.
comps = inner['test']
# All result rows in results_df_mm whose test compound is one of those names.
all_control_results = results_df_mm.loc[results_df_mm['test'].isin(comps)]
#all_control_results.to_csv('/Users/Emily/Documents/S1/toMutantScreen_mdiff.csv')

43


### Benjamini-Hochberg (BH) correction analysis

In [91]:
# Copy of the sorted results, restricted to the columns needed to derive
# p-values from the confidence intervals.
data = results_df_mm_srt.copy()
cols = ['control', 'test', 'difference', 'bca_low', 'bca_high']
data = data[cols]
#data.drop(columns=['control', 'test', 'difference', 'bca_low', 'bca_high'])
data.head()

Unnamed: 0,control,test,difference,bca_low,bca_high
94,DMSO,Isoamyl alcohol,14.166999,12.820407,15.511796
93,DMSO,2-Methyl-1-butanol,11.712247,9.891178,13.409462
92,DMSO,Thiophene,9.759898,8.202217,11.2633
90,DMSO,"2,3-Dihydrobenzofuran",9.194467,7.629789,10.673937
91,DMSO,Diacetyl,8.018781,6.589946,9.361027


In [92]:
# Columns to read back from the exported results file.
sub = ['control', 'test', 'difference', 'bca_low', 'bca_high']
# NOTE(review): this rebinds `data` to the contents of S1_3.csv. The only
# to_csv call for S1_3.csv visible in this notebook is commented out (and uses
# a 'C:/Users/...' path rather than '/Users/...'), so this cell depends on a
# previously exported file — confirm that file matches the current results.
data = pd.read_csv('/Users/Emily/Documents/S1/S1_3.csv', usecols=sub)


#### Defining functions to calculate BH correction and p values

In [93]:
def CItoP(row):
    """Approximate a two-sided p-value from a 95% confidence interval.

    Reads 'bca_low', 'bca_high' and 'difference' from ``row``. The standard
    error is taken as the CI width divided by 2 * 1.96, the z-score as the
    estimate over that standard error, and the p-value as twice the upper
    tail of the standard normal distribution at |z|.
    """
    upper, lower = row['bca_high'], row['bca_low']
    estimate = row['difference']
    # Standard error recovered from the width of the interval.
    std_err = (upper - lower) / (2 * 1.96)
    # z-score of the estimated mean difference.
    z_score = estimate / std_err
    # norm.sf is the survival function (1 - CDF); doubling gives the two-sided value.
    return norm.sf(abs(z_score)) * 2

In [94]:
# adding p values to the dataframe
# One CItoP call per row. Unlike assigning through data.loc[index, ...] inside
# a loop, this stays correct when row labels repeat (a repeated label would
# make .loc write the same value to every row that shares it).
data['p_value'] = data.apply(CItoP, axis=1)


In [95]:
def BH_critical(i, m, Q):
    """Return the Benjamini-Hochberg critical value (i / m) * Q.

    i : rank of the p-value (1 = smallest)
    m : total number of tests
    Q : false discovery rate
    """
    return (i / m) * Q

In [96]:
#Creating ranks for the BH correction
def pranks(df, fdr, m=96):
    """Add the Benjamini-Hochberg critical value for every ranked row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'rank' column (1 = smallest p-value). Modified in place.
    fdr : float
        False discovery rate; also used in the new column's name,
        'BH_correction_<fdr>'.
    m : int, optional
        Total number of tests used in (rank / m) * fdr. Defaults to 96, the
        value that was previously hard-coded; pass ``len(df)`` to use the
        number of rows actually being ranked.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the extra column.
    """
    hed = 'BH_correction_' + str(fdr)
    # Same formula as BH_critical, applied to the whole column at once.
    df[hed] = (df['rank'] / m) * fdr
    return df

In [97]:
#Iterating through the dataframe
# Rank the p-values within each control analysis (1 = smallest) and attach
# the BH critical values. The per-control frames are collected in a list and
# stacked once with pd.concat, because DataFrame.append was removed in pandas
# 2.0. Each frame keeps its own 0..n-1 row labels, as before.
controls = ['DMSO', 'H2O']
ranked_frames = []

for c in controls:
    h = data.loc[data['control']==c]
    ranked = h.sort_values(by=['p_value']).reset_index(drop=True)
    ranks=np.arange(1,len(ranked)+1)
    ranked['rank'] = ranks
    BH = pranks(ranked, .05)
    ranked_frames.append(BH)
vals96 = pd.concat(ranked_frames)
vals96.tail()

Unnamed: 0,control,test,difference,bca_low,bca_high,p_value,rank,BH_correction_0.05
90,H2O,Eucalyptol,0.131015,-1.52033,1.710998,0.873718,91,0.047396
91,H2O,Carnosic acid,0.116052,-1.502566,1.752305,0.888844,92,0.047917
92,H2O,Rosmarinic acid,-0.100297,-1.653787,1.484949,0.900316,93,0.048438
93,H2O,Trans-Anethole,-0.072106,-1.680317,1.456152,0.928192,94,0.048958
94,H2O,Skatole,0.017496,-1.500906,1.571867,0.982193,95,0.049479


In [98]:
# Preview the first rows of the ranked table.
vals96.head()

Unnamed: 0,control,test,difference,bca_low,bca_high,p_value,rank,BH_correction_0.05
0,DMSO,Isoamyl alcohol,14.166999,12.820407,15.511796,1.353903e-94,1,0.000521
1,DMSO,2-Methyl-1-butanol,11.712247,9.891178,13.409462,6.392657000000001e-39,2,0.001042
2,DMSO,Thiophene,9.759898,8.202217,11.2633,7.612012e-36,3,0.001563
3,DMSO,"2,3-Dihydrobenzofuran",9.194467,7.629789,10.673937,2.428362e-32,4,0.002083
4,DMSO,Diacetyl,8.018781,6.589946,9.361027,7.992905e-30,5,0.002604


In [99]:
#Finding compounds that are significant under the BH correction
# Benjamini-Hochberg is a step-up procedure: within each control analysis,
# find the largest rank whose p-value is at or below its critical value and
# keep every test up to that rank. Comparing each row only against its own
# threshold can drop tests that the procedure rejects.
meets_threshold = vals96['p_value'] <= vals96['BH_correction_0.05']
largest_passing_rank = (
    vals96.assign(passing_rank=vals96['rank'].where(meets_threshold))
    .groupby('control')['passing_rank']
    .transform('max')
)
# If no test passes in a group the maximum is NaN and nothing is kept for it.
filtered_BH = vals96.loc[vals96['rank'] <= largest_passing_rank]
filtered_BH

Unnamed: 0,control,test,difference,bca_low,bca_high,p_value,rank,BH_correction_0.05
0,DMSO,Isoamyl alcohol,14.166999,12.820407,15.511796,1.353903e-94,1,0.000521
1,DMSO,2-Methyl-1-butanol,11.712247,9.891178,13.409462,6.392657e-39,2,0.001042
2,DMSO,Thiophene,9.759898,8.202217,11.263300,7.612012e-36,3,0.001563
3,DMSO,"2,3-Dihydrobenzofuran",9.194467,7.629789,10.673937,2.428362e-32,4,0.002083
4,DMSO,Diacetyl,8.018781,6.589946,9.361027,7.992905e-30,5,0.002604
...,...,...,...,...,...,...,...,...
35,H2O,Oleanolic Acid,-2.178525,-3.833409,-0.583985,8.586267e-03,36,0.018750
36,H2O,Ethyl palmitate,2.155766,0.531007,3.781640,9.331073e-03,37,0.019271
37,H2O,Piperonyl Alcohol,1.872080,0.430105,3.296847,1.047040e-02,38,0.019792
38,H2O,p-Tolualdehyde,1.987114,0.397433,3.483390,1.159710e-02,39,0.020313


In [100]:
#Which compounds are "significant" against BOTH the DMSO and the H2O control?
# A compound appears once per control analysis it passed, so a count above 1
# means it was found in both sets.
test = filtered_BH['test']
counts = Counter(test)
output = [name for name in counts if counts[name] > 1]

In [101]:
#Next we want to find which compounds we would have omitted from our mutant screens if we had performed BH-corrections
# NOTE(review): inner3.csv is not written anywhere in the visible notebook;
# presumably it is an export of the follow-up compound list — confirm.
followups = pd.read_csv('/Users/Emily/Documents/S1/inner3.csv', index_col=0)
# Unique follow-up compound names (this rebinds the earlier `comps` name).
comps = list(followups['test'].unique())
set1 = set(comps)
set2 = set(output)
# Follow-up compounds that are not in the BH-significant-in-both list, sorted by name.
missing = list(sorted(set1 - set2))
missing

['DMSO', 'H2O', 'Oleanolic Acid', 'Sabinene', 'Sinomenine hydrochloride']

#### Creating a clean and sorted Supplemental Table 2

In [41]:
# DMSO-control rows ordered from the largest to the smallest mean difference.
dmso = vals96.loc[vals96['control']=='DMSO']
dmso = dmso.sort_values(by='difference', ascending=False)
cord = list(dmso['test'])

# Water-control rows arranged in the same compound order as the DMSO rows.
# Compounds in `cord` with no water-control row become all-NaN rows.
h2o = vals96.loc[vals96['control'] == 'H2O']
h2o = h2o.set_index('test').reindex(cord).reset_index()
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
reordered = pd.concat([dmso, h2o])
reordered = reordered.drop(columns=['rank'])

In [44]:
#reordered.to_csv('/Users/Emily/Documents/S1/reorderedS2.csv')