In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pathlib as plb
import seaborn as sns
from scipy import stats
import statistics
import dabest as db

#### 1. Unblinding the metadata
<p>All of our compounds were blinded and placed into 96 well plates for dispensing. We blinded our compounds by assigning them randomized locations within the 96-well plate format and used used the locations as the blinding key.

In [2]:
p1 = pd.read_csv('C:/Users/Emily/Documents/S1F1/S1fuPlate1.csv')
p1s = p1.iloc[range(6)]
p1s = p1s.melt(id_vars=['Let'], var_name= 'Col', value_name= 'Compound')
p1s['CPID'] = 'P1'

In [3]:
p2 = pd.read_csv('C:/Users/Emily/Documents/S1F1/S1fuPlate2.csv')
p2s = p2.iloc[range(6)]
p2s = p2s.melt(id_vars=['Let'], var_name= 'Col', value_name= 'Compound')
p2s['CPID'] = 'P2'

In [4]:
sub = ['Compound', 'Location']
ckey = pd.read_csv('C:/Users/Emily/Documents/S1F1/ckey.csv', index_col=0).drop(columns=['Let','Col'])
ckey_x = pd.read_csv('C:/Users/Emily/Documents/S1F1/D5P1.csv', usecols=sub)
ckey_x.rename(columns={'Location': 'Compound Well'}, inplace=True)
ckey_x['CPID'] = 'D5P1'
ckeys = ckey.append(ckey_x)

In [5]:
s1f1_md = pd.read_csv('C:/Users/Emily/Documents/S1F1/S1F1_metadata.csv', delimiter=',', 
                      encoding='utf-8-sig')

s1f1_md['Image ID'] = s1f1_md['Image ID'].str[:5] + '0' + s1f1_md['Image ID'].str[5:8]

s1f2_md = pd.read_csv('C:/Users/Emily/Documents/S1F2/S1F2_metadata.csv', delimiter=',', 
                 encoding='utf-8-sig')

all_md = s1f1_md.append(s1f2_md)
all_md.rename(columns={'-': 'Date'}, inplace=True)

In [6]:
all_md.head()

Unnamed: 0,Date,Recorder,Plate ID,Different strains in each well of a single assay plate? (Y/N),Different compounds in each well of a single assay plate? (Y/N),Compound library ID,Compound Well A,Compound Well B,Compound Well C,Compound Well D,...,Chemotaxis Start (24 hrs format),Chemotaxis End (24 hrs format),Image ID,Scanner Slot:,Scanner Slot,Scanner #,Strain Well A,Strain Well B,Strain Well C,Strain Well D
0,10/25/2021,Ehsan,S1F1_R1_01,,,D1P2,B2,B3,B4,B5,...,,,S1F1_001,1,,3,CX10,CX10,CX10,CX10
1,10/25/2021,Ehsan,S1F1_R1_02,,,D1P2,C2,C3,C4,C5,...,,,S1F1_001,2,,3,CX10,CX10,CX10,CX10
2,10/25/2021,Ehsan,S1F1_R1_03,,,D1P2,D2,D3,D4,D5,...,,,S1F1_001,3,,3,CX10,CX10,CX10,CX10
3,10/25/2021,Ehsan,S1F1_R1_04,,,D1P2,E2,E3,E4,E5,...,,,S1F1_001,4,,3,CX10,CX10,CX10,CX10
4,10/25/2021,Ehsan,S1F1_R1_05,,,D1P2,F2,F3,F4,F5,...,,,S1F1_002,1,,3,CX10,CX10,CX10,CX10


In [7]:
def add_cipd(row):
    if row['Compound library ID'] != 'D5P1':
        pid = row['Compound library ID'][2:4]
    else:
        pid = 'D5P1'
    return pid

all_md['CPID'] = all_md.apply(lambda row: add_cipd(row), axis=1)


In [8]:
all_md['Scanner Slot:'] = all_md['Scanner Slot:'].apply(str)

In [9]:
df_obj = all_md.select_dtypes(['object', 'string'])
all_md = all_md[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
all_md = all_md.fillna('Empty')
all_md.tail()

Unnamed: 0,Date,Recorder,Plate ID,Compound library ID,Compound Well A,Compound Well B,Compound Well C,Compound Well D,Chemotaxis Start (24 hrs format),Chemotaxis End (24 hrs format),Image ID,Scanner Slot:,Strain Well A,Strain Well B,Strain Well C,Strain Well D,CPID
34,3/17/2022,Ehsan,S1F2_R3_11,D3P1,F2,F3,F4,F5,2:45,3:45,S1F2_009,3,GN1077,GN1077,GN1077,GN1077,P1
35,3/17/2022,Ehsan,S1F2_R3_12,D3P1,G2,G3,G4,G5,2:45,3:45,S1F2_009,4,GN1077,GN1077,GN1077,GN1077,P1
36,3/21/2022,EMILY,S1F2_R4_1,D4P1,F5,G3,G2,F4,10:55,11:55,S1F2_010,1,GN1077,GN1077,GN1077,GN1077,P1
37,3/21/2022,EMILY,S1F2_R4_2,D4P1,E2,C4,E4,B3,10:55,11:55,S1F2_010,2,GN1077,GN1077,GN1077,GN1077,P1
38,3/21/2022,EMILY,S1F2_R4_3,D4P2,D4,F2,Empty,Empty,10:55,11:55,S1F2_010,3,GN1077,GN1077,GN1077,GN1077,P2


In [10]:
def add_compoundA(row, compound_map):
    if row['Compound Well A'] != 'Empty':
        compound = compound_map.loc[
            (compound_map['CPID']==row['CPID']) & 
            (compound_map['Compound Well']==row['Compound Well A'])]['Compound']
        if len(compound)<0:
            return "Empty"
        else:
            return compound.values[0]
    else:
        return "Empty"
    
def add_compoundB(row, compound_map ):
    if row['Compound Well B'] != 'Empty':
        compound = compound_map.loc[
            (compound_map['CPID']==row['CPID']) & 
            (compound_map['Compound Well']==row['Compound Well B'])]['Compound']
        if len(compound)<0:
            return "Empty"
        else:
            return compound.values[0]
    else:
        return "Empty"

def add_compoundC(row, compound_map ):
    if row['Compound Well C'] != 'Empty':
        compound = compound_map.loc[
            (compound_map['CPID']==row['CPID']) & 
            (compound_map['Compound Well']==row['Compound Well C'])]['Compound']
        if len(compound)<0:
            return "Empty"
        else:
            return compound.values[0]
    else:
        return "Empty"

def add_compoundD(row, compound_map ):
    if row['Compound Well D'] != 'Empty':
        compound = compound_map.loc[
            (compound_map['CPID']==row['CPID']) & 
            (compound_map['Compound Well']==row['Compound Well D'])]['Compound']
        if len(compound)<0:
            return "Empty"
        else:
            return compound.values[0]
    else:
        return "Empty"


In [11]:
all_md['Compound A'] = all_md.apply(
    lambda row: add_compoundA(row, ckeys), axis=1)

all_md['Compound B'] = all_md.apply(
    lambda row: add_compoundB(row, ckeys), axis=1)
    
all_md['Compound C'] = all_md.apply(
    lambda row: add_compoundC(row, ckeys), axis=1)
    
all_md['Compound D'] = all_md.apply(
    lambda row: add_compoundD(row, ckeys), axis=1)

In [12]:
F1_ia_results = pd.read_csv('C:/Users/Emily/Documents/S1f1/124_ia/S1F1.csv', index_col=0).drop(columns=['Large Object'])
F2_ia_results = pd.read_csv('C:/Users/Emily/Documents/S1F2/S1F2_ia.csv', index_col=0).drop(columns=['Large Object'])
#make_ups = pd.read_csv('C:/Users/Emily/Documents/S1F2/S1F2_ia.csv', index_col=0).drop(columns=['Large Object'])
all_ia = F1_ia_results.append(F2_ia_results)


In [13]:
broll = ['S1F1_b010', 'S1F1_b011', 'S1F1_b012']
all_ia= all_ia[~all_ia['File Name'].isin(broll)]
ia_results = all_ia.loc[all_ia['Total Worms'] >= 150]
ia_results.head()

Unnamed: 0,WellNo,Total Worms,Chemotaxis,Compound,Strain,File Name,Well width,Plate ID
1,1B,199.0,0.233533,,,S1F1_001,3044.0,
2,1C,306.0,0.225352,,,S1F1_001,3044.0,
3,1D,208.0,0.390374,,,S1F1_001,3057.0,
4,2A,282.0,-0.01992,,,S1F1_001,3057.0,
5,2B,239.0,0.171429,,,S1F1_001,3043.0,


In [14]:
def add_PlateID(row, metadata):
    slotID = row['WellNo'][0]
    pid = metadata.loc[
        (metadata['Image ID']==row['File Name']) & 
        (metadata['Scanner Slot:']==slotID)]['Plate ID']

    if len(pid) == 0:
        print(row['File Name'])
        pass
    #print(pid)
    else:
        return pid.values[0]

ia_results['Plate ID'] = ia_results.apply(
    lambda row: add_PlateID(row, all_md), axis=1)

ia_results.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Unnamed: 0,WellNo,Total Worms,Chemotaxis,Compound,Strain,File Name,Well width,Plate ID
1,1B,199.0,0.233533,,,S1F1_001,3044.0,S1F1_R1_01
2,1C,306.0,0.225352,,,S1F1_001,3044.0,S1F1_R1_01
3,1D,208.0,0.390374,,,S1F1_001,3057.0,S1F1_R1_01
4,2A,282.0,-0.01992,,,S1F1_001,3057.0,S1F1_R1_02
5,2B,239.0,0.171429,,,S1F1_001,3043.0,S1F1_R1_02


In [15]:
#ia = pd.read_csv('D:/_2021_08_screen/S1fu/ub_md.csv', index_col=0)
#ia.head()

In [16]:
def add_Compound(row, metadata):
    wellID = row['WellNo'][1]

    if wellID == 'A':
        compound = metadata.loc[metadata['Plate ID']==row['Plate ID']]['Compound A']
    elif wellID == 'B':
        compound = metadata.loc[metadata['Plate ID']==row['Plate ID']]['Compound B']
    elif wellID == 'C':
        compound = metadata.loc[metadata['Plate ID']==row['Plate ID']]['Compound C']
    elif wellID == 'D':
        compound = metadata.loc[metadata['Plate ID']==row['Plate ID']]['Compound D']
    #print(compound)
    return compound.values[0]

ia_results['Compound'] = ia_results.apply(
    lambda row: add_Compound(row, all_md), axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [17]:
def add_Strain(row, metadata):
    slotID = row['WellNo'][0]
    strain = metadata.loc[
        (metadata['Plate ID']==row['Plate ID']) & 
        (metadata['Scanner Slot:']==slotID)]['Strain Well A ']
    
    if len(strain) == 0:
        return 'Empty'
    else:
        return strain.values[0]

ia_results['Strain'] = ia_results.apply(
    lambda row: add_Strain(row, all_md), axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [18]:
filtered = ia_results.loc[ia_results['Total Worms'] >= 150]
print(len(filtered))

487


In [19]:
filtered = filtered.loc[filtered['Compound'] != 'Empty']

In [20]:
filtered = filtered.sort_values(['Compound','Strain', 'File Name'], ascending=[True, True, True])


In [21]:
S1F1_worm_locs_foldr = plb.Path('C:/Users/Emily/Documents/S1f1/124_ia/')
S1F2_worm_locs_foldr = plb.Path('C:/Users/Emily/Documents/S1F2/')

In [22]:
# 1 inch = 25.4mm
mm = 25.4
# 1200 pixels per 25.4mm
px_mm = 1200/mm

#The following transforms the worm location data so that it is respective to the start zone
# The start zone is at the center of the image; Start Zone = 0mm
#Worms with positive values are closer to the compound, negative values are away from the compound


In [23]:
def get_worm_locs(row, wrms, result_dict, strain): 

    fname = row['File Name']
    wellnum = row['WellNo']

    if row['Strain'] == strain:
        loc_fname =  wrms.joinpath('loc_' + fname + '_' + wellnum + '.csv')

        temp = pd.read_csv(loc_fname)
        compound = row['Compound']
        xs = temp['X']
        #xs = list(temp['centroid-1'])
        if compound in result_dict:
            result_dict[compound] = result_dict[compound].append(xs)
            result_dict[compound].reset_index(inplace=True, drop=True)
            #result_dict[compound] = result_dict[compound]+xs
        else:
            result_dict[compound]=xs

    return result_dict


In [24]:

def all_xs(strn, df, loc):
# Need to create an empty dictionary to hold the values
    results_dict = {}
    compound = ''
    i=0
    
    df2 = df.loc[df['Strain'] == strn]

    for index, row in df2.iterrows():
        if row['Compound'] == compound:
            i += 1
            compound = row['Compound']
            if i < 3:
                pooled = get_worm_locs(row, loc, results_dict, strn)
            else:
                continue
        else:
            i = 0
            compound = row['Compound']
            pooled = get_worm_locs(row, loc, results_dict, strn)

    pooled_df = pd.DataFrame.from_dict(pooled)
    strain_df = pooled_df.apply(lambda x: -(x/px_mm)+32.5)

    strain_df = strain_df.drop(columns = ['Rosmarinic acid', '2-Nonylquinolin-4(1H)-one'])
    strain_df.to_csv('C:/Users/Emily/Documents/S1F2/' + strn + '_xs.csv')
    return strain_df

In [25]:
GNmm_df = all_xs('GN1077', filtered, S1F2_worm_locs_foldr)
PRmm_df = all_xs('PR678', filtered, S1F1_worm_locs_foldr)
CXmm_df = all_xs('CX10', filtered, S1F1_worm_locs_foldr)

In [26]:
n2 = pd.read_csv('C:/Users/Emily/Documents/S1/S1_xs3.csv')

In [29]:
cols = list(GNmm_df.columns)
cols.insert(0, cols.pop(cols.index('DMSO')))

temp = pd.DataFrame()
N2_all = pd.DataFrame()

strains = ['PR678', 'CX10', 'GN1077']
refs = ['DMSO']

for s in strains:
    if s == 'CX10':
        xs = CXmm_df
    elif s == 'PR678':
        xs = PRmm_df
    elif s == 'GN1077':
        xs = GNmm_df
    for c in cols:
        temp['N2'] = n2[c]
        temp[s] = xs[c]
        db_obj = db.load(temp, idx=(['N2', s]))
        hold = db_obj.mean_diff.results
        hold['Compound'] = c
        N2_all = N2_all.append(hold)
            
N2_all.to_csv('C:/Users/Emily/Documents/S1F2/N2vMuts_mdiff_dmso.csv')
    

In [30]:
N2_all.head()

Unnamed: 0,control,test,control_N,test_N,effect_size,is_paired,difference,ci,bca_low,bca_high,...,random_seed,pvalue_permutation,permutation_count,pvalue_welch,statistic_welch,pvalue_students_t,statistic_students_t,pvalue_mann_whitney,statistic_mann_whitney,Compound
0,N2,PR678,1068,851,mean difference,False,-1.24181,95,-2.549919,0.106607,...,12345,0.0804,5000,0.070559,1.809315,0.078764,1.75884,0.1685817,471037.0,DMSO
0,N2,PR678,911,675,mean difference,False,-2.113225,95,-3.508475,-0.685667,...,12345,0.0052,5000,0.002868,2.986249,0.003964,2.88521,0.0001634473,341458.0,(-)-Huperzine A
0,N2,PR678,652,668,mean difference,False,2.251417,95,0.773548,3.672143,...,12345,0.0022,5000,0.002324,-3.051496,0.002278,-3.05736,0.001052532,195083.0,1-octanol
0,N2,PR678,705,670,mean difference,False,-3.645221,95,-5.272238,-2.014324,...,12345,0.0,5000,1.3e-05,4.383913,1.3e-05,4.380289,8.454932e-07,272418.0,"2,3-Dihydrobenzofuran"
0,N2,PR678,971,998,mean difference,False,-2.447849,95,-3.730321,-1.095766,...,12345,0.0006,5000,0.000246,3.674048,0.000234,3.685733,3.557064e-05,536678.0,"2,5-Dihydroxybenzoic acid"


In [31]:
for s in strains:
    if s == 'CX10':
        xs = CXmm_df
    elif s == 'PR678':
        xs = PRmm_df
    elif s == 'GN1077':
        xs = GNmm_df
    for c in cols:
        temp['N2'] = n2[c]
        temp[s] = xs[c]
        db_obj = db.load(temp, idx=(['N2', s]))
        hold = db_obj.cohens_d.results
        hold['Compound'] = c
        N2_all = N2_all.append(hold)
            
#N2_all.to_csv('C:/Users/Emily/Documents/S1F2/cohensD.csv')

KeyboardInterrupt: 

In [32]:

def mean_diff_calc(df,strain , ref):
    cols = list(df.columns)
    cols.insert(0, cols.pop(cols.index(ref)))
    db_obj = db.load(df, idx=(cols))
    results_df = db_obj.mean_diff.results
    results_df['Strain'] = strain
    #results_df.to_csv('C:/Users/Emily/Documents/S1F2/'+ strain + '_' + ref 
    #                 + '_meandif.csv')
    return results_df

In [33]:
all_strains = pd.DataFrame()

for s in strains:
    for r in refs:
        if s == 'CX10':
            xs = CXmm_df
        elif s == 'PR678':
            xs = PRmm_df
        elif s == 'GN1077':
            xs = GNmm_df
        df = mean_diff_calc(xs, s, r)
        all_strains = all_strains.append(df)
all_strains.to_csv('C:/Users/Emily/Documents/S1F2/all_strains_nullrefs_mdiff_dmso.csv')

In [34]:
all_strains_wide = pd.pivot_table(all_strains, values = ['difference', 'bca_low', 'bca_high', 'pvalue_mann_whitney'], 
                                  index=['test', 'control'], columns = 'Strain').reset_index()

all_strains_wide.columns = [c[1] + c[0] for c in all_strains_wide.columns]



In [35]:
all_strains_wide.to_csv('C:/Users/Emily/Documents/S1F2/all_strains_wide_dmso.csv')

In [41]:
attracted = all_strains.loc[(all_strains['bca_low']>0) & (all_strains['bca_high']>0)]
repelled = all_strains.loc[(all_strains['bca_low']<0) & (all_strains['bca_high']<0)]
attracted.to_csv('C:/Users/Emily/Documents/S1F2/attracted_mutants.csv')
repelled.to_csv('C:/Users/Emily/Documents/S1F2/repelled mutatants.csv')

In [51]:
pr = all_strains.loc[all_strains['Strain']=='PR678']

pr_change = pr.loc[((pr['bca_low']<0) & (pr['bca_high']>0))]

In [52]:
pr_change

Unnamed: 0,control,test,control_N,test_N,effect_size,is_paired,difference,ci,bca_low,bca_high,...,random_seed,pvalue_permutation,permutation_count,pvalue_welch,statistic_welch,pvalue_students_t,statistic_students_t,pvalue_mann_whitney,statistic_mann_whitney,Strain
15,DMSO,Ethyl p-methoxycinnamate,851,620,mean difference,False,0.164257,95,-1.115178,1.434781,...,12345,0.7978,5000,0.798577,-0.255237,0.802705,-0.249893,0.44493,257664.0,PR678
40,DMSO,p-Tolualdehyde,851,734,mean difference,False,-0.583399,95,-1.81514,0.656114,...,12345,0.3572,5000,0.351137,0.932661,0.35497,0.925264,0.442655,319293.0,PR678


In [54]:
cx = all_strains.loc[all_strains['Strain']=='CX10']
cx_change = cx.loc[((cx['bca_low']<0) & (cx['bca_high']>0))]
cx_change

Unnamed: 0,control,test,control_N,test_N,effect_size,is_paired,difference,ci,bca_low,bca_high,...,random_seed,pvalue_permutation,permutation_count,pvalue_welch,statistic_welch,pvalue_students_t,statistic_students_t,pvalue_mann_whitney,statistic_mann_whitney,Strain
0,DMSO,(-)-Huperzine A,936,976,mean difference,False,0.282733,95,-1.201574,1.854002,...,12345,0.7118,5000,0.714771,-0.36551,0.714768,-0.365514,0.73614,452701.0,CX10
1,DMSO,1-octanol,936,806,mean difference,False,-1.173355,95,-2.655425,0.347436,...,12345,0.1274,5000,0.126635,1.528241,0.129798,1.515617,0.239628,389518.0,CX10
3,DMSO,"2,5-Dihydroxybenzoic acid",936,840,mean difference,False,-1.519682,95,-2.999781,0.08808,...,12345,0.0572,5000,0.054567,1.923587,0.055002,1.920126,0.066968,412887.0,CX10
6,DMSO,Acetophenone,936,1056,mean difference,False,1.138951,95,-0.364586,2.593818,...,12345,0.1334,5000,0.133772,-1.500012,0.133774,-1.499997,0.128972,474756.0,CX10
10,DMSO,Cinnamyl Alcohol,936,1007,mean difference,False,1.396142,95,-0.08518,2.901544,...,12345,0.071,5000,0.069414,-1.816727,0.06943,-1.816614,0.067077,448648.0,CX10
12,DMSO,Daucosterol,936,1002,mean difference,False,0.195527,95,-1.364387,1.754244,...,12345,0.798,5000,0.803097,-0.249376,0.803359,-0.249037,0.843487,466505.0,CX10
14,DMSO,Ellagic acid,936,1042,mean difference,False,0.994368,95,-0.526178,2.439284,...,12345,0.1974,5000,0.189804,-1.311617,0.189569,-1.312306,0.18493,470842.0,CX10
15,DMSO,Ethyl p-methoxycinnamate,936,894,mean difference,False,0.881641,95,-0.537618,2.276165,...,12345,0.2294,5000,0.228427,-1.20483,0.230175,-1.200307,0.133312,401429.0,CX10
16,DMSO,Ethyl palmitate,936,815,mean difference,False,0.38787,95,-1.189608,1.951536,...,12345,0.6254,5000,0.62924,-0.482883,0.629628,-0.482335,0.60523,375964.5,CX10
22,DMSO,L-Mimosine,936,855,mean difference,False,0.684485,95,-0.811796,2.196595,...,12345,0.3884,5000,0.387342,-0.86466,0.387782,-0.863858,0.341457,389740.0,CX10


In [40]:
change.to_csv('C:/Users/Emily/Documents/S1F2/response_change_nlref.csv')

In [35]:
n2_dat = pd.read_csv('C:/Users/Emily/Documents/S1/inner3.csv', index_col=0)
n2_dat['Strain'] = 'N2'

n2_dat_wide = pd.pivot_table(n2_dat, values = ['difference', 'bca_low', 'bca_high', 'pvalue_mann_whitney'], 
                                  index=['test', 'control'], columns = 'Strain').reset_index()

n2_dat_wide.columns = [c[1] + c[0] for c in n2_dat_wide.columns]
n2_dat_wide.head()

Unnamed: 0,test,control,N2bca_high,N2bca_low,N2difference,N2pvalue_mann_whitney
0,(-)-Huperzine A,DMSO,4.772488,1.869578,3.377967,6.207573e-06
1,(-)-Huperzine A,H2O,4.435206,1.392195,2.959551,0.0002309168
2,1-octanol,DMSO,-5.916856,-8.944625,-7.446258,1.296941e-18
3,1-octanol,H2O,-6.307701,-9.412386,-7.864674,4.284519e-20
4,"2,3-Dihydrobenzofuran",DMSO,10.673937,7.629789,9.194467,1.172357e-28


In [36]:
all_strains_wide = all_strains_wide.merge(n2_dat_wide, on=['test','control'], how='left')
all_strains_wide.to_csv('C:/Users/Emily/Documents/NP_manuscript/Data/all_strains_wide.csv')

In [34]:
GN_sig = N2_all.loc[(N2_all['pvalue_mann_whitney']<.05)&(N2_all['test'] == 'GN1077')][['control', 'test', 'Compound', 'difference','pvalue_mann_whitney']].sort_values(by=['difference'])
#GN_sig = GN_sig.rename({'difference':'GN1077 Effect Size', 'pvalue_mann_whitney': 'GN1077 MWU pvalue'})

PR_sig = N2_all.loc[(N2_all['pvalue_mann_whitney']<.05)&(N2_all['test'] == 'PR678')][['control', 'test', 'Compound','difference' ,'pvalue_mann_whitney']]
CX_sig = N2_all.loc[(N2_all['pvalue_mann_whitney']<.05)&(N2_all['test'] == 'CX10')][['control', 'test', 'Compound', 'difference', 'pvalue_mann_whitney']]

In [35]:
def add_mutant_p(row, d):
    b =  d.loc[d['Compound']==row['Compound']][['difference', 'pvalue_mann_whitney']]
    if len(b) == 0:
        b=['NaN', 'NaN']
        #print(b)
    else:
        return b.values[0].tolist()

GN_sig[['PR678 Effect Size', 'PR678 MWU pvalue']] = GN_sig.apply(
    lambda row: add_mutant_p(row, PR_sig), axis=1, result_type='expand')

GN_sig[['CX10 Effect Size', 'CX10 MWU pvalue']] = GN_sig.apply(
    lambda row: add_mutant_p(row, CX_sig), axis=1, result_type='expand')

GN_sig = GN_sig.rename(columns={'difference':'GN1077 Effect Size', 'pvalue_mann_whitney': 'GN1077 MWU pvalue'}).drop(columns=['control', 'test'])


In [36]:
comps = all_strains['test'].unique().tolist()


comps.remove('DMSO')
comps.remove('H2O')
comps

['(-)-Huperzine A',
 '1-octanol',
 '2,3-Dihydrobenzofuran',
 '2,5-Dihydroxybenzoic acid',
 '2-Methyl-1-butanol',
 '2-nonanone',
 'Acetophenone',
 'Anisole',
 'Camphor',
 'Carnosol',
 'Cinnamyl Alcohol',
 'Coumaran',
 'Daucosterol',
 'Diacetyl',
 'Ellagic acid',
 'Ethyl p-methoxycinnamate',
 'Ethyl palmitate',
 'Furfural',
 'Guaiazulene',
 'Isoamyl alcohol',
 'Isoquinoline',
 'L-Mimosine',
 'Lapachol',
 'Leonurine',
 'Limonin',
 'Methyl palmitate',
 'Oleanolic Acid',
 'Paeoniflorin',
 'Phenylacetylene',
 'Phytol',
 'Piperitenone',
 'Piperonyl Alcohol',
 'Sabinene',
 'Salvinorin A Propionate',
 'Sinomenine hydrochloride',
 'Solasodine',
 'Spinosad',
 'Thiophene',
 'Ursolic acid',
 'p-Tolualdehyde',
 'α-Phellandrene']

In [37]:
for s in strains:
    for c in comps:
        dmso = all_strains.loc[(all_strains['test'] == c) & (all_strains['control']=='DMSO') & 
                               (all_strains['Strain'] ==  s)][['bca_low', 'bca_high']].values[0].tolist()
        h2o = all_strains.loc[(all_strains['test'] == c) & (all_strains['control']=='H2O') & 
                               (all_strains['Strain'] ==  s)][['bca_low', 'bca_high']].values[0].tolist()

        if (dmso[0] > 0) & (dmso[1]>0)  & (h2o[0] > 0) & (h2o[1] > 0) :
            print('Positive DMSO: ' + c + ' ' + s)
        elif (dmso[0] < 0) & (dmso[1] <0) & (h2o[0] < 0) & (h2o[1] < 0) :
            print('Negative DMSO: ' + c + ' ' + s)

Negative DMSO: 1-octanol PR678
Positive DMSO: 2,3-Dihydrobenzofuran PR678
Negative DMSO: 2-nonanone PR678
Negative DMSO: Acetophenone PR678
Positive DMSO: Anisole PR678
Positive DMSO: Carnosol PR678
Positive DMSO: Diacetyl PR678
Negative DMSO: Furfural PR678
Positive DMSO: Piperitenone PR678
Positive DMSO: 2,3-Dihydrobenzofuran CX10
Positive DMSO: 2-Methyl-1-butanol CX10
Negative DMSO: 2-nonanone CX10
Positive DMSO: Anisole CX10
Positive DMSO: Diacetyl CX10
Negative DMSO: Guaiazulene CX10
Positive DMSO: Isoamyl alcohol CX10
Negative DMSO: Oleanolic Acid CX10
Positive DMSO: Paeoniflorin CX10
Positive DMSO: Phenylacetylene CX10
Positive DMSO: Piperitenone CX10
Negative DMSO: Piperonyl Alcohol CX10
Negative DMSO: Acetophenone GN1077
Positive DMSO: Daucosterol GN1077
Positive DMSO: Solasodine GN1077


In [52]:
all_strains_nlref = pd.read_csv('C:/Users/Emily/Documents/S1F2/all_strains_nullrefs_mdiff.csv', 
                                index_col = 0)
all_strains_n2 = pd.read_csv('C:/Users/Emily/Documents/S1F2/N2vMuts_mdiff.csv', 
                                index_col = 0)

### Filtering for the same response as DMSO or H2O
<p> The data below represents a loss or reduction in behavioral phenotype for each of the mutant lines</p>

In [53]:


cx10 = all_strains_nlref.loc[(all_strains_nlref['Strain'] == 'CX10') & (all_strains_nlref['bca_low']<0) 
                             & (all_strains_nlref['bca_high']>0)]
print(len(cx10['test'].unique().tolist()))

PR678 = all_strains_nlref.loc[(all_strains_nlref['Strain'] == 'PR678') & (all_strains_nlref['bca_low']<0) 
                             & (all_strains_nlref['bca_high']>0)]
print(len(PR678['test'].unique().tolist()))

GN1077 = all_strains_nlref.loc[(all_strains_nlref['Strain'] == 'GN1077') & (all_strains_nlref['bca_low']<0) 
                             & (all_strains_nlref['bca_high']>0)]
print(len(GN1077['test'].unique().tolist()))

29
23
38


In [54]:
### Generating a dataframe that can made in to a table to report the results above
srtd = pd.read_csv('C:/Users/Emily/Documents/S1F2/sig_mut.csv')
srtd_cmps = srtd['Compound']

In [55]:
strains = ['PR678', 'CX10', 'GN1077']


def cat_dat(s, df, df2):
    x = df.loc[(df['Strain'] == s)  & (df['bca_low']<0) 
                             & (df['bca_high']>0)]
    
    c = x['test'].unique().tolist()
    
    y = df2.loc[(df2['test'] == s)  & (df2['bca_low']<0) & (df2['bca_high']>0)]
    
    z= y['Compound'].unique().tolist()

    #print(n2c)
    return c, z


In [56]:
nlref = pd.DataFrame(columns=['Compound', 'GN1077', 'PR678', 'CX10'])
nlref['Compound'] = srtd_cmps
nlref

Unnamed: 0,Compound,GN1077,PR678,CX10
0,Isoamyl alcohol,,,
1,2-Methyl-1-butanol,,,
2,Thiophene,,,
3,"2,3-Dihydrobenzofuran",,,
4,Diacetyl,,,
5,Acetophenone,,,
6,Phenylacetylene,,,
7,Paeoniflorin,,,
8,α-Phellandrene,,,
9,Coumaran,,,


In [50]:


for s in strains:
    match_null, match_n2  = cat_dat(s, all_strains_nlref, all_strains_n2)
    for index, row in nlref.iterrows():
        if (row['Compound'] in match_null) & (row['Compound'] not in match_n2):
            row[s] = '*  +'
        elif (row['Compound'] in match_null) & (row['Compound'] in match_n2):
            row[s] = '*'
        elif (row['Compound'] not in match_null) & (row['Compound'] not in match_n2):
            row[s] = '+'
    

KeyError: 'bca_low'

In [44]:
nlref.to_csv('C:/Users/Emily/Documents/S1F2/n2andnull.csv')