In [3]:
import sys, os, csv, subprocess
from datetime import date
from io import StringIO
import pandas as pd, numpy as np
import time,timeit,numba,cython
from scipy.spatial import distance as sp_distance
from scipy import stats as sp_stats
%load_ext Cython

In [4]:
@numba.jit(nogil=True)
def correlation(u, v, w=None, centered=True): #rip from scipy.spatial.distance source
    """
    Compute the correlation distance between two 1-D arrays.
    The correlation distance between `u` and `v`, is
    defined as
    .. math::
        1 - \\frac{(u - \\bar{u}) \\cdot (v - \\bar{v})}
                  {{||(u - \\bar{u})||}_2 {||(v - \\bar{v})||}_2}
    where :math:`\\bar{u}` is the mean of the elements of `u`
    and :math:`x \\cdot y` is the dot product of :math:`x` and :math:`y`.
    Parameters
    ----------
    u : (N,) array_like
        Input array.
    v : (N,) array_like
        Input array.
    w : (N,) array_like, optional
        The weights for each value in `u` and `v`. Default is None,
        which gives each value a weight of 1.0
    Returns
    -------
    correlation : double
        The correlation distance between 1-D array `u` and `v`.
    """
#     u = _validate_vector(u)
#     v = _validate_vector(v)
#     if w is not None:
#         w = _validate_weights(w)
    if centered:
        umu = np.mean(u) #, weights=w)
        vmu = np.mean(v) #, weights=w)
        u = u - umu
        v = v - vmu
    uv = np.mean(u * v) #, weights=w)
    uu = np.mean(np.square(u))#, weights=w)
    vv = np.mean(np.square(v))#, weights=w)
    dist = 1.0 - uv / np.sqrt(uu * vv)
    return dist

@numba.njit(parallel=False,nogil=True)
def numbaParallelCorrDistonRef(compSig,refArray):
    """attempt at making a nice numba accelerated version of CorrDist function,
    by parallelizing the specific compDist calculation to refSigs."""
#     if isinstance(refArray, pd.DataFrame):
#         refArray = refArray.to_numpy()
    numRefSigs = refArray.shape[0]
    returnDists = np.zeros(refArray.shape)
    for i in numba.prange(numRefSigs):
        returnDists[i] = (1.0 - np.mean(compSig-np.mean(compSig) * refArray[i]-np.mean(refArray[i])) / np.sqrt(np.mean(np.square(compSig-np.mean(compSig))) * np.mean(np.square(refArray[i]-np.mean(refArray[i])))))
#         correlation(compSig, refArray[i])
    return returnDists

  @numba.jit(nogil=True)


In [5]:
# %%cython
from scipy.spatial import distance as sp_distance
from scipy import stats as sp_stats
import numpy as np
#     @numba.jit#(nogil=True)
def corrDist (compSig:np.ndarray, refArray:np.ndarray) -> np.ndarray:
    returnDists = [0]*refArray.shape[0]
    for i in range(refArray.shape[0]):
        x = refArray[i,:]
        returnDists[i] = sp_distance.correlation(compSig, x)
    return np.array(returnDists)

#     @numba.jit#(nogil=True)
def pearsonr (compSig:np.ndarray, refArray:np.ndarray) -> np.ndarray:
    returnCorrs = [0]*refArray.shape[0]
    for i in range(refArray.shape[0]):
        x = refArray[i,:]
        returnCorrs[i] = sp_stats.pearsonr(x, compSig)[0]
    return np.array(returnCorrs)


In [7]:
def testFingerprintPrcess(exp_df,ref_df):
    from scipy.spatial import distance as sp_distance
    from scipy import stats as sp_stats
    #import numpy as np
    #     @numba.jit#(nogil=True)
    def corrDist (compSig:np.ndarray, refArray:np.ndarray) -> np.ndarray:
        returnDists = [0]*refArray.shape[0]
        for i in range(refArray.shape[0]):
            x = refArray[i,:]
            returnDists[i] = sp_distance.correlation(compSig, x)
        return np.array(returnDists)

    #     @numba.jit#(nogil=True)
    def pearsonr (compSig:np.ndarray, refArray:np.ndarray) -> np.ndarray:
        returnCorrs = [0]*refArray.shape[0]
        for i in range(refArray.shape[0]):
            x = refArray[i,:]
            returnCorrs[i] = sp_stats.pearsonr(x, compSig)[0]
        return np.array(returnCorrs)
    
    

#     def nestedApply_onExpDF (compSig):
#         distance = ref_df.apply(lambda x: corrDist(compSig, x.values), axis=1)
#         similarity = ref_df.apply(lambda x: pearsonr(compSig,x.values), axis=1)
#         return distance, similarity
    
    reporting_df = pd.DataFrame(columns=["TopSimilarRefComps","CorrespPearsonSim","CorrespCorrDistance"], index=exp_df.index)
    # one_to_one = list() # for sanity checks
    refcompounds = ["._.".join([str(y) for y in x]) for x in ref_df.index]
    print(ref_df.to_numpy().shape)
    
#     distance,similarity =
    distance = exp_df.apply(lambda compSig: corrDist(compSig.to_numpy(),ref_df.to_numpy()),axis=1)
    similarity = exp_df.apply(lambda compSig: pearsonr(compSig.to_numpy(),ref_df.to_numpy()),axis=1)
    reporting_df['TopSimilarRefComps'] = ".__.".join(refcompounds)
    reporting_df['CorrespCorrDistance'] = \
            [".__.".join(["{:.10f}".format(d_1) for d_1 in d]) for d in distance]
    reporting_df['CorrespPearsonSim'] = \
            [".__.".join(["{:.10f}".format(s_1) for s_1 in s]) for s in similarity]
    
    
#     for compSig in exp_df.iterrows():

#         comp, compSig = compSig.Index, np.array(compSig[1:])
#         print(compSig)
#         comp, compSig = compSig[0], compSig[1].values
    tester = pd.DataFrame(sp_distance.cdist(ref_df.to_numpy(), exp_df.to_numpy(),metric='correlation'),index=ref_df.index,columns=exp_df.index)
    tester2 = pd.DataFrame({distance.index[i]:d for i,d in enumerate(distance)})
    print(len(distance),tester,tester2,sep="\n\n")
#           ,pd.DataFrame({i:s for i,s in enumerate(similarity)}),sep="\n\n")
    testOrient = (tester.to_numpy() == np.transpose(np.array([d for d in distance])))
    print(testOrient,tester.shape,np.transpose(np.array([d for d in distance])).shape,np.sum(testOrient))
#           ,pd.DataFrame({i:s for i,s in enumerate(similarity)}).T,sep="\n\n")
        
    #convert to pd.Series for easier sorting and indexing
#     similarity_1 = [pd.Series({c:"{:.10f}".format(s) for s, c in zip(s_1,refcompounds)}).sort_values(ascending =False) for s_1 in similarity]
#     distance_1 = [pd.Series({c:"{:.10f}".format(d) for d, c in zip(d_1,refcompounds)}).sort_values() for d_1 in distance]
#     comp_1 = "._.".join([str(y) for y in comp]) #needed to access later for one_to_one
#     for i in range(exp_df.shape[0]):
#         print(similarity_1[i].index == distance_1[i].index,(similarity_1[i].index == distance_1[i].index).sum())
        
#     #print(comp,comp_1,similarity_1.index[0],distance_1.index[0],sep="\n")    
#     [print(ref,s) for ref,s in zip(similarity.index,similarity_1)]

#     # we should verify that the all the similairies and distances are in the same order
#     # otherwise the refCompound column cannot apply to both.
#     if (similarity_1.index == distance_1.index).sum():
#         #save the sort_value index order and reindex for performance
#         refcompounds_1 = distance_1.index.tolist()
#         similarity_1.reindex(index = refcompounds)
#         distance_1.reindex(index = refcompounds)

#         # print("reindexed the index")

#         reporting_df.loc[comp,"TopSimilarRefComps"] = ".__.".join(refcompounds_1[1:])
#         reporting_df.loc[comp,'CorrespPearsonSim'] = ".__.".join(\
#             ["{:.10f}".format(s) for s in similarity_1.drop(index=comp_1)])
#         reporting_df.loc[comp,"CorrespCorrDistance"] = ".__.".join(\
#             ["{:.10f}".format(d) for d in distance_1.drop(index=comp_1)])
#         # one_to_one.append((similarity_1[comp_1],distance_1[comp_1]))
#     else:
#         reporting_df.loc[comp,"TopSimilarRefComps"] = ".__.".join(refcompounds)
#         reporting_df.loc[comp,'CorrespPearsonSim'] = ".__.".join(\
#             ["{:.10f}".format(s) for s in similarity])
#         reporting_df.loc[comp,"CorrespCorrDistance"] = ".__.".join(\
#             ["{:.10f}".format(d) for d in distance])
#         # one_to_one.append((similarity[comp],distance[comp]))
# #         print(time.clock(),t2-t)
#     break

    return reporting_df#, one_to_one #for sanity checks

In [4]:
allDF = pd.read_csv("ReferenceExp/AllReferenceExp_20191018_commonFeaturesOrder.csv",index_col=[0,1,2,3])
testDF = pd.read_csv("ReferenceExp/SP0142_20171024_HeLa_10x_0_CP_histdiff_Concatenated.csv",index_col=[0,1,2,3])
testDF = testDF.reindex(columns = allDF.columns)
pd.__version__
allDF.to_numpy().shape

(4738, 245)

In [107]:
os.path.abspath('.')
os.chdir("../../../Scripts_AL/")

In [None]:
from scipy.spatial import distance as sp_distance
from scipy import stats as sp_stats

In [55]:
testSig_comp = testDF.index[0]
testSig = testDF.loc[testSig_comp]
%timeit print(np.array([(sp_distance.correlation(testSig.values, allDF.iloc[i].values),\
sp_stats.pearsonr(testSig.values,allDF.iloc[0].values)) for i in range(allDF.shape[0])]))

%timeit print(numbaParallelCorrDistonRef(testSig.values, allDF.to_numpy())[0])#,\
# sp_stats.pearsonr(testSig.values,allDF.to))

[[1.125537068155683 (-0.12553706815568294, 0.049682409209806384)]
 [1.2477948403025447 (-0.12553706815568294, 0.049682409209806384)]
 [0.627212606740434 (-0.12553706815568294, 0.049682409209806384)]
 ...
 [0.6607993378778821 (-0.12553706815568294, 0.049682409209806384)]
 [0.7032598012572853 (-0.12553706815568294, 0.049682409209806384)]
 [1.43765571388033 (-0.12553706815568294, 0.049682409209806384)]]
[[1.125537068155683 (-0.12553706815568294, 0.049682409209806384)]
 [1.2477948403025447 (-0.12553706815568294, 0.049682409209806384)]
 [0.627212606740434 (-0.12553706815568294, 0.049682409209806384)]
 ...
 [0.6607993378778821 (-0.12553706815568294, 0.049682409209806384)]
 [0.7032598012572853 (-0.12553706815568294, 0.049682409209806384)]
 [1.43765571388033 (-0.12553706815568294, 0.049682409209806384)]]
[[1.125537068155683 (-0.12553706815568294, 0.049682409209806384)]
 [1.2477948403025447 (-0.12553706815568294, 0.049682409209806384)]
 [0.627212606740434 (-0.12553706815568294, 0.04968240920980

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

[-0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0.84311724
 -0.84311724 -0.84311724 -0.84311724 -0.84311724 -0

In [8]:
fingerprintComparison = testFingerprintPrcess(testDF,allDF)
# %prun -l 10 -s cumulative testFingerprintPrcess(testDF,allDF)
# %timeit testFingerprintPrcess(testDF,allDF)
# , sameComp 
# with jit: 6min 11s ± 37.3 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
# without jit: 5min 12s ± 17.9 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
# without extra sorting/ordering: 5min 4s ± 21.1 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
# pull corrDist and pearsonR out of cell (maybe cython): 5min 37s ± 33.6 s per loop (mean ± std. dev. of 7 runs, 1 loop each)

(4738, 245)
320

CompoundName                                SW219778-1 SW219366-1 SW218151-2  \
Concentration                                      2.0        2.0        2.0   
PlateName                                       SP0142     SP0142     SP0142   
WellID                                             L08        O19        I04   
CompoundName Concentration PlateName WellID                                    
SW218124-2    10.0         SP0135    E03      1.125537   0.983073   1.331452   
SW219896-1    10.0         SP0135    O15      1.247795   1.024736   1.511265   
SW197639-3    10.0         SP0135    N08      0.627213   0.731539   0.645619   
SW218086-2    10.0         SP0135    G22      1.356639   1.220774   1.513809   
SW219171-1    10.0         SP0135    J03      1.401852   1.330168   1.284220   
...                                                ...        ...        ...   
SW200864     -1.0          SP20265   H10      0.403653   0.754851   0.594718   
SW201529     -1.0      

In [9]:
print(fingerprintComparison)
fingerprintComparison.to_excel("ReferenceExp/SP0142_20171024_HeLa_10x_0_CP_FeatureReport.xlsx",engine='xlsxwriter')


                                                                            TopSimilarRefComps  \
CompoundName Concentration PlateName WellID                                                      
SW219778-1   2.0           SP0142    L08     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
SW219366-1   2.0           SP0142    O19     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
SW218151-2   2.0           SP0142    I04     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
SW219425-1   2.0           SP0142    K11     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
SW219079-1   2.0           SP0142    J06     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
...                                                                                        ...   
SW219664-1   2.0           SP0142    I09     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
SW197554-3   2.0           SP0142    J16     SW218124-2._.10.0._.SP0135._.E03.__.SW219896-1...   
SW219415-1   2.0    

In [70]:
pd.DataFrame(sameComp,columns=['similarity','distance'],index=testDF.index)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,similarity,distance
CompoundName,Concentration,PlateName,WellID,Unnamed: 4_level_1,Unnamed: 5_level_1
SW219778-1,2.0,SP0142,L08,1.0,0.0
SW219366-1,2.0,SP0142,O19,1.0,0.0
SW218151-2,2.0,SP0142,I04,1.0,0.0
SW219425-1,2.0,SP0142,K11,1.0,0.0
SW219079-1,2.0,SP0142,J06,1.0,0.0
SW222338-1,2.0,SP0142,B04,1.0,0.0
SW219396-1,2.0,SP0142,J05,1.0,0.0
SW196607-4,2.0,SP0142,G20,1.0,0.0
SW219879-1,2.0,SP0142,H11,1.0,0.0
SW219768-1,2.0,SP0142,M13,1.0,0.0


In [41]:
v = [-1, 5, 0, 0, 10, 0, -7]
v1 = [1, 0, 0, 0, 0, 0, 0]
v2 = [0, 1, 0, 0, 1, 0, 0]
v3 = [1, 1, 0, 0, 0, 0, 1]    
df = pd.DataFrame([v1, v2, v3], columns=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
from scipy.stats.stats import pearsonr
print(df)
s_corrs = df.apply(lambda x: pearsonr(x.values, v)[0], axis=1)
s_corrs

   a  b  c  d  e  f  g
0  1  0  0  0  0  0  0
1  0  1  0  0  1  0  0
2  1  1  0  0  0  0  1


0   -0.166667
1    0.839146
2   -0.353553
dtype: float64

### Similarity Report for Tannia's RAW cell data
apply the similarity report function created above, to generate reports for the NPFs against the rest in the individual datasets

In [74]:
os.chdir('../HiFAN/Tannia/RAW264cells_SChem/')

In [86]:
DMSO_df = pd.read_csv("RAW264Cells_FeaturesReduced/RAW264cells_HiFANnoCts_DMSO_reduced.csv",index_col=0)
DMSO_LPS_df = pd.read_csv("RAW264Cells_FeaturesReduced/RAW264cells_HiFANnoCts_DMSO+LPS_reduced.csv",index_col=0)
LPS_df = pd.read_csv("RAW264Cells_FeaturesReduced/RAW264cells_HiFANnoCts_LPS_reduced.csv",index_col=0)

In [91]:
DMSO_similiarityReport = testFingerprintPrcess(DMSO_df.loc[[x for x in DMSO_df.index if x.startswith("SP20265") or x.startswith("SP20264")]],DMSO_df)
DMSO_LPS_similarityReport = testFingerprintPrcess(DMSO_LPS_df.loc[[x for x in DMSO_LPS_df.index if x.startswith("SP20265") or x.startswith("SP20264")]],DMSO_LPS_df)
LPS_similarityReport = testFingerprintPrcess(LPS_df.loc[[x for x in LPS_df.index if x.startswith("SP20265") or x.startswith("SP20264")]],LPS_df)

In [101]:
DMSO_LPS_similarityReport.CorrespCorrDistance


#  print(DMSO_df.loc[[x for x in DMSO_df.index if x.startswith("SP20265") or x.startswith("SP20264")]].shape,\
#       DMSO_LPS_df.loc[[x for x in DMSO_LPS_df.index if x.startswith("SP20265") or x.startswith("SP20264")]].shape,\
#       LPS_df.loc[[x for x in LPS_df.index if x.startswith("SP20265") or x.startswith("SP20264")]].shape)


Features
SP20264._.DMSO_A01_-1.0    0.0458660910.__.0.0634208545.__.0.0809264832._...
SP20264._.DMSO_A02_-1.0    0.0814656154.__.0.0830850453.__.0.0870765451._...
SP20264._.DMSO_B01_-1.0    0.0491528696.__.0.0781651371.__.0.0968852320._...
SP20264._.DMSO_B02_-1.0    0.0623305403.__.0.1023847599.__.0.1239171461._...
SP20264._.DMSO_C01_-1.0    0.0567753436.__.0.0628449403.__.0.0855086347._...
SP20264._.DMSO_C02_-1.0    0.0618566780.__.0.0943016206.__.0.1079366243._...
SP20264._.DMSO_D01_-1.0    0.0634208545.__.0.0788555390.__.0.0805071849._...
SP20264._.DMSO_D02_-1.0    0.0448119346.__.0.0623305403.__.0.1037767343._...
SP20264._.DMSO_E01_-1.0    0.0491826965.__.0.1296133211.__.0.1311895735._...
SP20264._.DMSO_E02_-1.0    0.0628449403.__.0.0921236986.__.0.1016477370._...
SP20264._.DMSO_F01_-1.0    0.0955272176.__.0.1602466714.__.0.1652536607._...
SP20264._.DMSO_F02_-1.0    0.0448119346.__.0.1023847599.__.0.1374846465._...
SP20264._.DMSO_G01_-1.0    0.0491826965.__.0.1381643761.__.0.153196

In [102]:
DMSO_similiarityReport.to_excel("RAW264Cells_FeaturesReduced/RAW264Cells_HiFANnoCts_DMSO_NPFsimilarityReport.xlsx")
DMSO_LPS_similarityReport.to_excel("RAW264Cells_FeaturesReduced/RAW264Cells_HiFANnoCts_DMSO+LPS_NPFsimilarityReport.xlsx")
LPS_similarityReport.to_excel("RAW264Cells_FeaturesReduced/RAW264Cells_HiFANnoCts_LPS_NPFsimilarityReport.xlsx")