In [28]:
import numpy as np
import pandas as pd
import scipy
from scipy.spatial import distance
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from scipy.special import rel_entr
import math
from scipy.stats import wasserstein_distance

In [29]:
Predicted = pd.read_csv("./Stereoscope/Outputs/output_Sim5.csv")
Target=pd.read_csv("./Data/Sim5/target.csv")

In [30]:
Predicted.drop(columns="Unnamed: 0", inplace=True)
#Predicted.drop(columns="cell_ID", inplace=True)
#Predicted.drop(columns="prediction.score.max",inplace=True)


In [31]:
Target.drop(columns="Unnamed: 0", inplace=True)

In [32]:
for i in range(len(Target)):
    Target.loc[i,:]=Target.loc[i,:]/Target.loc[i,:].sum()

In [33]:
for i in range(len(Predicted)):
    Predicted.loc[i,:]=Predicted.loc[i,:]/Predicted.loc[i,:].sum()
 

In [34]:
columns_p=[]
for i in range(Predicted.shape[1]):
    columns_p.append(Predicted.columns[i])
    

In [35]:
columns_t=[]
for i in range(Target.shape[1]):
    columns_t.append(Target.columns[i])
    

In [36]:
columns_p=sorted(columns_p)
columns_t=sorted(columns_t)

In [37]:
Predicted=Predicted[columns_p]
Target=Target[columns_t]

In [38]:
Target=Target.to_numpy()
Predicted=Predicted.to_numpy()

## Pearson Correlation

In [39]:
num_celltypes=Predicted.shape[1]
d = np.zeros(num_celltypes)
for i in range(0,num_celltypes):
    p=Predicted[:,i]
    t=Target[:,i]
    prs=(np.corrcoef(p,t,rowvar=False))
    d[i]=prs[0,1]

In [40]:
np.mean(np.nan_to_num(d))

0.8488873786950308

In [41]:
d

array([0.6307829 , 0.88637707, 0.79287455, 0.90448357, 0.81795552,
       0.81493717, 0.93568175, 0.90482167, 0.81062021, 0.93856142,
       0.8672554 , 0.90270871, 0.82847599])

## Jenson Shannon Divergence

In [42]:
jns=distance.jensenshannon(Predicted,Target,axis=1)
jns=jns[~np.isinf(jns).squeeze()]
np.mean(jns)

0.26996435226346865

## F1 Score, Precision, Recall

In [43]:
thresholds={0.001,0.01,0.1,0.2,0.3}

In [44]:
d = pd.DataFrame(0, index=np.arange(len(thresholds)), columns=["Precision","Recall","F1-score"])
d.index=thresholds
for t in thresholds:
    Predicted_logical=(Predicted>t).astype(int)
    Target_logical=(Target>t).astype(int)
    d.loc[t,"Precision"]=precision_score(Target_logical,Predicted_logical,average="micro")
    d.loc[t,"Recall"]=recall_score(Target_logical,Predicted_logical,average="micro")
    d.loc[t,"F1-score"]=f1_score(Target_logical,Predicted_logical,average="micro")

In [45]:
d

Unnamed: 0,Precision,Recall,F1-score
0.1,0.832556,0.836154,0.834351
0.2,0.852926,0.742542,0.793916
0.3,0.791284,0.66947,0.725298
0.01,0.585325,0.945032,0.722904
0.001,0.513107,0.955616,0.6677


## KL Divergence

In [46]:
scipy.stats.wasserstein_distance(np.reshape(Target,(Target.shape[0]*Target.shape[1])),np.reshape(Predicted,(Predicted.shape[0]*Predicted.shape[1])))

0.012580057109969707

In [47]:
to_be_kept=(Predicted>0)

In [48]:
Predicted=Predicted[to_be_kept]
Target=Target[to_be_kept]

In [49]:
kl=scipy.special.rel_entr(Target,Predicted)

In [50]:
sum(kl)/len(kl)

0.03540200901425532

In [27]:
len(Predicted)

3058

In [1179]:
np.reshape(Target,(Target.shape[0]*Target.shape[1])).shape

(8000,)