In [None]:
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
## train on T-drive
# Draws one KDE per test dataset, marks each dataset's mean with a dashed
# line, annotates the figure with pairwise Kruskal-Wallis p-values
# (T-drive vs. each other dataset) and saves the result to "t_drive.jpg".
############################################################################
t_drive = np.array([89.8, 96.4, 93, 96.8])           # values of test on t drive
cabspotting = np.array([90.5, 89.5, 91.75, 89.75])   # values of test on cabspotting
kaggle = np.array([71.5, 75, 74, 74])                # values of test on kaggle
############################################################################
alpha = 0.05  # significance level used for the reject / fails-to-reject decision

# (legend label, values, colour) for every curve, in drawing order.
series = [
    ('test on T-drive', t_drive, 'black'),
    ('test on Cabspotting', cabspotting, 'red'),
    ('test on ECML-PKDD 15', kaggle, 'Blue'),
]

fig, ax = plt.subplots(figsize=(9, 5))
for label, values, color in series:
    # `fill=` is the supported spelling of the deprecated `shade=` keyword.
    # Labelling each artist keeps the legend correct regardless of draw order.
    sns.kdeplot(values, fill=True, color=color, label=label, ax=ax)
ax.legend(fontsize=8)
for _, values, color in series:
    # Dashed marker at the sample mean; the line height is fixed at 0.09 in
    # data units, so it does not follow the height of the density curve.
    ax.vlines(x=values.mean(), ymin=0, ymax=0.09, color=color, linestyle='--')
ax.set_title('Train on T-drive')

fig.text(0.5, -0.03, "hypothesis of same distribution", ha="center", fontsize=12,
         bbox={"facecolor": "white", "alpha": 0.5, "pad": 5})

# NOTE(review): each group has only 4 values; scipy's kruskal relies on a
# chi-square approximation that is usually recommended for >= 5 per group,
# so treat these p-values as indicative only.
_, p_1 = st.kruskal(t_drive, cabspotting)
_, p_2 = st.kruskal(t_drive, kaggle)
p_1_con = "fails to reject" if p_1 > alpha else "reject"
p_2_con = "fails to reject" if p_2 > alpha else "reject"

# Negative y coordinates place the text below the axes; bbox_inches="tight"
# in savefig keeps it inside the saved image.
fig.text(0.1, -0.1, "- p-value for T-drive and Cabspotting : {:.3f}, {}".format(p_1, p_1_con), fontsize=12)
fig.text(0.1, -0.15, "- p-value for T-drive and ECML-PKDD : {:.3f}, {}".format(p_2, p_2_con), fontsize=12)
fig.savefig("t_drive.jpg", bbox_inches="tight", format='jpg')
plt.show()

In [None]:
## train on cabspotting
# Draws one KDE per test dataset, marks each dataset's mean with a dashed
# line, annotates the figure with pairwise Kruskal-Wallis p-values
# (Cabspotting vs. each other dataset) and saves the result to "Cabspotting.jpg".
############################################################################
t_drive = np.array([86.75, 88.5, 87.5, 88])            # values of test on t drive
cabspotting = np.array([78.2, 96.8, 97.6, 98.2])       # values of test on cabspotting
kaggle = np.array([84, 89, 90.5, 88.75])               # values of test on kaggle
############################################################################
alpha = 0.05  # significance level used for the reject / fails-to-reject decision

# (legend label, values, colour) for every curve, in drawing order.
series = [
    ('test on T-drive', t_drive, 'black'),
    ('test on Cabspotting', cabspotting, 'red'),
    ('test on ECML-PKDD 15', kaggle, 'Blue'),
]

fig, ax = plt.subplots(figsize=(9, 5))
for label, values, color in series:
    # `fill=` is the supported spelling of the deprecated `shade=` keyword.
    # Labelling each artist keeps the legend correct regardless of draw order.
    sns.kdeplot(values, fill=True, color=color, label=label, ax=ax)
ax.legend(fontsize=8)
for _, values, color in series:
    # Dashed marker at the sample mean; the line height is fixed at 0.09 in
    # data units, so it does not follow the height of the density curve.
    ax.vlines(x=values.mean(), ymin=0, ymax=0.09, color=color, linestyle='--')
ax.set_title('Train on Cabspotting')

fig.text(0.5, -0.03, "hypothesis of same distribution", ha="center", fontsize=12,
         bbox={"facecolor": "white", "alpha": 0.5, "pad": 5})

# NOTE(review): each group has only 4 values; scipy's kruskal relies on a
# chi-square approximation that is usually recommended for >= 5 per group,
# so treat these p-values as indicative only.
_, p_1 = st.kruskal(cabspotting, t_drive)
_, p_2 = st.kruskal(cabspotting, kaggle)
p_1_con = "fails to reject" if p_1 > alpha else "reject"
p_2_con = "fails to reject" if p_2 > alpha else "reject"

# Negative y coordinates place the text below the axes; bbox_inches="tight"
# in savefig keeps it inside the saved image.
fig.text(0.1, -0.1, "- p-value for Cabspotting and T-drive : {:.3f}, {}".format(p_1, p_1_con), fontsize=12)
fig.text(0.1, -0.15, "- p-value for Cabspotting and ECML-PKDD : {:.3f}, {}".format(p_2, p_2_con), fontsize=12)
fig.savefig("Cabspotting.jpg", bbox_inches="tight", format='jpg')
plt.show()

In [None]:
## train on kaggle
# Draws one KDE per test dataset, marks each dataset's mean with a dashed
# line, annotates the figure with pairwise Kruskal-Wallis p-values
# (kaggle / ECML-PKDD vs. each other dataset) and saves the result to "kaggle.jpg".
############################################################################
t_drive = np.array([88.25, 87, 89, 89.5])                # values of test on t drive
cabspotting = np.array([91.75, 94.25, 92.25, 93.75])     # values of test on cabspotting
kaggle = np.array([92, 88.8, 89.6, 92.6])                # values of test on kaggle
############################################################################
alpha = 0.05  # significance level used for the reject / fails-to-reject decision

# (legend label, values, colour) for every curve, in drawing order.
series = [
    ('test on T-drive', t_drive, 'black'),
    ('test on Cabspotting', cabspotting, 'red'),
    ('test on ECML-PKDD 15', kaggle, 'Blue'),
]

fig, ax = plt.subplots(figsize=(9, 5))
for label, values, color in series:
    # `fill=` is the supported spelling of the deprecated `shade=` keyword.
    # Labelling each artist keeps the legend correct regardless of draw order.
    sns.kdeplot(values, fill=True, color=color, label=label, ax=ax)
ax.legend(fontsize=8)
for _, values, color in series:
    # Dashed marker at the sample mean; the line height is fixed at 0.09 in
    # data units, so it does not follow the height of the density curve.
    ax.vlines(x=values.mean(), ymin=0, ymax=0.09, color=color, linestyle='--')
ax.set_title('Train on ECML-PKDD 15')

fig.text(0.5, -0.03, "hypothesis of same distribution", ha="center", fontsize=12,
         bbox={"facecolor": "white", "alpha": 0.5, "pad": 5})

# NOTE(review): each group has only 4 values; scipy's kruskal relies on a
# chi-square approximation that is usually recommended for >= 5 per group,
# so treat these p-values as indicative only.
_, p_1 = st.kruskal(kaggle, t_drive)
_, p_2 = st.kruskal(kaggle, cabspotting)
p_1_con = "fails to reject" if p_1 > alpha else "reject"
p_2_con = "fails to reject" if p_2 > alpha else "reject"

# Negative y coordinates place the text below the axes; bbox_inches="tight"
# in savefig keeps it inside the saved image.
fig.text(0.1, -0.1, "- p-value for ECML-PKDD  and T-drive : {:.3f}, {}".format(p_1, p_1_con), fontsize=12)
fig.text(0.1, -0.15, "- p-value for ECML-PKDD  and Cabspotting : {:.3f}, {}".format(p_2, p_2_con), fontsize=12)
fig.savefig("kaggle.jpg", bbox_inches="tight", format='jpg')
plt.show()