In [25]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score
import os
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
from scipy import stats

In [7]:
# Identifiers of the two dataset versions compared in this notebook.
ds_version_1, ds_version_2 = "V1_0", "RTK"

# Each version keeps its report files under ../data/<version>/metadata.
data_path_1, data_path_2 = (
    os.path.join("..", "data", version, "metadata")
    for version in (ds_version_1, ds_version_2)
)


In [None]:
# Define the F-test function used for the variance tests.
def f_test(x, y, alternative="greater"):
    """Compare the sample variances of ``x`` and ``y`` with an F-test.

    The statistic is ``var(x) / var(y)`` computed from the unbiased
    (``ddof=1``) sample variances and referred to an F distribution with
    ``x.size - 1`` (numerator) and ``y.size - 1`` (denominator) degrees of
    freedom.

    Parameters
    ----------
    x, y : array-like
        The two samples. Each is converted with ``np.asarray``; the variance
        is taken over all elements.
    alternative : {"greater", "less", "two-sided"}, default "greater"
        Tail of the F distribution used for the p-value.
        ``"greater"`` (the default, and the only behaviour before this
        parameter existed) tests ``var(x) > var(y)``; ``"less"`` tests
        ``var(x) < var(y)``; ``"two-sided"`` doubles the smaller tail and
        caps the result at 1.

    Returns
    -------
    f : float
        The F statistic ``var(x) / var(y)``.
    p : float
        The p-value for the requested alternative.

    Raises
    ------
    ValueError
        If ``alternative`` is not one of the three accepted strings.
    """
    if alternative not in ("greater", "less", "two-sided"):
        raise ValueError(
            "alternative must be 'greater', 'less' or 'two-sided', "
            f"got {alternative!r}"
        )

    x = np.asarray(x)
    y = np.asarray(y)
    f = np.var(x, ddof=1) / np.var(y, ddof=1)  # F test statistic
    dfn = x.size - 1  # degrees of freedom of the numerator
    dfd = y.size - 1  # degrees of freedom of the denominator

    # Use the survival function rather than ``1 - cdf``: the subtraction
    # cancels catastrophically and turns very small p-values into exactly 0.
    upper_tail = stats.f.sf(f, dfn, dfd)
    if alternative == "greater":
        return f, upper_tail

    lower_tail = stats.f.cdf(f, dfn, dfd)
    if alternative == "less":
        return f, lower_tail

    # Two-sided: twice the smaller tail, never above 1. np.minimum keeps NaN
    # propagating for degenerate inputs instead of hiding it.
    return f, np.minimum(1.0, 2.0 * np.minimum(lower_tail, upper_tail))

# Correlation

In [51]:
# Load the correlation report of each dataset version, indexed by the
# "Class" column so rows can be looked up by class label.
report_1, report_2 = (
    pd.read_csv(os.path.join(folder, file_name)).set_index("Class")
    for folder, file_name in (
        (data_path_1, "corr_V1_0.csv"),
        (data_path_2, "corr_RTK.csv"),
    )
)

In [52]:
# Keep only the Spearman-coefficient columns of each report.
# The V1_0 class labels are renamed -- presumably so they match the labels in
# report_2, since later cells look up df_n_2 rows with df_n_1's index.
df_n_1 = (
    report_1
    .loc[:, [name for name in report_1.columns if "Spearman_Coefficient" in name]]
    .rename(index={'asphalt-concrete': 'asphalt', 'paving_stones-sett': 'paved'})
)
df_n_2 = report_2.loc[
    :, [name for name in report_2.columns if "Spearman_Coefficient" in name]
]

In [53]:
# Show both coefficient tables, one after the other.
print(df_n_1, df_n_2, sep="\n")

         42_Spearman_Coefficient  1024_Spearman_Coefficient  \
Class                                                         
asphalt                 0.201041                   0.274003   
paved                  -0.131257                  -0.093122   
unpaved                -0.133933                  -0.127584   
Overall                 0.245464                   0.177517   

         3_Spearman_Coefficient  57_Spearman_Coefficient  \
Class                                                      
asphalt                0.301464                 0.302448   
paved                 -0.053721                 0.052579   
unpaved               -0.213642                -0.183255   
Overall                0.016474                 0.272948   

         1000_Spearman_Coefficient  
Class                               
asphalt                   0.170723  
paved                     0.002186  
unpaved                  -0.109928  
Overall                   0.085159  
         42_Spearman_Coefficient  1024

### Variance test

In [54]:
# f_test is called with its default one-sided p-value, so each class is
# tested in both directions: first var(V1_0) / var(RTK), then the inverse.
for surface in df_n_1.index:
    coeffs_1, coeffs_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    for numerator, denominator in ((coeffs_1, coeffs_2), (coeffs_2, coeffs_1)):
        f, p = f_test(numerator, denominator)
        print(f"F test statistic: {f}, p value: {p}")

asphalt
F test statistic: 9.07117152351407, p value: 0.027619589028191505
F test statistic: 0.11023934421345956, p value: 0.9723804109718085
paved
F test statistic: 7.583245073430153, p value: 0.03755815357311576
F test statistic: 0.13186966665547403, p value: 0.9624418464268841
unpaved
F test statistic: 0.09731499458976337, p value: 0.9778001030073022
F test statistic: 10.275908704671403, p value: 0.022199896992697754
Overall
F test statistic: 1.5151934692465716, p value: 0.3485241017593743
F test statistic: 0.6599817253022143, p value: 0.6514758982406257


### Significance test

In [55]:
# T test
for surface in df_n_1.index:
    n_1 = df_n_1.loc[surface].values
    n_2 = df_n_2.loc[surface].values
    print(surface)
    t, p = stats.ttest_ind(n_1, n_2, equal_var=True, alternative="two-sided")
    print(f"T test statistic: {t}, p value: {p}")
    t, p = stats.ttest_ind(n_1, n_2, equal_var=True, alternative="two-sided")
    print(f"T test statistic: {t}, p value: {p}")

asphalt
T test statistic: -15.136182247378247, p value: 3.5926050745354854e-07
T test statistic: -15.136182247378247, p value: 3.5926050745354854e-07
paved
T test statistic: -13.663131938555424, p value: 7.928454106158421e-07
T test statistic: -13.663131938555424, p value: 7.928454106158421e-07
unpaved
T test statistic: -7.274008097153627, p value: 8.60254578124228e-05
T test statistic: -7.274008097153627, p value: 8.60254578124228e-05
Overall
T test statistic: -5.747111709434142, p value: 0.0004304195508509053
T test statistic: -5.747111709434142, p value: 0.0004304195508509053


In [56]:
# Welch test
for surface in df_n_1.index:
    n_1 = df_n_1.loc[surface].values
    n_2 = df_n_2.loc[surface].values
    print(surface)
    t, p = stats.ttest_ind(n_1, n_2, equal_var=False, alternative="two-sided")
    print(f"T test statistic: {t}, p value: {p}")
    t, p = stats.ttest_ind(n_1, n_2, equal_var=False, alternative="two-sided")
    print(f"T test statistic: {t}, p value: {p}")

asphalt
T test statistic: -15.136182247378247, p value: 2.7770711550145295e-05
T test statistic: -15.136182247378247, p value: 2.7770711550145295e-05
paved
T test statistic: -13.663131938555424, p value: 3.573885684141629e-05
T test statistic: -13.663131938555424, p value: 3.573885684141629e-05
unpaved
T test statistic: -7.274008097153627, p value: 0.0009352138410203934
T test statistic: -7.274008097153627, p value: 0.0009352138410203934
Overall
T test statistic: -5.747111709434142, p value: 0.0005007195343149048
T test statistic: -5.747111709434142, p value: 0.0005007195343149048


In [57]:
# Mann-Whitney U test
for surface in df_n_1.index:
    n_1 = df_n_1.loc[surface].values
    n_2 = df_n_2.loc[surface].values
    print(surface)
    u, p = stats.mannwhitneyu(n_1, n_2, alternative="two-sided")
    print(f"U test statistic: {u}, p value: {p}")

asphalt
U test statistic: 0.0, p value: 0.007936507936507936
paved
U test statistic: 0.0, p value: 0.007936507936507936
unpaved
U test statistic: 0.0, p value: 0.007936507936507936
Overall
U test statistic: 0.0, p value: 0.007936507936507936


# Classification

In [36]:
# Load the classification report of each dataset version, indexed by the
# "Class" column, then print both tables.
report_1, report_2 = (
    pd.read_csv(os.path.join(folder, file_name)).set_index("Class")
    for folder, file_name in (
        (data_path_1, "classification_V1_0.csv"),
        (data_path_2, "classification_RTK.csv"),
    )
)
print(report_1, report_2, sep="\n")

              42_precision  42_recall  42_f1-score   42_support  \
Class                                                             
asphalt           0.764398   0.947596     0.846195  4160.000000   
paved             0.814369   0.769585     0.791344  3255.000000   
unpaved           0.876106   0.106337     0.189655   931.000000   
accuracy          0.784328   0.784328     0.784328     0.784328   
macro avg         0.818291   0.607840     0.609065  8346.000000   
weighted avg      0.796348   0.784328     0.751566  8346.000000   

              1024_precision  1024_recall  1024_f1-score  1024_support  \
Class                                                                    
asphalt             0.700614     0.987260       0.819597    4160.00000   
paved               0.956416     0.728111       0.826792    3255.00000   
unpaved             0.666667     0.004296       0.008538     931.00000   
accuracy            0.776540     0.776540       0.776540       0.77654   
macro avg          

## Precision

In [48]:
# Keep only the precision columns of each classification report.
df_n_1 = report_1.loc[:, [name for name in report_1.columns if "precision" in name]]
df_n_2 = report_2.loc[:, [name for name in report_2.columns if "precision" in name]]

# Variance test
# f_test is called with its default one-sided p-value, so every row is
# tested in both directions.
print("Precision: Variance test")
for surface in df_n_1.index:
    scores_1, scores_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    for numerator, denominator in ((scores_1, scores_2), (scores_2, scores_1)):
        f, p = f_test(numerator, denominator)
        print(f"F test statistic: {f}, p value: {p}")

# T test (pooled variance), then Welch test (no equal-variance assumption)
for heading, pooled_variance in (
    ("\nPrecision: T test", True),
    ("\nPrecision: Welch test", False),
):
    print(heading)
    for surface in df_n_1.index:
        scores_1, scores_2 = (
            frame.loc[surface].values for frame in (df_n_1, df_n_2)
        )
        print(surface)
        t, p = stats.ttest_ind(
            scores_1, scores_2, equal_var=pooled_variance, alternative="two-sided"
        )
        print(f"T test statistic: {t}, p value: {p}")

# Mann-Whitney U test
print("\nPrecision: Mann-Whitney U test")
for surface in df_n_1.index:
    scores_1, scores_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    u, p = stats.mannwhitneyu(scores_1, scores_2, alternative="two-sided")
    print(f"U test statistic: {u}, p value: {p}")

Precision: Variance test
asphalt
F test statistic: 0.6646421936393029, p value: 0.6490504429110169
F test statistic: 1.5045689388517722, p value: 0.3509495570889831
paved
F test statistic: 1.020296378327048, p value: 0.49246557495427434
F test statistic: 0.9801073700170068, p value: 0.5075344250457257
unpaved
F test statistic: 0.5042268087954621, p value: 0.7382395073473631
F test statistic: 1.9832344939946391, p value: 0.2617604926526369
accuracy
F test statistic: 1.0571033010241067, p value: 0.479186035396735
F test statistic: 0.9459813426286856, p value: 0.520813964603265
macro avg
F test statistic: 1.2699030287491322, p value: 0.41124146346684953
F test statistic: 0.7874617016899397, p value: 0.5887585365331506
weighted avg
F test statistic: 0.8577349136418722, p value: 0.5573226225430388
F test statistic: 1.165861368233318, p value: 0.44267737745696123

Precision: T test
asphalt
T test statistic: -3.1832832445154553, p value: 0.012933414951161674
paved
T test statistic: -1.2464639

In [43]:
## Recall

In [49]:
# Keep only the recall columns of each classification report.
df_n_1 = report_1.loc[:, [name for name in report_1.columns if "recall" in name]]
df_n_2 = report_2.loc[:, [name for name in report_2.columns if "recall" in name]]

# Variance test
# f_test is called with its default one-sided p-value, so every row is
# tested in both directions.
print("Recall: Variance test")
for surface in df_n_1.index:
    scores_1, scores_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    for numerator, denominator in ((scores_1, scores_2), (scores_2, scores_1)):
        f, p = f_test(numerator, denominator)
        print(f"F test statistic: {f}, p value: {p}")

# T test (pooled variance), then Welch test (no equal-variance assumption)
for heading, pooled_variance in (
    ("\nRecall: T test", True),
    ("\nRecall: Welch test", False),
):
    print(heading)
    for surface in df_n_1.index:
        scores_1, scores_2 = (
            frame.loc[surface].values for frame in (df_n_1, df_n_2)
        )
        print(surface)
        t, p = stats.ttest_ind(
            scores_1, scores_2, equal_var=pooled_variance, alternative="two-sided"
        )
        print(f"T test statistic: {t}, p value: {p}")

# Mann-Whitney U test
print("\nRecall: Mann-Whitney U test")
for surface in df_n_1.index:
    scores_1, scores_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    u, p = stats.mannwhitneyu(scores_1, scores_2, alternative="two-sided")
    print(f"U test statistic: {u}, p value: {p}")

Recall: Variance test
asphalt
F test statistic: 0.12232817403792688, p value: 0.9669499638749031
F test statistic: 8.174731682743486, p value: 0.03305003612509694
paved
F test statistic: 1.1910339131456071, p value: 0.4347739994663029
F test statistic: 0.8396066551614196, p value: 0.565226000533697
unpaved
F test statistic: 0.06680935059936757, p value: 0.9887254079026713
F test statistic: 14.967964679026025, p value: 0.011274592097328728
accuracy
F test statistic: 1.0571033010241067, p value: 0.479186035396735
F test statistic: 0.9459813426286856, p value: 0.520813964603265
macro avg
F test statistic: 0.48329799434103377, p value: 0.7506924751925295
F test statistic: 2.0691168010400665, p value: 0.24930752480747054
weighted avg
F test statistic: 1.0571033010241067, p value: 0.479186035396735
F test statistic: 0.9459813426286856, p value: 0.520813964603265

Recall: T test
asphalt
T test statistic: 1.1817300974087206, p value: 0.2712466546308358
paved
T test statistic: 0.711892731767592

## F1 score

In [50]:
# Keep only the f1-score columns of each classification report.
df_n_1 = report_1.loc[:, [name for name in report_1.columns if "f1-score" in name]]
df_n_2 = report_2.loc[:, [name for name in report_2.columns if "f1-score" in name]]

# Variance test
# f_test is called with its default one-sided p-value, so every row is
# tested in both directions.
print("F1 score: Variance test")
for surface in df_n_1.index:
    scores_1, scores_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    for numerator, denominator in ((scores_1, scores_2), (scores_2, scores_1)):
        f, p = f_test(numerator, denominator)
        print(f"F test statistic: {f}, p value: {p}")

# T test (pooled variance), then Welch test (no equal-variance assumption)
for heading, pooled_variance in (
    ("\nF1 score: T test", True),
    ("\nF1 score: Welch test", False),
):
    print(heading)
    for surface in df_n_1.index:
        scores_1, scores_2 = (
            frame.loc[surface].values for frame in (df_n_1, df_n_2)
        )
        print(surface)
        t, p = stats.ttest_ind(
            scores_1, scores_2, equal_var=pooled_variance, alternative="two-sided"
        )
        print(f"T test statistic: {t}, p value: {p}")

# Mann-Whitney U test
print("\nF1 score: Mann-Whitney U test")
for surface in df_n_1.index:
    scores_1, scores_2 = (
        frame.loc[surface].values for frame in (df_n_1, df_n_2)
    )
    print(surface)
    u, p = stats.mannwhitneyu(scores_1, scores_2, alternative="two-sided")
    print(f"U test statistic: {u}, p value: {p}")

F1 score: Variance test
asphalt
F test statistic: 0.7438794374087068, p value: 0.6093592075006311
F test statistic: 1.344303861232521, p value: 0.3906407924993689
paved
F test statistic: 1.4292063764602512, p value: 0.36886455742116375
F test statistic: 0.699689013756518, p value: 0.6311354425788361
unpaved
F test statistic: 1.4350709541990645, p value: 0.3674243653533835
F test statistic: 0.6968296564528516, p value: 0.6325756346466163
accuracy
F test statistic: 1.0571033010241067, p value: 0.479186035396735
F test statistic: 0.9459813426286856, p value: 0.520813964603265
macro avg
F test statistic: 0.9127287027990264, p value: 0.5341961997855691
F test statistic: 1.0956158132568226, p value: 0.4658038002144309
weighted avg
F test statistic: 1.1110534549487507, p value: 0.46058211610455335
F test statistic: 0.9000467039150035, p value: 0.5394178838954466

F1 score: T test
asphalt
T test statistic: -2.902687811783134, p value: 0.019809896687746978
paved
T test statistic: 0.434254990322