# Author: Amin Norouzi Kandlati

# Accuracy assessment of double cropping paper
This notebook is based on the methodologies described in the following paper:

Stehman, Stephen V. "Estimating area and map accuracy for stratified
random sampling when the strata are different from the map classes."
International Journal of Remote Sensing 35.13 (2014): 4923-4939.

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from collections import defaultdict

import os, os.path, sys

In [2]:
sys.path.append("/Users/hn/Documents/00_GitHub/Ag/NASA/Python_codes/")
import NASA_core as nc

2024-08-13 11:34:25.117928: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
path_to_data = (
    "/Users/aminnorouzi/Library/CloudStorage/"
    "OneDrive-WashingtonStateUniversity(email.wsu.edu)/Ph.D/"
    "Projects/Double_Crop_Mapping/"
)

path_to_data = "/Users/hn/Documents/01_research_data/NASA/Amin/"

In [4]:
file_path = path_to_data + "six_OverSam_TestRes_and_InclusionProb.sav"
data = pd.read_pickle(file_path)
field_info = data["field_info"][["ID", "ExctAcr"]]
test_set = data["six_OverSam_TestRes"]["test_results_DL"]["train_ID1"]["a_test_set_df"]
cm = confusion_matrix(test_set["NDVI_SG_DL_p3"], test_set["Vote"])

In [5]:
prob = data["six_OverSam_TestRes"]["inclusion_prob"]
test_set = test_set.merge(prob, on="CropTyp", how="right")
test_set = test_set.merge(field_info, on="ID", how="inner")
test_set

Unnamed: 0,ID,Vote,prob_single,train_test,train_ID,SR,NDVI_SG_DL_p3,CropTyp,numer,denom,inclusion_prob,numer_acr,denom_acr,ExctAcr
0,51870_WSDA_SF_2016,1.0,1.000000e+00,test,1.0,5.0,1.0,alfalfa hay,43,3343,0.013,2564.155,133731.871,52.830424
1,51929_WSDA_SF_2016,1.0,1.000000e+00,test,1.0,5.0,1.0,alfalfa hay,43,3343,0.013,2564.155,133731.871,65.795078
2,56147_WSDA_SF_2016,2.0,4.410554e-06,test,1.0,5.0,2.0,alfalfa hay,43,3343,0.013,2564.155,133731.871,19.044372
3,56431_WSDA_SF_2016,1.0,1.000000e+00,test,1.0,5.0,1.0,alfalfa hay,43,3343,0.013,2564.155,133731.871,123.435265
4,104119_WSDA_SF_2016,1.0,1.000000e+00,test,1.0,5.0,1.0,alfalfa hay,43,3343,0.013,2564.155,133731.871,26.413875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
627,23202_WSDA_SF_2018,1.0,1.000000e+00,test,1.0,5.0,1.0,wildlife feed,27,47,0.574,1065.289,1140.471,11.387414
628,54907_WSDA_SF_2016,2.0,8.809784e-08,test,1.0,5.0,2.0,yellow mustard,20,29,0.690,1622.112,1658.878,49.009076
629,108354_WSDA_SF_2018,2.0,2.504894e-06,test,1.0,5.0,2.0,yellow mustard,20,29,0.690,1622.112,1658.878,40.572863
630,105033_WSDA_SF_2018,2.0,9.999999e-01,test,1.0,5.0,1.0,yellow mustard,20,29,0.690,1622.112,1658.878,29.508786


In [6]:
id_dict = defaultdict(list)
for idx, row in test_set.iterrows():
    id_dict[(row["Vote"], row["NDVI_SG_DL_p3"]), row["CropTyp"]].append(
        (row["ID"], row["inclusion_prob"], row["ExctAcr"]))

## Overall accuracy

In [7]:
# n_star_h not defined yet
# n_star_h - len(A_yu_list)

In [8]:
A_N = data["six_OverSam_TestRes"]["inclusion_prob"]["denom_acr"].sum()
N = sum(data["six_OverSam_TestRes"]["inclusion_prob"]["denom"])

In [9]:
acr_data = data["six_OverSam_TestRes"]["inclusion_prob"]
master_dict = defaultdict(list)
# Numbers of strata
for strata in test_set["CropTyp"].unique():
    strata_subset = {key: value for key, value in id_dict.items() if key[1] == strata}
    A_n_star_h_list = [
        value[2] for key, values in strata_subset.items() for value in values
    ]
    A_n_star_h = sum(A_n_star_h_list)

    index = acr_data[acr_data["CropTyp"] == strata].index[0]
    # Now use .at to access the specific value
    A_N_star_h = acr_data.at[index, "denom_acr"]
    N_star_h = acr_data.at[index, "denom"]
    n_star_h = len(A_n_star_h_list)

    master_dict[(strata, "n_star_h")].append(n_star_h)
    master_dict[(strata, "A_n_star_h")].append(A_n_star_h)
    master_dict[(strata, "A_N_star_h")].append(A_N_star_h)
    master_dict[(strata, "N_star_h")].append(N_star_h)

    A_yu_list = [
        value[2]
        for key, values in strata_subset.items()
        for value in values
        if key[0][0] == key[0][1]
    ]
    A_yu = sum(A_yu_list)

    y_bar_h = A_yu / A_n_star_h

    # Sample variance (based on counts not area)
    y_bar_h_count = len(A_yu_list) / master_dict[(strata, "n_star_h")][0]
    yu_0_1 = np.append(np.ones(len(A_yu_list)), np.zeros(n_star_h - len(A_yu_list)))
    sy_h_2 = sum((yu_0_1 - y_bar_h_count) ** 2 / master_dict[(strata, "n_star_h")][0])

    master_dict[strata, "y_bar_h"].append(y_bar_h)
    master_dict[strata, "sy_h_2"].append(sy_h_2)

In [10]:
master_df = nc.dict_to_df(master_dict)
master_df = master_df.dropna()

In [15]:
Y_bar_list = []
v_list = []
v_list_countbased = []
for strata in master_df["strata"].unique():
    A_N_star_h = master_df.loc[master_df["strata"] == strata, "A_N_star_h"].values[0]
    A_n_star_h = master_df.loc[master_df["strata"] == strata, "A_n_star_h"].values[0]
    sy_h_2 = master_df.loc[master_df["strata"] == strata, "sy_h_2"].values[0]
    y_bar_h = master_df.loc[master_df["strata"] == strata, "y_bar_h"].values[0]

    Y_bar_list.append(A_N_star_h * y_bar_h)

    v_list.append(A_N_star_h**2 * (1 - A_n_star_h / A_N_star_h) * sy_h_2 / A_n_star_h)

    # v_list_countbased.append(
    #     N_star_h**2
    #     * (1 - n_star_h_dict[strata][0] / N_star_h)
    #     * s_yh_2_dict[strata][0]
    #     / n_star_h_dict[strata][0]
    # )

Overall_acc = sum(Y_bar_list) / A_N
print("Overall Accuracy = ", Overall_acc)
# Variance of overall accuracy
v_o = (1 / (A_N**2)) * sum(v_list)
# v_o_countbased = (1 / N**2) * sum(v_list_countbased)
print("Area-based Variance of overall accuracy = ", v_o)
# print("Count-based Variance of overall accuracy = ", v_o_countbased)
# -

Overall Accuracy =  0.1095150585631535
Area-based Variance of overall accuracy =  1.577000261748824e-05


In [12]:
master_df.head(3)

Unnamed: 0,strata,A_N_star_h,A_n_star_h,N_star_h,n_star_h,sy_h_2,y_bar_h
0,alfalfa hay,133731.871,793.873165,3343,12,0.0,1.0
1,alfalfa seed,13840.953,57.182891,337,3,0.0,1.0
2,apple,120944.544,2941.767314,7455,121,0.0,1.0


### User and Producer Accuracy

In [19]:
c = 2  # We have two classes: 1 and 2

for c in [1, 2]:
    ######################################################################
    ######################################################################
    # USER ACCURACY AND SE
    ######################################################################
    ######################################################################
    # Filter for instances that are mapped as c.
    c_dict = {key: value for key, value in id_dict.items() if key[0][0] == c}
    # Filter for instances that are mapped as c and referenced as c, too (cc).
    cc_dict = {
        key: value
        for key, value in id_dict.items()
        if (key[0][0] == c and key[0][1] == c)
    }

    # List stratas for c and cc
    c_strata_list = [key[1] for key, _ in c_dict.items()] # X
    cc_strata_list = [key[1] for key, _ in cc_dict.items()] # Y

    # ##### Calculate numerator sum
    acr_data = data["six_OverSam_TestRes"]["inclusion_prob"]

    master_dict = defaultdict(list)
    # Numbers of strata
    for strata in test_set["CropTyp"].unique():
        strata_subset = {
            key: value for key, value in id_dict.items() if key[1] == strata
        }
        A_n_star_h_list = [
            value[2] for key, values in strata_subset.items() for value in values
        ]
        A_n_star_h = sum(A_n_star_h_list)

        index = acr_data[acr_data["CropTyp"] == strata].index[0]
        # Now use .at to access the specific value
        A_N_star_h = acr_data.at[index, "denom_acr"]
        N_star_h = acr_data.at[index, "denom"]

        master_dict[(strata, "n_star_h")].append(len(A_n_star_h_list))
        master_dict[(strata, "A_n_star_h")].append(A_n_star_h)
        master_dict[(strata, "A_N_star_h")].append(A_N_star_h)
        master_dict[(strata, "N_star_h")].append(N_star_h)

    for strata in np.unique(np.array(cc_strata_list)):
        strata_subset = {
            key: value for key, value in cc_dict.items() if key[1] == strata
        }

        A_yu_list = [
            value[2]
            for key, values in strata_subset.items()
            for value in values
            if key[0][0] == key[0][1]
        ]
        yu_IDs = np.array(
            [
                value[0]
                for key, values in strata_subset.items()
                for value in values
                if key[0][0] == key[0][1]
            ]
        )
        A_yu = sum(A_yu_list)

        # Sample variance (based on counts not area)
        y_bar_h_count = len(A_yu_list) / master_dict[(strata, "n_star_h")][0]
        sy_h_2 = (len(A_yu_list) - y_bar_h_count) ** 2 / master_dict[
            (strata, "n_star_h")
        ][0]

        master_dict[(strata, "n_yu")].append(len(A_yu_list))
        master_dict[(strata, "yu_IDs")].append(yu_IDs)
        master_dict[(strata, "y_bar_h")].append(
            A_yu / master_dict[(strata, "A_n_star_h")][0]
        )
        master_dict[(strata, "y_bar_h_count")].append(y_bar_h_count)
        master_dict[(strata, "sy_h_2")].append(sy_h_2)
        master_dict[(strata, "Y_bar")].append(
            master_dict[(strata, "A_N_star_h")][0] * master_dict[(strata, "y_bar_h")][0]
        )
        # master_dict[(strata, "v_y_list")].append(
        #     A_N_star_h**2
        #     * (1 - A_n_star_h_dict[strata][0] / A_N_star_h)
        #     * master_dict[(strata, "sy_h_2")][0]
        #     / master_dict[(strata, "A_n_star_h")][0]
        # )

    ###########  Calculate denominator sum  ###########
    for strata in np.unique(np.array(c_strata_list)):
        strata_subset = {
            key: value for key, value in c_dict.items() if key[1] == strata
        }

        A_xu_list = [
            value[2] for key, values in strata_subset.items() for value in values
        ]
        xu_IDs = np.array(
            [value[0] for key, values in strata_subset.items() for value in values]
        )
        A_xu = sum(A_xu_list)

        # Sample variance (based on counts not area)
        x_bar_h_count = len(A_xu_list) / master_dict[(strata, "n_star_h")][0]
        sx_h_2 = (len(A_xu_list) - x_bar_h_count) ** 2 / master_dict[
            (strata, "n_star_h")
        ][0]

        master_dict[(strata, "n_xu")].append(len(A_xu_list))
        master_dict[(strata, "xu_IDs")].append(xu_IDs)
        master_dict[(strata, "x_bar_h")].append(
            A_xu / master_dict[(strata, "A_n_star_h")][0]
        )
        master_dict[(strata, "x_bar_h_count")].append(x_bar_h_count)
        master_dict[(strata, "sx_h_2")].append(sx_h_2)
        master_dict[(strata, "X_bar")].append(
            master_dict[(strata, "A_N_star_h")][0] * master_dict[(strata, "x_bar_h")][0]
        )
        # master_dict[(strata, "v_x_list")].append(
        #     A_N_star_h_x**2
        #     * (1 - A_n_star_h_dict[strata][0] / A_N_star_h)
        #     * master_dict[(strata, "sy_h_2")][0]
        #     / master_dict[(strata, "A_n_star_h")][0]
        # )

    master_dict = {key: master_dict[key] for key in sorted(master_dict.keys())}
    master_dict = defaultdict(list, master_dict)

    # put yu and xu of 0 - 1s in the master dict
    xu_id = {key[0]: np.array(sorted(value)) for key, values in master_dict.items()
             for value in values if key[1] == "xu_IDs"}
    yu_id = {key[0]: np.array(sorted(value)) for key, values in master_dict.items() for value in values
             if key[1] == "yu_IDs"}

    for key, value in xu_id.items():
        if key not in yu_id:
            master_dict[(key, "yu_0_1")].append(np.zeros(len(xu_id[key])))
        else:
            yu_in_xu_0_1 = np.array((np.isin(xu_id[key], yu_id[key])).astype(int))
            master_dict[(key, "xu_0_1")].append(np.ones(len(yu_in_xu_0_1)))
            master_dict[(key, "yu_0_1")].append(yu_in_xu_0_1)

    master_dict = {key: master_dict[key] for key in sorted(master_dict.keys())}
    master_dict = defaultdict(list, master_dict)

    # Convert master_dict to a dataframe
    master_df = nc.dict_to_df(master_dict)
    master_df = master_df.dropna()

    # Calculate s_xy_h
    for strata in master_df["strata"].unique():
        yu = master_df.loc[master_df["strata"] == strata, "yu_0_1"].values[0]
        xu = master_df.loc[master_df["strata"] == strata, "xu_0_1"].values[0]
        ybar_h = master_df.loc[master_df["strata"] == strata, "y_bar_h_count"].values[0]
        xbar_h = master_df.loc[master_df["strata"] == strata, "x_bar_h_count"].values[0]
        n_star_h = master_df.loc[master_df["strata"] == strata, "n_star_h"].values[0]

        s_xy_h = sum((yu - ybar_h) * (xu - xbar_h) / n_star_h - 1)
        master_df.loc[master_df["strata"] == strata, "s_xy_h"] = s_xy_h

        # Calculate X_hat
        A_N_star_h = master_df.loc[master_df["strata"] == strata, "A_N_star_h"].values[0]
        x_hat = A_N_star_h * xbar_h
        master_df.loc[master_df["strata"] == strata, "x_hat"] = x_hat

    # Calculate user accuracy
    Y_bar_list = [value[0] for key, value in master_dict.items() if key[1] == "Y_bar"]
    numerator_sum = sum(Y_bar_list)

    X_bar_list = [value[0] for key, value in master_dict.items() if key[1] == "X_bar"]
    denominator_sum = sum(X_bar_list)

    users_acc = numerator_sum / denominator_sum
    print("Class: ", c)
    print((numerator_sum, denominator_sum))
    print("Area-based user accuracy = ", users_acc)

    # Calculate variance of user accuracy
    v_sum_list = []
    for strata in master_df["strata"].unique():
        A_N_star_h = master_df.loc[master_df["strata"] == strata, "A_N_star_h"].values[0]
        A_n_star_h = master_df.loc[master_df["strata"] == strata, "A_n_star_h"].values[0]
        sy_h_2 = master_df.loc[master_df["strata"] == strata, "sy_h_2"].values[0]
        sx_h_2 = master_df.loc[master_df["strata"] == strata, "sx_h_2"].values[0]
        s_xy_h = master_df.loc[master_df["strata"] == strata, "s_xy_h"].values[0]

        v_sum_list.append(A_N_star_h**2 * (1 - A_n_star_h / A_N_star_h)
            * (sy_h_2 + users_acc**2 * sx_h_2 - 2 * users_acc * s_xy_h)
            / A_n_star_h)

    v_u = (1 / master_df["x_hat"].sum()) * sum(v_sum_list)
    print("Area-based standard error of user accuracy = ", np.sqrt(v_u))

    ######################################################################
    ######################################################################
    # PRODUCER ACCURACY AND SE
    ######################################################################
    ######################################################################

    # Filter for instances that are mapped as c.
    c_dict = {key: value for key, value in id_dict.items() if key[0][1] == c}
    # Filter for instances that are mapped as c and referenced as c, too (cc).
    cc_dict = {key: value for key, value in id_dict.items() if (key[0][0] == c and key[0][1] == c)}

    # List stratas for c and cc
    c_strata_list = [key[1] for key, _ in c_dict.items()] # X
    cc_strata_list = [key[1] for key, _ in cc_dict.items()]# Y

    # ##### Calculate numerator sum
    acr_data = data["six_OverSam_TestRes"]["inclusion_prob"]

    master_dict = defaultdict(list)
    # Numbers of strata
    for strata in test_set["CropTyp"].unique():
        strata_subset = {
            key: value for key, value in id_dict.items() if key[1] == strata
        }
        A_n_star_h_list = [
            value[2] for key, values in strata_subset.items() for value in values
        ]
        A_n_star_h = sum(A_n_star_h_list)

        index = acr_data[acr_data["CropTyp"] == strata].index[0]
        # Now use .at to access the specific value
        A_N_star_h = acr_data.at[index, "denom_acr"]
        N_star_h = acr_data.at[index, "denom"]

        master_dict[(strata, "n_star_h")].append(len(A_n_star_h_list))
        master_dict[(strata, "A_n_star_h")].append(A_n_star_h)
        master_dict[(strata, "A_N_star_h")].append(A_N_star_h)
        master_dict[(strata, "N_star_h")].append(N_star_h)

    for strata in np.unique(np.array(cc_strata_list)):
        strata_subset = {
            key: value for key, value in cc_dict.items() if key[1] == strata
        }

        A_yu_list = [value[2] for key, values in strata_subset.items()
                     for value in values if key[0][0] == key[0][1]]
        yu_IDs = np.array([value[0] for key, values in strata_subset.items()
                           for value in values if key[0][0] == key[0][1]])
        A_yu = sum(A_yu_list)

        # Sample variance (based on counts not area)
        y_bar_h_count = len(A_yu_list) / master_dict[(strata, "n_star_h")][0]
        sy_h_2 = (len(A_yu_list) - y_bar_h_count) ** 2 / master_dict[
            (strata, "n_star_h")
        ][0]

        master_dict[(strata, "n_yu")].append(len(A_yu_list))
        master_dict[(strata, "yu_IDs")].append(yu_IDs)
        master_dict[(strata, "y_bar_h")].append(
            A_yu / master_dict[(strata, "A_n_star_h")][0]
        )
        master_dict[(strata, "y_bar_h_count")].append(y_bar_h_count)
        master_dict[(strata, "sy_h_2")].append(sy_h_2)
        master_dict[(strata, "Y_bar")].append(
            master_dict[(strata, "A_N_star_h")][0] * master_dict[(strata, "y_bar_h")][0]
        )
        # master_dict[(strata, "v_y_list")].append(
        #     A_N_star_h**2
        #     * (1 - A_n_star_h_dict[strata][0] / A_N_star_h)
        #     * master_dict[(strata, "sy_h_2")][0]
        #     / master_dict[(strata, "A_n_star_h")][0]
        # )

    ###########  Calculate denominator sum  ###########
    for strata in np.unique(np.array(c_strata_list)):
        strata_subset = {key: value for key, value in c_dict.items() if key[1] == strata}

        A_xu_list = [value[2] for key, values in strata_subset.items() for value in values]
        xu_IDs = np.array([value[0] for key, values in strata_subset.items() for value in values])
        A_xu = sum(A_xu_list)

        # Sample variance (based on counts not area)
        x_bar_h_count = len(A_xu_list) / master_dict[(strata, "n_star_h")][0]
        sx_h_2 = (len(A_xu_list) - x_bar_h_count) ** 2 / master_dict[
            (strata, "n_star_h")
        ][0]

        master_dict[(strata, "n_xu")].append(len(A_xu_list))
        master_dict[(strata, "xu_IDs")].append(xu_IDs)
        master_dict[(strata, "x_bar_h")].append(
            A_xu / master_dict[(strata, "A_n_star_h")][0]
        )
        master_dict[(strata, "x_bar_h_count")].append(x_bar_h_count)
        master_dict[(strata, "sx_h_2")].append(sx_h_2)
        master_dict[(strata, "X_bar")].append(
            master_dict[(strata, "A_N_star_h")][0] * master_dict[(strata, "x_bar_h")][0]
        )
        # master_dict[(strata, "v_x_list")].append(
        #     A_N_star_h_x**2
        #     * (1 - A_n_star_h_dict[strata][0] / A_N_star_h)
        #     * master_dict[(strata, "sy_h_2")][0]
        #     / master_dict[(strata, "A_n_star_h")][0]
        # )

    master_dict = {key: master_dict[key] for key in sorted(master_dict.keys())}
    master_dict = defaultdict(list, master_dict)

    # put yu and xu of 0 - 1s in the master dict
    xu_id = {key[0]: np.array(sorted(value)) for key, values in master_dict.items()
             for value in values if key[1] == "xu_IDs"}
    yu_id = {key[0]: np.array(sorted(value)) for key, values in master_dict.items() for value in values
             if key[1] == "yu_IDs"}

    for key, value in xu_id.items():
        if key not in yu_id:
            master_dict[(key, "yu_0_1")].append(np.zeros(len(xu_id[key])))
        else:
            yu_in_xu_0_1 = np.array((np.isin(xu_id[key], yu_id[key])).astype(int))
            master_dict[(key, "xu_0_1")].append(np.ones(len(yu_in_xu_0_1)))
            master_dict[(key, "yu_0_1")].append(yu_in_xu_0_1)

    master_dict = {key: master_dict[key] for key in sorted(master_dict.keys())}
    master_dict = defaultdict(list, master_dict)

    # Convert master_dict to a dataframe
    master_df = nc.dict_to_df(master_dict)
    master_df = master_df.dropna()

    # Calculate s_xy_h
    for strata in master_df["strata"].unique():
        yu = master_df.loc[master_df["strata"] == strata, "yu_0_1"].values[0]
        xu = master_df.loc[master_df["strata"] == strata, "xu_0_1"].values[0]
        ybar_h = master_df.loc[master_df["strata"] == strata, "y_bar_h_count"].values[0]
        xbar_h = master_df.loc[master_df["strata"] == strata, "x_bar_h_count"].values[0]
        n_star_h = master_df.loc[master_df["strata"] == strata, "n_star_h"].values[0]

        s_xy_h = sum((yu - ybar_h) * (xu - xbar_h) / n_star_h - 1)
        master_df.loc[master_df["strata"] == strata, "s_xy_h"] = s_xy_h

        # Calculate X_hat
        A_N_star_h = master_df.loc[master_df["strata"] == strata, "A_N_star_h"].values[0]
        x_hat = A_N_star_h * xbar_h
        master_df.loc[master_df["strata"] == strata, "x_hat"] = x_hat

    # Calculate user accuracy
    Y_bar_list = [value[0] for key, value in master_dict.items() if key[1] == "Y_bar"]
    numerator_sum = sum(Y_bar_list)

    X_bar_list = [value[0] for key, value in master_dict.items() if key[1] == "X_bar"]
    denominator_sum = sum(X_bar_list)

    users_acc = numerator_sum / denominator_sum
    print((numerator_sum, denominator_sum))
    print("Area-based user producer = ", users_acc)

    # Calculate variance of user accuracy
    v_sum_list = []
    for strata in master_df["strata"].unique():
        A_N_star_h = master_df.loc[master_df["strata"] == strata, "A_N_star_h"].values[0]
        A_n_star_h = master_df.loc[master_df["strata"] == strata, "A_n_star_h"].values[0]
        sy_h_2 = master_df.loc[master_df["strata"] == strata, "sy_h_2"].values[0]
        sx_h_2 = master_df.loc[master_df["strata"] == strata, "sx_h_2"].values[0]
        s_xy_h = master_df.loc[master_df["strata"] == strata, "s_xy_h"].values[0]

        v_sum_list.append(A_N_star_h**2 * (1 - A_n_star_h / A_N_star_h)
                          * (sy_h_2 + users_acc**2 * sx_h_2 - 2 * users_acc * s_xy_h)
                          / A_n_star_h)

    v_u = (1 / master_df["x_hat"].sum()) * sum(v_sum_list)
    print("Area-based standard error of producer accuracy = ", np.sqrt(v_u))

Class:  1
(840651.1749739902, 865428.7694943175)
Area-based user accuracy =  0.9713695737954203
Area-based standard error of user accuracy =  79.22684922037026
(840651.1749739902, 851869.7612423341)
Area-based user producer =  0.9868306321238787
Area-based standard error of producer accuracy =  79.89681318170778
Class:  2
(107869.58823733861, 119088.1745056824)
Area-based user accuracy =  0.9057959674425231
Area-based standard error of user accuracy =  46.60851275092246
(107869.58823733861, 132647.18275766599)
Area-based user producer =  0.8132067790267832
Area-based standard error of producer accuracy =  45.63152476287917


In [17]:
users_acc

0.8132067790267832

In [18]:
master_df

Unnamed: 0,strata,A_N_star_h,A_n_star_h,N_star_h,X_bar,Y_bar,n_star_h,n_xu,n_yu,sx_h_2,...,x_bar_h,x_bar_h_count,xu_0_1,xu_IDs,y_bar_h,y_bar_h_count,yu_0_1,yu_IDs,s_xy_h,x_hat
0,alfalfa hay,133731.871,793.873165,3343,3208.118874,3208.118874,12,1.0,1.0,0.070023,...,0.023989,0.083333,[1.0],[56147_WSDA_SF_2016],0.023989,0.083333,[1],[56147_WSDA_SF_2016],-0.929977,11144.322583
6,barley hay,777.69,213.782841,42,48.100491,48.100491,3,1.0,1.0,0.148148,...,0.06185,0.333333,[1.0],[108269_WSDA_SF_2018],0.06185,0.333333,[1],[108269_WSDA_SF_2018],-0.851852,259.23
7,"bean, dry",25231.634,737.036021,546,12948.68675,12948.68675,15,6.0,6.0,2.090667,...,0.513193,0.4,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[53739_WSDA_SF_2016, 109890_WSDA_SF_2017, 1097...",0.513193,0.4,"[1, 1, 1, 1, 1, 1]","[53739_WSDA_SF_2016, 109890_WSDA_SF_2017, 1097...",-5.856,10092.6536
8,"bean, green",2004.145,543.773125,48,1104.923004,1104.923004,12,3.0,3.0,0.630208,...,0.551319,0.25,"[1.0, 1.0, 1.0]","[105141_WSDA_SF_2018, 102909_WSDA_SF_2018, 103...",0.551319,0.25,"[1, 1, 1]","[105141_WSDA_SF_2018, 102909_WSDA_SF_2018, 103...",-2.859375,501.03625
10,bluegrass seed,7834.208,898.257392,108,2050.007699,2050.007699,9,1.0,1.0,0.087791,...,0.261674,0.111111,[1.0],[56661_WSDA_SF_2016],0.261674,0.111111,[1],[56661_WSDA_SF_2016],-0.912209,870.467556
11,buckwheat,6829.722,606.458391,131,6829.722,6829.722,7,7.0,7.0,5.142857,...,1.0,1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[104287_WSDA_SF_2018, 101268_WSDA_SF_2018, 104...",1.0,1.0,"[1, 1, 1, 1, 1, 1, 1]","[104287_WSDA_SF_2018, 101268_WSDA_SF_2018, 104...",-7.0,6829.722
17,"corn, field",118490.006,3576.554351,2866,26525.857543,23085.118295,71,13.0,11.0,2.313704,...,0.223866,0.183099,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[103824_WSDA_SF_2017, 104993_WSDA_SF_2017, 582...",0.194828,0.15493,"[1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1]","[103824_WSDA_SF_2017, 104993_WSDA_SF_2017, 582...",-12.896611,21695.353211
18,"corn, sweet",38576.666,649.169593,649,15918.747719,15918.747719,8,3.0,3.0,0.861328,...,0.412652,0.375,"[1.0, 1.0, 1.0]","[98789_WSDA_SF_2017, 104488_WSDA_SF_2018, 1030...",0.412652,0.375,"[1, 1, 1]","[98789_WSDA_SF_2017, 104488_WSDA_SF_2018, 1030...",-2.853516,14466.24975
26,grass seed,2087.985,342.855213,43,962.187303,878.191024,7,3.0,2.0,0.944606,...,0.460821,0.428571,"[1.0, 1.0, 1.0]","[107177_WSDA_SF_2018, 105142_WSDA_SF_2018, 107...",0.420593,0.285714,"[1, 0, 1]","[105142_WSDA_SF_2018, 107315_WSDA_SF_2018]",-2.906706,894.850714
32,onion,24357.162,989.147937,407,689.834517,689.834517,11,1.0,1.0,0.075131,...,0.028322,0.090909,[1.0],[101197_WSDA_SF_2018],0.028322,0.090909,[1],[101197_WSDA_SF_2018],-0.924869,2214.287455
