In [None]:
import os, pandas as pd, numpy as np, seaborn as sns

# Notebook-wide pandas display settings, applied in one pass (option name -> value).
_DISPLAY_OPTIONS = {
    'display.max_columns': 50,    # max number of columns rendered
    'display.min_rows': 25,       # rows shown once a frame is truncated
    'display.max_rows': 25,       # row count above which a frame is truncated
    'display.width': 1000,        # total display width, in characters
    'display.max_colwidth': 100,  # max characters rendered per cell
    'expand_frame_repr': True,    # wide frames wrap across several lines
}
for _option_name, _option_value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)

from matplotlib import pyplot as plt 
from sklearn.metrics import confusion_matrix
from dotenv import load_dotenv
print(os.getcwd())

# Optional below

# load_dotenv('../.env_analysis')

### If all else fails
# Hard-coded fallback locations, used while the .env loading above is disabled.
# Both literals need their closing quote; without it the whole cell is a SyntaxError.
PROJECT_INPUT_DATA_DIR = "/sddata/app_or_generated_data/Image-Comparator-Analysis/raw_annotations/arvo_abstract_flicker_12_06_2023"
PROJECT_DIR = "/sddata/app_or_generated_data/Image-Comparator-Analysis/analysis/flicker_abstract_results_12_07_2023"

# DATA_DIR=os.environ["DATA_DIR"]
# PROJECT_DIR=os.environ["PROJECT_DIR"]
# PROJECT_INPUT_DATA_DIR=os.environ["PROJECT_INPUT_DATA_DIR"]
# DNS=os.environ["DNS"]
# IMAGES_DB=os.environ["IMAGES_DB"]
# DB_PORT=os.environ["DB_PORT"]
# DB_ADMIN_USER=os.environ["DB_ADMIN_USER"]
# DB_ADMIN_PASS=os.environ["DB_ADMIN_PASS"]
# ADMIN_PARTY=True if os.environ["ADMIN_PARTY"] == 'True' else False

# print(f"""
# DATA_DIR: {DATA_DIR}
# PROJECT_DIR: {PROJECT_DIR}
# PROJECT_INPUT_DATA_DIR: {PROJECT_INPUT_DATA_DIR}
# DNS: {DNS}
# IMAGES_DB: {IMAGES_DB}
# DB_PORT: {DB_PORT}
# DB_ADMIN_USER: {DB_ADMIN_USER}
# DB_ADMIN_PASS: {DB_ADMIN_PASS}
# ADMIN_PARTY: {ADMIN_PARTY}
# """)



Read in everyone's annotations:

In [None]:
# Image key table, ordered by its 'index' column.
# 'image_id' is derived as "<relative_path>_<image>" and is the join key used for
# the classify annotations.
image_key = (
    pd.read_csv(os.path.join(PROJECT_INPUT_DATA_DIR, "app_image_key.csv"))
    .sort_values('index')
)
image_key['image_id'] = image_key['relative_path'] + "_" + image_key['image']

# Scott's Model and EHR
def _load_flicker_registered(csv_name):
    """Read one flicker-registration CSV and split it into EHR and Segformer views.

    Parameters
    ----------
    csv_name : str
        File name inside PROJECT_INPUT_DATA_DIR.

    Returns
    -------
    tuple of (registered, ehr, segformer) DataFrames
        ``registered`` is the full table with ``fixed_img_path`` /
        ``moving_img_path`` renamed to ``image_1_id`` / ``image_2_id``.
        ``ehr`` keeps the pair ids plus ``gt_cdr_before`` / ``gt_cdr_after``
        renamed to ``CDR_1`` / ``CDR_2``.
        ``segformer`` keeps the pair ids plus ``pred_cdr_before`` /
        ``pred_cdr_after`` renamed to ``CDR_1`` / ``CDR_2``.
    """
    pair_ids = ["image_1_id", "image_2_id"]
    registered = (
        pd.read_csv(os.path.join(PROJECT_INPUT_DATA_DIR, csv_name))
        .rename(columns={"fixed_img_path": "image_1_id", "moving_img_path": "image_2_id"})
    )
    ehr = (
        registered[pair_ids + ["gt_cdr_before", "gt_cdr_after"]]
        .rename(columns={"gt_cdr_before": "CDR_1", "gt_cdr_after": "CDR_2"})
    )
    # The Segformer view holds the pred_* columns, so those are the keys to rename.
    # DataFrame.rename silently skips keys that are not present, which is why mapping
    # the gt_* names here would leave the frame without CDR_1 / CDR_2.
    segformer = (
        registered[pair_ids + ["pred_cdr_before", "pred_cdr_after"]]
        .rename(columns={"pred_cdr_before": "CDR_1", "pred_cdr_after": "CDR_2"})
    )
    return registered, ehr, segformer


flicker_registered_stable, EHR_stable, Segformer_stable = _load_flicker_registered("flicker_registered_stable.csv")

flicker_registered_mild_prog, EHR_mild_prog, Segformer_mild_prog = _load_flicker_registered("flicker_registered_mild_prog.csv")

flicker_registered_sig_prog, EHR_sig_prog, Segformer_sig_prog = _load_flicker_registered("flicker_registered_sig_prog.csv")


# classify: annotations joined onto the image key by image_id; the "Image" column
# of the export is exposed as "CDR" from here on.
all_classify = (
    pd.read_csv(os.path.join(PROJECT_INPUT_DATA_DIR, "all-v2_12_1_23-classify-0.csv"))
    .pipe(lambda classify_raw: pd.merge(image_key, classify_raw, on="image_id"))
    .rename(columns={"Image": "CDR"})
)

# compare and flicker: loaded as exported, no reshaping at this point
compare_csv_path = os.path.join(PROJECT_INPUT_DATA_DIR, "all-v2_12_1_23-compare-0.csv")
flicker_csv_path = os.path.join(PROJECT_INPUT_DATA_DIR, "all-v2_12_1_23-flicker-0.csv")
all_compare = pd.read_csv(compare_csv_path)
all_flicker = pd.read_csv(flicker_csv_path)


## Stats to perform, maybe???

[*] Inter-rater variability - raw CDR
  - Average the CDR per image, even though there are 2 time points
  - Get each person's estimated CDR for these images
  - Plot with the average on the x-axis and the estimated CDR on the y-axis.
    * Several images may share the same average value; each rater's estimate for those images then shows up at that same x position
  - Plot raters against each other
    * 


[] Rater v Scott's Model
  - 3 plots

[] Bar plots for each rater showing counts for categories of radio buttons
  - 3 plots


### Inter-rater Variability

#### Expected v Average

In [None]:
# Per-rater views of the classify annotations. `.copy()` detaches each view from
# `all_classify`, so the in-place sorts applied to them further down operate on
# independent frames instead of raising SettingWithCopyWarning.
mcnamast_classify = all_classify[all_classify['user'] == 'mcnamast'].copy()
gdeitz_classify = all_classify[all_classify['user'] == 'gdeitz'].copy()
swangyu_classify = all_classify[all_classify['user'] == 'swangyu'].copy()

# Mean CDR per image across all raters, tagged with the image key's 'order'.
# The shape summary, sort and scatter-plot cells that follow all read
# `average_classify`, so it has to exist before them for a top-to-bottom run
# (Restart & Run All) to work.
average_classify = all_classify.groupby('image_id', as_index=False)['CDR'].mean()
average_classify = pd.merge(image_key, average_classify, on="image_id")[["image_id", "order", "CDR"]].copy()

In [None]:
# Report the (rows, columns) shape of each rater's classify table and of the
# per-image average. The Segformer and EHR entries are still placeholders.
shape_report_lines = [
    "",
    "shapes:",
    f"mcnamast: {mcnamast_classify.shape} ",
    f"gdeitz: {gdeitz_classify.shape} ",
    f"swangyu: {swangyu_classify.shape}",
    f"average: {average_classify.shape}",
    "scotts_segformer:  #...",
    "ehr:  #...",
    "",
]
print("\n".join(shape_report_lines))

In [None]:
# Order every classify table by image_id so their rows line up image by image.
# Reassigning the sorted result (rather than sorting in place) keeps the cell
# idempotent and avoids SettingWithCopyWarning on frames that were sliced out of
# a larger table.
mcnamast_classify = mcnamast_classify.sort_values('image_id')
gdeitz_classify = gdeitz_classify.sort_values('image_id')
swangyu_classify = swangyu_classify.sort_values('image_id')
average_classify = average_classify.sort_values('image_id')

In [None]:
# One row per averaged image: the across-rater mean plus each rater's own CDR.
# Ratings are joined on image_id rather than zipped together by list position, so
# a rater with a missing or repeated image yields NaN / an extra row for that
# image instead of a length error or a silent shift of every later rating.
classify_by_rater = {
    'mcnamast': mcnamast_classify,
    'gdeitz': gdeitz_classify,
    'swangyu': swangyu_classify,
}

data_classify = (
    average_classify[['image_id', 'CDR']]
    .rename(columns={'CDR': 'average_CDR'})
    .sort_values('image_id')
    .reset_index(drop=True)
)
for rater, rater_classify in classify_by_rater.items():
    rater_cdr = rater_classify[['image_id', 'CDR']].rename(columns={'CDR': f'{rater}_CDR'})
    data_classify = data_classify.merge(rater_cdr, on='image_id', how='left')

# One scatter series per rater on shared axes: x = average CDR, y = that rater's estimate.
fig, ax = plt.subplots()
for rater in classify_by_rater:
    sns.scatterplot(data=data_classify, x='average_CDR', y=f'{rater}_CDR', label=f'{rater}_CDR', ax=ax)

# Customize the plot
ax.set_xlabel('Average CDR')
ax.set_ylabel('Estimated CDR')
ax.set_title('Scatter Plot of Annotators Est v Avg CDR')
ax.legend()

# Show the plot
plt.show()


#### Raters Against Each Other

In [None]:
# Compare Bins Created from Classify
average_classify = all_classify.groupby('image_id').mean("CDR"); average_classify.reset_index(inplace=True)
average_classify = pd.merge(image_key, average_classify, on="image_id")[["image_id","order","CDR"]]

average_first = average_classify[average_classify['order'] == "first"][['image_id', 'order','CDR']].reset_index()
average_first.rename(columns={"image_id":"image_1_id", "CDR":"CDR_1"}, inplace=True)
average_second = average_classify[average_classify['order'] == "second"][['image_id', 'order','CDR']].reset_index()
average_second.rename(columns={"image_id":"image_2_id", "CDR":"CDR_2"}, inplace=True)
average_side_by_side_cdrs = pd.concat([average_first, average_second], axis=1)

average_side_by_side_cdrs['CDR_Diff'] = abs(average_side_by_side_cdrs['CDR_1'] - average_side_by_side_cdrs['CDR_2'])
#### Wrong BINS BEING CALCULATED!!! sTEVE!!!!!!!!
average_side_by_side_cdrs['Choose CDR Desc'] = np.where((average_side_by_side_cdrs['CDR_Diff'] >= 0.0) & (average_side_by_side_cdrs['CDR_Diff'] < 0.15), 'Stable (CDR change 0.0)', 
                                               np.where((average_side_by_side_cdrs['CDR_Diff'] >= 0.15) & (average_side_by_side_cdrs['CDR_Diff'] < 0.3), 'Mild Progression (CDR change 0.00 - 0.15)', 
                                                                                                                                                         'Significant Progression (CDR change 0.15 - 0.3)'))
average_side_by_side_cdrs.sort_values(['image_1_id', 'image_2_id'], inplace=True)
average_side_by_side_cdrs.head(20)

In [None]:
# Scott's Model OLD
# ...pending

# Scott's Model NEW
# ...pending

# EHR
# ...pending


# Classify
# (average_side_by_side_cdrs is built in the previous cell)

# Every compare / flicker table is ordered by image pair so that the per-rater
# label columns can be lined up row by row. Filtering and sorting in one
# expression produces independent frames; sorting a boolean-mask slice in place
# raises SettingWithCopyWarning.
pair_sort_keys = ['image_1_id', 'image_2_id']

# Compare
mcnamast_compare = all_compare[all_compare['user'] == 'mcnamast'].sort_values(pair_sort_keys)
gdeitz_compare = all_compare[all_compare['user'] == 'gdeitz'].sort_values(pair_sort_keys)
swangyu_compare = all_compare[all_compare['user'] == 'swangyu'].sort_values(pair_sort_keys)
all_compare = all_compare.sort_values(pair_sort_keys)

# Flicker
mcnamast_flicker = all_flicker[all_flicker['user'] == 'mcnamast'].sort_values(pair_sort_keys)
gdeitz_flicker = all_flicker[all_flicker['user'] == 'gdeitz'].sort_values(pair_sort_keys)
swangyu_flicker = all_flicker[all_flicker['user'] == 'swangyu'].sort_values(pair_sort_keys)
all_flicker = all_flicker.sort_values(pair_sort_keys)



In [None]:
# Show every rater's 'Choose CDR Desc' column for both apps.
# A notebook cell renders only its final bare expression, so each Series is
# printed explicitly under a header naming its source.
for _source_label, _source_frame in [
    ("mcnamast compare", mcnamast_compare),
    ("gdeitz compare", gdeitz_compare),
    ("swangyu compare", swangyu_compare),
    ("mcnamast flicker", mcnamast_flicker),
    ("gdeitz flicker", gdeitz_flicker),
    ("swangyu flicker", swangyu_flicker),
]:
    print(f"--- {_source_label} ---")
    print(_source_frame['Choose CDR Desc'])

In [None]:
def make_confusing(ann1, ann2):
    """Draw a confusion-matrix heatmap of two annotators' 'Choose CDR Desc' labels.

    Each argument is a dict with an ``"ann"`` display name and a ``"data"`` table
    that has a 'Choose CDR Desc' column. ``ann1`` supplies the rows (y-axis) and
    ``ann2`` the columns (x-axis); the two label columns are compared element by
    element in their current row order. The figure is shown; nothing is returned.
    """
    categories = [
        'Stable (CDR change 0.0)',
        'Mild Progression (CDR change 0.00 - 0.15)',
        'Significant Progression (CDR change 0.15 - 0.3)',
    ]
    row_labels = ann1['data']['Choose CDR Desc']
    col_labels = ann2['data']['Choose CDR Desc']
    counts = confusion_matrix(row_labels, col_labels, labels=categories)

    # Render the counts as an annotated heatmap, one tick per category on both axes
    plt.figure(figsize=(6, 6))
    heatmap_style = dict(annot=True, fmt='d', cmap='Blues', cbar=False)
    sns.heatmap(counts, xticklabels=categories, yticklabels=categories, **heatmap_style)
    plt.xlabel("{} Labels".format(ann2['ann']))
    plt.ylabel("{} Labels".format(ann1['ann']))
    plt.title('Confusion Matrix')
    plt.show()


In [None]:
# Compare-app label tables per annotator, plus the bins derived from the averaged
# classify CDRs, each wrapped with the display name used on the plot axes.
_pair_label_columns = ['image_1_id', 'image_2_id', 'Choose CDR Desc']
ann1 = dict(ann="mcnamara", data=mcnamast_compare[_pair_label_columns])
ann2 = dict(ann="gdeitz", data=gdeitz_compare[_pair_label_columns])
ann3 = dict(ann="swangyu", data=swangyu_compare[_pair_label_columns])
ann4 = dict(ann="average_classify_cdrs", data=average_side_by_side_cdrs[_pair_label_columns])

In [None]:
# One confusion matrix for every pairing of the four annotation sources.
# Uses whatever ann1-ann4 currently hold; they are rebound to the flicker data
# further down, so run this right after the compare definitions.
_annotator_pairs = [
    (ann1, ann2),
    (ann1, ann3),
    (ann1, ann4),
    (ann2, ann3),
    (ann2, ann4),
    (ann3, ann4),
]
for _row_annotator, _col_annotator in _annotator_pairs:
    make_confusing(_row_annotator, _col_annotator)

In [None]:
# Flicker-app label tables per annotator, plus the bins derived from the averaged
# classify CDRs. Rebinds ann1-ann4, replacing the compare versions.
_pair_label_columns = ['image_1_id', 'image_2_id', 'Choose CDR Desc']
ann1 = dict(ann="mcnamara", data=mcnamast_flicker[_pair_label_columns])
ann2 = dict(ann="gdeitz", data=gdeitz_flicker[_pair_label_columns])
ann3 = dict(ann="swangyu", data=swangyu_flicker[_pair_label_columns])
ann4 = dict(ann="average_classify_cdrs", data=average_side_by_side_cdrs[_pair_label_columns])

In [None]:
# One confusion matrix for every pairing of the four annotation sources, using
# the ann1-ann4 bindings in effect at this point (the flicker versions).
_annotator_pairs = [
    (ann1, ann2),
    (ann1, ann3),
    (ann1, ann4),
    (ann2, ann3),
    (ann2, ann4),
    (ann3, ann4),
]
for _row_annotator, _col_annotator in _annotator_pairs:
    make_confusing(_row_annotator, _col_annotator)

### Boxes

In [None]:

def plot_annotator_bar_plot(compare, flicker, annotator=""):
    """Plot how often each 'Choose CDR Desc' category was picked, compare vs flicker.

    Parameters
    ----------
    compare, flicker : DataFrame
        Annotation tables from the compare and flicker apps; each must have a
        'Choose CDR Desc' column. No other column is required.
    annotator : str, optional
        Name inserted into the plot title.

    The figure is shown; nothing is returned.
    """
    label_column = 'Choose CDR Desc'
    data = pd.DataFrame({
        label_column: list(compare[label_column]) + list(flicker[label_column]),
        # Tag each row with its source app. Row counts come from the frames
        # themselves, so the inputs do not need a 'taskid' column.
        "app": ["compare"] * len(compare) + ["flicker"] * len(flicker),
    })

    # Create a countplot: one bar group per category, one bar per app
    sns.set(style="whitegrid")
    ax = sns.countplot(data=data, x=label_column, hue='app')

    # Customize the plot
    ax.set_xlabel('Category')
    ax.set_ylabel('Count')
    ax.set_title(f"Annotator {annotator} Bar Chart with 3 Categories, Compare v Flicker")
    ax.legend(title='Source')

    # Rotate x-axis labels so the long category names stay readable
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)  # Adjust the rotation angle as needed

    # Show the plot
    plt.show()


In [None]:
# One compare-vs-flicker bar chart per rater, then one for all raters combined.
_bar_plot_inputs = [
    ("mcnamast", mcnamast_compare, mcnamast_flicker),
    ("gdeitz", gdeitz_compare, gdeitz_flicker),
    ("swangyu", swangyu_compare, swangyu_flicker),
    ("all", all_compare, all_flicker),
]
for _annotator_name, _compare_table, _flicker_table in _bar_plot_inputs:
    plot_annotator_bar_plot(_compare_table, _flicker_table, annotator=_annotator_name)