In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from skimage import io
import statsmodels.api as sm

# Select the runtime configuration through environment variables.
# NOTE(review): presumably RuntimeEnv reads NAMESPACE / PROFILE — confirm; if so,
# these assignments must stay above the import and the RuntimeEnv() call.
os.environ["NAMESPACE"]="research"
os.environ["PROFILE"]="local"
# NOTE(review): the module is spelled `evironment` (not `environment`) — confirm this
# matches the real module name inside agrobrain_util.
from agrobrain_util.runtime.evironment import RuntimeEnv

DATA_DIR = "data"
env = RuntimeEnv()

# Tag-level table: one row per tag, with one `res_<resolution>` column per tested resolution.
csv_path = os.path.join(DATA_DIR, "resolution_limitation_tags_dataframe_2.csv")
resolution_tags_df = pd.read_csv(csv_path)

# Image-level table; rows without a wideImageID are dropped and the index is renumbered.
images_csv_path = os.path.join(
    DATA_DIR,
    'resolution_test',
    'resolution_test_images_dataframe_1000_images_full_data_1.csv',
)
images_data = pd.read_csv(images_csv_path)
images_data = images_data[images_data['wideImageID'].notna()].reset_index(drop=True)

# Sorting keeps the original row labels (the index is not reset).
resolution_tags_df_sorted = resolution_tags_df.sort_values(by='poly_union_area_pixel')

# Binary flag per resolution: 1 where the `res_<resolution>` value is > 0, otherwise 0
# (NaN compares False and therefore maps to 0).
for res_column in ('res_7.2', 'res_9.0', 'res_10.8', 'res_12.6', 'res_14.4'):
    resolution_tags_df_sorted[f'{res_column}_binary'] = (
        resolution_tags_df_sorted[res_column] > 0
    ).astype('int64')

# Label-based lookup: this is the row whose index label is 0, which is not
# necessarily the first row of the sorted frame.
example_image_id = int(resolution_tags_df_sorted['image_id'].loc[0])
im_path = env.download_image(example_image_id)
image = io.imread(im_path)
image_shape = image.shape
# Pixel count from the first two dimensions of the loaded array.
image_area_pixels = image_shape[0] * image_shape[1]


In [None]:
# Request the example image again; as the last expression of the cell, the value
# returned by download_image is shown as the cell output.
env.download_image(int(example_image_id))

In [None]:
# Image id of the row labelled 0 (label-based lookup on the sorted frame's original index).
image_id = resolution_tags_df_sorted['image_id'].loc[0]

# zoom_resolution of the first images_data row whose imageID matches.
matching_zoom_resolutions = images_data.loc[images_data['imageID'] == image_id, 'zoom_resolution']
image_resolution = matching_zoom_resolutions.to_numpy()[0]

# Box areas (pixels) of all tags belonging to that image.
image_tag_box_areas = resolution_tags_df_sorted.loc[
    resolution_tags_df_sorted['image_id'] == image_id, 'poly_box_area_pixel'
]

# Square root of the box area of the tag labelled 7, scaled by the image resolution and
# truncated to int. Raises KeyError if row label 7 does not belong to this image.
# NOTE(review): the label 7 is a hard-coded example row — confirm it is still the intended one.
int(np.sqrt(image_tag_box_areas.loc[7]) * image_resolution)

In [None]:
# Fraction of tags flagged 1 at each resolution, over the whole sorted frame.
# The flags are 0/1, so the vectorised column mean equals sum / row count and avoids
# iterating the Series element by element in Python.
binary_flag_shares = resolution_tags_df_sorted[
    ['res_7.2_binary', 'res_9.0_binary', 'res_10.8_binary', 'res_12.6_binary', 'res_14.4_binary']
].mean()

res_7_percent = binary_flag_shares['res_7.2_binary']
res_9_percent = binary_flag_shares['res_9.0_binary']
res_10_percent = binary_flag_shares['res_10.8_binary']
res_12_percent = binary_flag_shares['res_12.6_binary']
res_14_percent = binary_flag_shares['res_14.4_binary']

res_7_percent, res_9_percent, res_10_percent, res_12_percent, res_14_percent

In [None]:
chosen_resolution = 9.0
label_column = f'res_{chosen_resolution}_binary'

# Keep only tags whose box length is at most 100 (column: poly_box_length_mm).
resolution_tags_df_filtered = resolution_tags_df_sorted[resolution_tags_df_sorted['poly_box_length_mm']<=100]

zero_rows = resolution_tags_df_filtered[resolution_tags_df_filtered[label_column] == 0]
one_rows = resolution_tags_df_filtered[resolution_tags_df_filtered[label_column] == 1]

# Both classes are reduced to the size of the smaller one. Previously only the zero rows
# were sampled, so the frame stayed imbalanced whenever the ones outnumbered the zeros.
num_rows_to_choose = min(len(zero_rows), len(one_rows))
chosen_zero_rows = zero_rows.sample(n=num_rows_to_choose, random_state=42)
if len(one_rows) > num_rows_to_choose:
    chosen_one_rows = one_rows.sample(n=num_rows_to_choose, random_state=42)
else:
    # Already the smaller class: keep every row, in its existing order.
    chosen_one_rows = one_rows

# Concatenate both classes to create the balanced DataFrame
balanced_df = pd.concat([chosen_zero_rows, chosen_one_rows])

# Shuffle the rows of the balanced DataFrame (optional)
balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)


In [None]:
# Fraction of rows in balanced_df whose 'res_7.2_binary' flag is 1.
# This rebinds res_7_percent, which an earlier cell computes over the full sorted frame.
# NOTE(review): when run top to bottom, balanced_df was balanced on the 9.0 label rather
# than on res_7.2 — confirm this is the intended check.
res_7_percent = sum(balanced_df['res_7.2_binary']) / len(balanced_df)
res_7_percent

In [None]:
# Show the column labels of the sorted tag frame.
resolution_tags_df_sorted.columns

In [None]:
# Smallest and largest value of the poly_box_length_mm column, shown as a (min, max) tuple.
box_lengths = resolution_tags_df_sorted['poly_box_length_mm']
min(box_lengths), max(box_lengths)

In [None]:
# Scratch calculation: product of a and b, followed by its square root (shown twice).
a, b = 100, 15

ab_product = a * b
ab_product_root = np.sqrt(ab_product)

ab_product, ab_product_root, ab_product_root

In [None]:
# PLOT THE GRAPHS ON TOP OF EACH OTHER
#
# For every resolution: build a class-balanced sample, fit a one-predictor logistic
# regression of the binary flag on original_polygons_avg_area_mm2, and draw the data
# points and the fitted curve on a single shared axis.

resolutions = [7.2, 9.0, 10.8, 12.6, 14.4]
colors = ['greenyellow', 'limegreen', 'mediumseagreen', 'g', 'darkgreen']

# Only tags with an average polygon area of at most 80000 are used for the fit.
resolution_tags_df_filtered = resolution_tags_df_sorted[resolution_tags_df_sorted['original_polygons_avg_area_mm2']<=80000]

fig, ax = plt.subplots(figsize=(18, 7))

for resolution, color in zip(resolutions, colors):
    label_column = f'res_{resolution}_binary'
    zero_rows = resolution_tags_df_filtered[resolution_tags_df_filtered[label_column] == 0]
    one_rows = resolution_tags_df_filtered[resolution_tags_df_filtered[label_column] == 1]

    # Reduce both classes to the size of the smaller one. Previously only the zero rows
    # were sampled, so the data stayed imbalanced whenever ones outnumbered zeros.
    num_rows_to_choose = min(len(zero_rows), len(one_rows))
    chosen_zero_rows = zero_rows.sample(n=num_rows_to_choose, random_state=42)
    if len(one_rows) > num_rows_to_choose:
        chosen_one_rows = one_rows.sample(n=num_rows_to_choose, random_state=42)
    else:
        chosen_one_rows = one_rows

    balanced_df = pd.concat([chosen_zero_rows, chosen_one_rows])

    data = {
        'x': balanced_df['original_polygons_avg_area_mm2'].values,
        'y': balanced_df[label_column].values,  # 0/1 labels for the logistic fit
    }
    df = pd.DataFrame(data)

    # Add the intercept column, then fit the logistic regression model.
    X = sm.add_constant(df['x'])
    logit_model = sm.Logit(df['y'], X)
    result = logit_model.fit()

    # params is a label-indexed Series (intercept first, then slope). Unpacking iterates
    # over the values and avoids the deprecated positional `params[0]` lookup.
    params = result.params
    a_fit, b_fit = params

    # Evaluate the fitted curve over the observed x range.
    x_curve = np.linspace(df['x'].min(), df['x'].max(), 100)
    y_curve = 1 / (1 + np.exp(-a_fit - b_fit * x_curve))

    # x value at which the fitted probability equals 0.5.
    x_th = -a_fit / b_fit

    ax.scatter(df['x'], df['y'], color=color, marker='o', alpha=0.5, s=10)
    ax.plot(x_curve, y_curve, color=color, label=f'Logistic Curve Resolution {resolution}: 0.5 at {x_th:.2f} mm2')

ax.set_xlabel('original_polygons_avg_area_mm2')
ax.set_ylabel('Identifiied y/n')
ax.set_title('Resolution curves - original_polygons_avg_area_mm2')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Show the fit summary held in `result`; when cells are run top to bottom this is the
# model fitted in the last iteration of the preceding loop.
result.summary()

In [None]:
# x value at which the fitted probability is 0.5 (intercept + slope * x == 0).
# `.iloc` is used because `params` is indexed by exog names; integer keys on such a
# Series are treated as positions only through a deprecated fallback.
-params.iloc[0] / params.iloc[1]

In [None]:


# For every resolution: build a class-balanced sample, fit a one-predictor logistic
# regression of the binary flag on poly_box_length_mm, print the fit summary and draw
# the data with the fitted curve in a separate figure.

resolutions = [7.2, 9.0, 10.8, 12.6, 14.4]
colors = ['violet', 'palevioletred', 'hotpink', 'mediumvioletred', 'darkred']

# Only tags with a box length of at most 200 are used for the fit.
resolution_tags_df_filtered = resolution_tags_df_sorted[resolution_tags_df_sorted['poly_box_length_mm']<=200]

for resolution, color in zip(resolutions, colors):
    label_column = f'res_{resolution}_binary'
    zero_rows = resolution_tags_df_filtered[resolution_tags_df_filtered[label_column] == 0]
    one_rows = resolution_tags_df_filtered[resolution_tags_df_filtered[label_column] == 1]

    # Reduce both classes to the size of the smaller one. Previously only the zero rows
    # were sampled, so the data stayed imbalanced whenever ones outnumbered zeros.
    num_rows_to_choose = min(len(zero_rows), len(one_rows))
    chosen_zero_rows = zero_rows.sample(n=num_rows_to_choose, random_state=42)
    if len(one_rows) > num_rows_to_choose:
        chosen_one_rows = one_rows.sample(n=num_rows_to_choose, random_state=42)
    else:
        chosen_one_rows = one_rows

    balanced_df = pd.concat([chosen_zero_rows, chosen_one_rows])

    data = {
        'x': balanced_df['poly_box_length_mm'].values,
        'y': balanced_df[label_column].values,  # 0/1 labels for the logistic fit
    }
    df = pd.DataFrame(data)

    # Add the intercept column, then fit the logistic regression model.
    X = sm.add_constant(df['x'])
    logit_model = sm.Logit(df['y'], X)
    result = logit_model.fit()

    print(result.summary())

    # params is a label-indexed Series (intercept first, then slope). Unpacking iterates
    # over the values and avoids the deprecated positional `params[0]` lookup.
    params = result.params
    a_fit, b_fit = params

    # Evaluate the fitted curve over the observed x range.
    x_curve = np.linspace(df['x'].min(), df['x'].max(), 100)
    y_curve = 1 / (1 + np.exp(-a_fit - b_fit * x_curve))

    # Plot the original data points and the fitted logistic curve
    plt.figure(figsize=(18, 7))
    plt.scatter(df['x'], df['y'], label='Data', color="black", marker='o', alpha=0.5, s=10)
    plt.plot(x_curve, y_curve, color=color, label=f'Logistic Curve Resolution {resolution}')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title(f'res_{resolution}')
    plt.legend()
    plt.grid(True)
# A single show() after the loop displays every figure created above.
plt.show()

In [None]:
# Rebuild the sorted tag frame and its binary flags from resolution_tags_df, then scatter
# the 14.4 flag against the box length.
resolution_tags_df_sorted = resolution_tags_df.sort_values(by='poly_union_area_pixel')

# Binary flag per resolution: 1 where the `res_<resolution>` value is > 0, otherwise 0.
for res_column in ('res_7.2', 'res_9.0', 'res_10.8', 'res_12.6', 'res_14.4'):
    resolution_tags_df_sorted[f'{res_column}_binary'] = (
        resolution_tags_df_sorted[res_column] > 0
    ).astype('int64')

scatter_fig, scatter_ax = plt.subplots(figsize=(18, 7))
scatter_ax.scatter(
    resolution_tags_df_sorted['poly_box_length_mm'],
    resolution_tags_df_sorted['res_14.4_binary'],
    marker='o',
    alpha=0.5,
    s=10,
)

# Axis labels and title
scatter_ax.set_xlabel('X-axis')
scatter_ax.set_ylabel('Y-axis')
scatter_ax.set_title('Plotting a Curve')

# Show the plot
plt.show()


In [None]:
# Ask the ETI API for the zoom images matching one hard-coded image id and keep the first.
# NOTE(review): 5372412 is a magic id — confirm which image it refers to.
query_image_ids = [5372412]
matching_zoom_image_ids = env.eti_api.get_matching_zoom_images(query_image_ids)
example_zoom_image_id = matching_zoom_image_ids[0]
example_zoom_image_id

In [None]:
# Display the current value of example_zoom_image_id as the cell output.
example_zoom_image_id