In [23]:
import pandas as pd
import os
import warnings
from tqdm import tqdm
from datetime import datetime
import csv
from skimage import io
import matplotlib.pyplot as plt
import numpy as np
import glob
import shutil




# Select the runtime profile and namespace through environment variables.
# NOTE(review): presumably these must be set before the agrobrain imports below
# read their configuration - confirm before reordering this cell.
os.environ['PROFILE'] = 'local'
os.environ['NAMESPACE'] = 'production'

from agrobrain_util.runtime.evironment import RuntimeEnv
from agrobrain_util.infra.app_config import application_config as cfg
from agrobrain_apis.atlas.atlas_api import get_metrics, DataQuery

# Shared runtime handle; later cells use it for `eti_api` lookups and `download_image`.
env = RuntimeEnv()
# Tag-category mapping from the application config; its 'weed' entry is used when counting weed tags.
categories_dict = cfg['tags']['categories']

# Directory that holds the CSV inputs/outputs read and written by later cells.
DATA_DIR = "data"
# NOTE(review): IMAGES_DIR is not referenced by the cells visible here, which hard-code "images".
IMAGES_DIR = "images"

In [24]:
import random

# Number of data rows to keep when down-sampling the full images CSV.
n = 10000
full_images_csv = os.path.join(DATA_DIR, "anafa_tmp_wide_images_images_df.csv")

# Count the physical lines of the CSV (header included). The context manager
# closes the handle deterministically instead of leaking it.
with open(full_images_csv) as csv_handle:
    total_rows = sum(1 for _ in csv_handle)

# Line 0 is the header and must never be skipped; data lines are 1..total_rows-1.
# The sample size is clamped at zero so a file with fewer than `n` data rows
# keeps everything instead of making random.sample raise ValueError.
data_line_numbers = range(1, total_rows)
rows_to_skip_count = max(0, len(data_line_numbers) - n)
skip_rows = sorted(random.sample(data_line_numbers, rows_to_skip_count))
# images_df = pd.read_csv(os.path.join(DATA_DIR, "anafa_tmp_wide_images_images_df.csv"), skiprows=skip_rows)

# weeds_df = pd.read_csv(os.path.join(DATA_DIR, "weeds_images_df_light.csv"))
# images_df = weeds_df

# Active data source: the pre-filtered weeds table.
# `images_df` is an alias of `top_weeds_df` (not a copy), so columns added to
# one are visible through the other.
top_weeds_df = pd.read_csv(os.path.join(DATA_DIR, "filtered_weeds_df.csv"))
images_df = top_weeds_df


# images_df = pd.read_csv(os.path.join(DATA_DIR, "anafa_tmp_wide_images_images_df.csv"), nrows=10000)

In [25]:
# CALCULATE "NUM WEED TAGS LIST"
import ast


def _weed_tag_counts(stats_repr, weed_category):
    """Return the `tagsCount` of every weed-category entry in one image's stats.

    Args:
        stats_repr: String holding a Python-literal list of dicts, each with
            'category' and 'tagsCount' keys (one cell of the 'stats' column).
        weed_category: Category value that marks an entry as a weed.

    Returns:
        List of `tagsCount` values, in their original order, for the entries
        whose 'category' equals `weed_category`.

    Raises:
        ValueError / SyntaxError: if `stats_repr` is not a plain Python literal.
    """
    # literal_eval only accepts literals, so CSV content can never execute code
    # the way the previous eval() call could.
    return [
        entry['tagsCount']
        for entry in ast.literal_eval(stats_repr)
        if entry['category'] == weed_category
    ]


weed_category = categories_dict['weed']

# One pass over the rows: each row gets the counts derived from its own stats,
# independent of the frame's index labels and of duplicated imageID values.
# The list is stored as a string because downstream code parses it back.
images_df["num_weed_tags_list"] = [
    str(_weed_tag_counts(stats_repr, weed_category))
    for stats_repr in tqdm(images_df['stats'])
]

100%|██████████| 100/100 [00:00<00:00, 3461.48it/s]


In [6]:
def _total_weed_tags(counts_repr):
    """Parse one stringified list of tag counts and return its sum."""
    return sum(eval(counts_repr))


# Total number of weed tags per image, derived from the stringified per-entry counts.
images_df['num_weed_tags'] = images_df['num_weed_tags_list'].apply(_total_weed_tags)

# Keep only rows whose camera angle lies strictly between -95 and -85.
camera_angle = images_df['cameraAngle']
angle_in_window = (camera_angle > -95) & (camera_angle < -85)
images_df = images_df.loc[angle_in_window].reset_index(drop=True)

# Subset of the remaining images that carry no weed tags at all.
has_no_weed_tags = images_df['num_weed_tags'] == 0
images_df_no_weeds = images_df.loc[has_no_weed_tags].reset_index(drop=True)

In [None]:
# # CHOOSING IMAGES FOR TAGGING 

# images_ids_to_dataset_no_weeds = [5346105, 7408611, 7408436, 7408601, 8633551, 5347319, 7411416]
# images_ids_to_dataset_with_weeds = [7659514, 9664706, 6010828, 8243459, 5487061, 5446549, 7275950]

# interesting_images = [6512128, 8699095, 'Zoom image id': 6433099, 'Wide image id': 6433827, 'Zoom image id': 6187517,
# 'Wide image id': 6189546, 'Zoom image id': 6607048, 'Wide image id': 6609997]

In [None]:

# df_to_show = images_df.sample(10).reset_index(drop=True)
# NOTE(review): `[:-10]` selects every row EXCEPT the last ten. If the intent was
# a ten-image preview (as in the sampled variant above), this should probably be
# `[:10]` or `[-10:]` - left unchanged until confirmed.
df_to_show = images_df[:-10].reset_index(drop=True)

# Walk the rows directly instead of re-filtering the frame by imageID on every
# iteration; each row supplies its own crop name and weed-tag total.
preview_rows = zip(df_to_show['imageID'], df_to_show['cropName'], df_to_show['num_weed_tags'])

for example_image_id, image_crop_name, image_num_tags in preview_rows:
    # SHOW IMAGES
    print(f"Zoom image id: {example_image_id}")
    matching_wide_image_id = env.eti_api.get_matching_wide_images([int(example_image_id)])[0]
    print(f"Wide image id: {matching_wide_image_id}")

    # Download both images and load them as arrays.
    im_path = env.download_image(int(example_image_id))
    image = io.imread(im_path)

    wide_im_path = env.download_image(int(matching_wide_image_id))
    wide_image = io.imread(wide_im_path)

    # Side by side: the row's own image on the left, its matching wide image on the right.
    fig, axes = plt.subplots(nrows=1, ncols=2)
    axes[0].imshow(image, extent=[0, image.shape[1], 0, image.shape[0]])
    axes[1].imshow(wide_image, extent=[0, wide_image.shape[1], 0, wide_image.shape[0]])

    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])
    fig.set_size_inches(10, 5)
    plt.suptitle(f"Crop Type: {image_crop_name}\nZoom Image ID: {example_image_id}, Wide Image ID: {matching_wide_image_id}\nNum Weed Tags: {image_num_tags}")
    plt.tight_layout()
    plt.show()
    # Release the figure so a long preview loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Display the column labels of the current `images_df` as a quick schema check.
images_df.columns

In [14]:
# SAVE CHOSEN IMAGES DATAFRAME

csv_file = os.path.join(DATA_DIR, "anafa_tmp_wide_images_images_df.csv")

# IMAGES WITHOUT WEEDS - hand-picked image IDs, translated through
# `get_matching_zoom_images` before being looked up in the full images CSV.
images_ids_to_dataset_no_weeds = [5346105, 7408611, 7408436, 7408601, 8633551, 5347319, 7411416]
images_ids_to_dataset_no_weeds_zoom = env.eti_api.get_matching_zoom_images(list(images_ids_to_dataset_no_weeds))

# IMAGES FROM TTT - IDs are taken from the .jpg file names in the TTT folder.
# Computed once here and reused for both the full-CSV and the weeds-CSV lookups.
ttt_images_paths = glob.glob(os.path.join("images_to_dataloop_ttt/wide_ttt", "*.jpg"))
ttt_images_ids = [int(os.path.splitext(os.path.basename(p))[0]) for p in ttt_images_paths]
ttt_images_ids_zoom = env.eti_api.get_matching_zoom_images(list(ttt_images_ids))

# Stream the full images CSV in chunks and keep only the rows of interest, so
# the whole file never has to be held in memory at once.
chunk_size = 1000
filtered_chunks_images_without_weeds = []
filtered_chunks_ttt_images = []

for chunk in pd.read_csv(csv_file, chunksize=chunk_size):
    chunk_image_ids = chunk['imageID']
    filtered_chunks_images_without_weeds.append(chunk[chunk_image_ids.isin(images_ids_to_dataset_no_weeds_zoom)])
    filtered_chunks_ttt_images.append(chunk[chunk_image_ids.isin(ttt_images_ids_zoom)])

df_chosen_images_without_weeds = pd.concat(filtered_chunks_images_without_weeds, ignore_index=True)
df_chosen_ttt_images = pd.concat(filtered_chunks_ttt_images, ignore_index=True)

# IMAGES WITH WEEDS - looked up in the (smaller) weeds table, read in one go.
weeds_df_csv_file = os.path.join(DATA_DIR, "weeds_images_df_light.csv")
weeds_df = pd.read_csv(weeds_df_csv_file)

# CREATE DATAFRAME WITH NEW CHOSEN IMAGES
images_ids_to_dataset_with_weeds = [7659514, 9664706, 6010828, 8243459, 5487061, 5446549, 7275950]
images_ids_to_dataset_with_weeds_zoom = env.eti_api.get_matching_zoom_images(list(images_ids_to_dataset_with_weeds))
df_chosen_images_with_weeds = weeds_df[weeds_df['imageID'].isin(images_ids_to_dataset_with_weeds_zoom)]

# TTT images that also appear in the weeds table. Not part of the saved
# selection below; kept available for inspection.
df_existing_images_try = weeds_df[weeds_df['imageID'].isin(ttt_images_ids_zoom)]

# Combine the three selections and persist them. index=False keeps the
# throwaway RangeIndex out of the file, so reloading the CSV does not grow an
# extra "Unnamed: 0" column.
total_chosen_images_df = pd.concat([df_chosen_images_without_weeds, df_chosen_images_with_weeds, df_chosen_ttt_images]).reset_index(drop=True)
total_chosen_images_df.to_csv(os.path.join(DATA_DIR, "wide_images_weeds_taggers_ragging_task_2023_06_27.csv"), index=False)


In [None]:
# Ask the ETI API for the wide image matching each chosen image and store the
# answers in a new column.
# NOTE(review): assumes the API returns one ID per input, in input order - confirm.
chosen_image_ids = list(total_chosen_images_df['imageID'])
total_chosen_images_df['wideImageID'] = env.eti_api.get_matching_wide_images(chosen_image_ids)

In [15]:
# Reload the saved selection from disk and display how many rows it contains.
chosen_images_csv = os.path.join(DATA_DIR, "wide_images_weeds_taggers_ragging_task_2023_06_27.csv")
total_chosen_images_df = pd.read_csv(chosen_images_csv)
len(total_chosen_images_df)

31

In [18]:
# DOWNLOAD CHOSEN IMAGES
# Refresh the wide-image column from the API, then fetch every wide image so it
# is available locally.
chosen_ids = list(total_chosen_images_df['imageID'])
total_chosen_images_df['wideImageID'] = env.eti_api.get_matching_wide_images(chosen_ids)
for wide_image_id in total_chosen_images_df['wideImageID']:
    wide_im_path = env.download_image(wide_image_id)


In [None]:
# COPY IMAGES TO FOLDER FOR DATALOOP BUCKET

df_to_dataloop = total_chosen_images_df

source_folder = "images"
destination_folder = "images_to_dataloop"

# MAKE SURE ALL THE IMAGES IN FINAL_DF EXIST IN LOCAL FOLDER
# File names use the integer form of the ID everywhere, so the check and the
# copy below always refer to the same files.
wide_image_file_names = [f"{int(im_id)}.jpg" for im_id in df_to_dataloop['wideImageID']]

# Actually test for the file on disk: a bare path string is always truthy, so
# checking the joined path itself would report every image as present.
check_list = [
    os.path.exists(os.path.join(source_folder, file_name))
    for file_name in wide_image_file_names
]
print(np.count_nonzero(check_list))

missing_file_names = [
    file_name
    for file_name, is_present in zip(wide_image_file_names, check_list)
    if not is_present
]
if missing_file_names:
    print(f"{len(missing_file_names)} image(s) missing from {source_folder}: {missing_file_names}")


# COPY CHOSEN IMAGES TO A FOLDER
destination_sub_folder = os.path.join(destination_folder, "first_tagging_task")
os.makedirs(destination_sub_folder, exist_ok=True)
for im_id in df_to_dataloop['wideImageID']:
    source_path = os.path.join(source_folder, f"{int(im_id)}.jpg")
    image_destination_path = os.path.join(destination_sub_folder, f"{int(im_id)}.jpg")
    # Skip files that are already in place so the cell can be re-run safely.
    # A missing source file still raises FileNotFoundError from copy2.
    if not os.path.exists(image_destination_path):
        print(f"copying image {int(im_id)}.jpg")
        shutil.copy2(source_path, image_destination_path)