In [1]:
from data_download_tool import download_data
from annotation_viewer import plot_annotations
from pandas_statistics import file_path_loader
from plots import *
import os


## Download tool
This is the tool you use to download data from the AWS silt annotations bucket. Enter the AWS directory to download from and the local directory where you would like the data to be saved.

In [None]:
# Download configuration: edit these three values, then run the cell.
# NOTE(review): the local folder name is dated 2024-04-17 while the AWS path and
# the statistics filename are dated 2024-04-30 -- confirm this is intentional.

# Name handed to download_data for the statistics it collects during download.
statistics_filename = "PDS-RME04-2024-04-30"

# Local folder that receives the downloaded data.
download_directory = "./data/PDS-RME04-2024-04-17/"

# Directory inside the AWS annotations bucket to download from.
aws_directory = "third-party-data/PDS-RME04/Satellite/Annotations/PDS-RME04/2024-04-30/"

download_data(aws_directory, download_directory, statistics_filename)

Loading file locations: 0it [00:00, ?it/s]

Loading file locations: 1it [00:00,  1.48it/s]
Downloading and Collecting Statistics: 100%|██████████| 190/190 [02:50<00:00,  1.11it/s]


## Select Dataset
Before using any of the functions below, enter the directory of the dataset you want to analyze. All of the following functions work from this selection.

In [None]:
# Directory of the dataset to analyze; every later cell works from this value.
# NOTE(review): this is a machine-specific absolute path -- adjust it for your environment.
dataset_directory = "/mnt/c/Users/david.chaparro/Documents/Repos/Dataset-Statistics/data/RME03Star"

# Local file handling tool built for the selected dataset directory.
local_files = file_path_loader(dataset_directory)

# Report how many samples the loader holds.
num_samples = len(local_files)
print(f"Num Samples: {num_samples}")


## Plot Dataset Statistics

This generates plots for all attributes collected during download or during statistics recalculation. 

In [None]:
# Figures are written to a "plots" folder inside the selected dataset directory.
plots_save_path = os.path.join(dataset_directory, "plots")
# Create the output folder up front so saving does not depend on it already existing.
os.makedirs(plots_save_path, exist_ok=True)

data_statistics = local_files.statistics_file

# Column type (as reported by detect_column_type) -> plotting function.
# Annotation attributes deliberately have no "time" entry, matching the
# original behavior of this cell.
sample_plotters = {
    "categorical": plot_categorical_column,
    "numerical": plot_numerical_column,
    "time": plot_time_column,
}
annotation_plotters = {
    "categorical": plot_categorical_column,
    "numerical": plot_numerical_column,
}


def _plot_attribute_columns(attributes, plotters, show_type=False):
    """Plot every column of ``attributes`` whose detected type has a plotter.

    Args:
        attributes: Table whose ``.items()`` yields ``(name, column)`` pairs.
        plotters: Mapping from detected column type to the plotting function.
        show_type: When True, print each column's detected type.

    Columns whose detected type is not a key of ``plotters`` are skipped.
    """
    for _, col_data in attributes.items():
        column_type = detect_column_type(col_data)
        if show_type:
            print(column_type)
        plotter = plotters.get(column_type)
        if plotter is not None:
            plotter(col_data, filepath=plots_save_path, dpi=500)


# Plot all statistics collected in the file.
_plot_attribute_columns(data_statistics.sample_attributes, sample_plotters, show_type=True)
_plot_attribute_columns(data_statistics.annotation_attributes, annotation_plotters)


# Plot the x and y locations of the annotations.
x_locations = data_statistics.annotation_attributes["x_center"]
y_locations = data_statistics.annotation_attributes["y_center"]
plot_scatter(x_locations, y_locations, alpha=.05, filepath=plots_save_path, dpi=500)

# Plot line segments from (x1, y1) to (x2, y2).
plot_lines(data_statistics.annotation_attributes["x1"], data_statistics.annotation_attributes["y1"],
           data_statistics.annotation_attributes["x2"], data_statistics.annotation_attributes["y2"],
           filepath=plots_save_path, dpi=500, alpha=.10)


## Plot Annotations

Plots the annotations, either as a zoomed-in image of each individual annotation or as a whole image showing all annotations. Star segments or bounding boxes can be included.

In [None]:
#Enter the dataset directory you wish to plot annotations for
view_satellite=False
view_star=False
view_image=True


plot_annotations(dataset_directory, view_satellite=False, view_star=False, view_image=True):

## Custom parameter search example

You can search for and count the number of samples with a certain attribute. __image_attributes__ is for parameters relevant to the sample in each dataset. __annotation_attributes__ is for parameters relevant to each annotation in the dataset. You can use pandas to query the datasets.

In [None]:
# Pandas dataframes for reference.
stats_file = local_files.statistics_file
# Per-sample parameters of the dataset.
image_attributes = stats_file.sample_attributes
# Per-annotation parameters of each sample.
annotation_attributes = stats_file.annotation_attributes

# Histogram of the number of objects per sample.
# NOTE(review): `plt` is not imported explicitly; presumably it arrives via
# `from plots import *` -- confirm.
num_objects = image_attributes["num_objects"]
plt.hist(num_objects, bins=100)
plt.show()

# Count the samples that contain no objects.
empty_samples = image_attributes[num_objects == 0]
print(len(empty_samples))

## Delete Unwanted Files

You can delete files from the dataset that have a certain attribute. __image_attributes__ is for parameters relevant to the sample in each dataset and must be used with `delete_files_from_sample`. __annotation_attributes__ is for parameters relevant to each annotation in the dataset and must be used with `delete_files_from_annotation`. You can use pandas to query the datasets.

In [None]:
#Pandas dataframes for reference
#Sample attributes hold parameters relevant to each sample in the dataset
image_attributes = local_files.statistics_file.sample_attributes
#Annotation attributes hold parameters relevant to each annotation in each sample
annotation_attributes = local_files.statistics_file.annotation_attributes


#NOTE(review): both calls below are left commented out on purpose; they presumably remove files from the dataset -- verify the query before uncommenting.
#Delete files based on the annotation attributes - Inside is a pandas query that you can modify 
# local_files.delete_files_from_annotation(annotation_attributes[annotation_attributes['measured_snr'] < 5.0])

#Delete files based on the sample attributes - Inside is a pandas query that you can modify 
#NOTE(review): .sample(700) draws 700 random rows, so the selection differs between runs unless a random_state is set.
# local_files.delete_files_from_sample(image_attributes[image_attributes['num_objects'] == 0].sample(700))

## Recalculate Statistics

In [None]:
# Recalculate the statistics for the dataset loaded in `local_files`.
# NOTE(review): presumably this should be re-run after deleting files so the statistics match what is on disk -- confirm.
local_files.recalculate_statistics()