In [2]:
from uuid import uuid4
import os
import pandas as pd
from databricks.sdk import WorkspaceClient

from ag_vision.drone.ingest import DroneDataIngest

os.environ.pop('DATABRICKS_HOST', None)
os.environ.pop('DATABRICKS_TOKEN', None)

w = WorkspaceClient(profile='tnau')

In [3]:
w.config.host

'https://dbc-d9e47e1c-4080.cloud.databricks.com'

In [4]:
# Change the following to the location where the local data is stored.
parent_folder_path = '/Users/danielwilliams/Documents/Field Data/drone_upload_test'
raw_image_dir = f'{parent_folder_path}/raw_flight_data/'
files = os.listdir(raw_image_dir)
files = [raw_image_dir + x for x in files]
files

['/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144351_0235_MS_R.TIF',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144459_0269_MS_R.TIF',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144457_0268_MS_RE.TIF',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_202411301311_002_TNAU-VBN-30112024-20_PPKRAW.bin',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144453_0266_MS_NIR.TIF',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144459_0269_MS_G.TIF',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144453_0266_D.JPG',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_flight_data/DJI_20241124144449_0264_MS_RE.TIF',
 '/Users/danielwilliams/Documents/Field Data/drone_upload_test/raw_

In [5]:
flight_metadata = {
    "id": str(uuid4()),
    "name": "corn_health_assessment_flight",  # this will be the mission name in the folder path
    "task": "drone_phenotyping",
    "location": {
        "site": "TNAU-Coimbatore".lower(),  # Try to keep these standard ORG-Site EX CIAT-Cali, or CIAT-Arusha....
        "field": "West_Field_03".lower(),
        # what is the name of the field that the trial was run on? The field and location can be the same name if there is no difference.
        "location": "Section_B".lower()
        # The location corresponds to a specific field book. If data is in EBS match the location name.
    },
    "trialProperties": {
        "name": "BETS"  # What is the name of the trial, a trial usually has multiple locations.
    },
    "drone_acquisition_properties": {
        'date': '11/25/2024',
        'drone_make': "DJI",
        'drone_model': "Phantom Mavic 4 Enterprise",
        'camera_make': "Sony",
        'camera_model': "UCM-R",
        'groundControlPoints': True,
        'reflectancePanels': True,
        'reflectancePanelType': 'Micasense',
        'flightHeight': 90.0,
        'horizontalOverlapPercentage': 75.0,
        'verticalOverlapPercentage': 75.0,
        'gpsQuality': "RTK",
        'multispecChannels': ["Red", "Green", "Blue", "NIR"]
    },
    "agronomic_properties": {
        "crop_type": "corn",  # Required
        "growth_stage": "VT",  # optional
        "soil_color": None,
        "weed_pressure": None,
        "irrigation_level": None,
        "tillage_type": None,
        "fertilizer_level": None
    }
}

In [6]:
# This points to the aps1-prod-tnau-fg workspace _YOU SHOULD NOT NEED TO CHANGE THIS.
workspace_bucket = '/Volumes/aps1_prod_tnau_fg_catalog_1336582592012881/tier1_raw/data'

ingest = DroneDataIngest(platform='local',  # DONT CHANGE THIS
                         cloud_bucket=workspace_bucket, # Set this to the bucket you want to save the data to.
                         cloud_client=w, # should not need to change.
                         flight_date='11/25/2024',
                         plot_boundary_key=f"{parent_folder_path}/plot_boundary.geojson",
                         gcp_key=f"{parent_folder_path}/gcp_details.geojson",
                         orthomosaic_key=f"{parent_folder_path}/30112024_ORTHO.tif",
                         dem_key=f"{parent_folder_path}/30112024_DSM.tif",
                         flight_metadata_key=f"{parent_folder_path}/flight_details.json")  # If this does not exist then this just tells the code were to save it so it can be uploaded.

In [7]:
ingest.load_metadata_from_dict(metadata_dict=flight_metadata)

# season is needed to generate the mission dir, This has lots of Validation and will throw assert errors.
ingest.add_season_code_to_metadata(year=2025,
                                   country='IND',
                                   crop='maize',
                                   time_of_year='spring')

# This is the main dir where all the data will be stored.
ingest.generate_drone_mission_dir_path()

In [8]:
# This saves the metadata locally do it can be uploaded.
ingest.save_flight_metadata_to_json_local()

In [10]:
# This will be a list of all the raw files from the flight eg: nav, bin, tif, and jpg files.
ingest.generate_raw_ingest_df(file_list=files)

In [13]:
def determine_camera_type(f_name):
    if "_MS_" in f_name:
        return 'multi-spec'
    elif ".JPG" in f_name:
        return 'rgb'
    else:
        return 'flight_data'

ingest.raw_ingest_df['camera'] = ingest.raw_ingest_df['file_name'].apply(determine_camera_type)

flight_data = ingest.raw_ingest_df[ingest.raw_ingest_df['file_type'] == 'flight_data']

f_ingest_df_list = []

# The flight data needs to be saved with both camera data sets.
for idx, df in ingest.raw_ingest_df.groupby('camera'):
    if idx in ['rgb', 'multi-spec']:
        out_df = pd.concat([df, flight_data])
        out_df['camera'] = idx
        f_ingest_df_list.append(out_df)

f_ingest_df = pd.concat(f_ingest_df_list)


flight_data
multi-spec
rgb


In [14]:
ingest.raw_ingest_df = f_ingest_df

In [15]:
# generates the new path names where to save it in databricks
ingest.generate_raw_image_dst_path_name()

In [16]:
ingest.raw_ingest_df.sample(10)

Unnamed: 0,src_path,file_name,img_ext,file_type,camera,dst_path
37,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144459_0269_MS_RE.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
75,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144501_0270_D.JPG,.JPG,,rgb,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
1,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144459_0269_MS_R.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
38,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144457_0268_MS_R.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
41,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144455_0267_MS_G.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
7,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144449_0264_MS_RE.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
70,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144455_0267_MS_NIR.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
56,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144413_0246_D.JPG,.JPG,,rgb,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
26,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144447_0263_MS_RE.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...
27,/Users/danielwilliams/Documents/Field Data/dro...,DJI_20241124144501_0270_MS_G.TIF,.TIF,,multi-spec,/Volumes/aps1_prod_tnau_fg_catalog_13365825920...


In [17]:
# Upload the metadata to databricks
ingest.upload_metadata_to_db()

Uploading: 100%|██████████| 1.65k/1.65k [00:02<00:00, 695B/s]  


In [18]:
# uploads the plot boundaries to databricks
ingest.upload_plot_boundary_to_db()

Saving to /Volumes/aps1_prod_tnau_fg_catalog_1336582592012881/tier1_raw/data/tnau-coimbatore/bets/2025:ind:maize:spring/west_field_03/section_b/drone/corn_health_assessment_flight/field_data/plot_boundary.geojson


Uploading: 100%|██████████| 190k/190k [00:01<00:00, 125kB/s]


In [19]:
# Uploads the ground control points to databricks
ingest.upload_gcp_to_db()

Saving to /Volumes/aps1_prod_tnau_fg_catalog_1336582592012881/tier1_raw/data/tnau-coimbatore/bets/2025:ind:maize:spring/west_field_03/section_b/drone/corn_health_assessment_flight/field_data/ground_control_points.geojson


Uploading: 100%|██████████| 1.28k/1.28k [00:00<00:00, 2.85kB/s]


In [20]:
# uploads all the raw flight data to databricks
ingest.upload_raw_flight_data_to_db()

0it [00:00, ?it/s]
Uploading:   0%|          | 0.00/10.1M [00:00<?, ?B/s][A
Uploading:   3%|▎         | 256k/9.62M [00:00<00:10, 896kB/s][A
Uploading:   5%|▍         | 480k/9.62M [00:00<00:11, 803kB/s][A
Uploading:  10%|▉         | 976k/9.62M [00:00<00:06, 1.31MB/s][A
Uploading:  15%|█▍        | 1.41M/9.62M [00:00<00:04, 1.97MB/s][A
Uploading:  17%|█▋        | 1.67M/9.62M [00:01<00:05, 1.64MB/s][A
Uploading:  20%|█▉        | 1.89M/9.62M [00:01<00:04, 1.74MB/s][A
Uploading:  26%|██▌       | 2.52M/9.62M [00:01<00:02, 2.67MB/s][A
Uploading:  31%|███       | 2.94M/9.62M [00:01<00:02, 2.70MB/s][A
Uploading:  35%|███▌      | 3.39M/9.62M [00:01<00:02, 3.06MB/s][A
Uploading:  39%|███▉      | 3.73M/9.62M [00:01<00:02, 2.93MB/s][A
Uploading:  42%|████▏     | 4.05M/9.62M [00:01<00:02, 2.70MB/s][A
Uploading:  45%|████▍     | 4.33M/9.62M [00:02<00:02, 2.57MB/s][A
Uploading:  48%|████▊     | 4.59M/9.62M [00:02<00:02, 2.57MB/s][A
Uploading:  51%|█████     | 4.92M/9.62M [00:02<00:01, 2.5

KeyboardInterrupt: 

In [None]:
# If you have a dem generated you can upload it so we don't need to reprocess.
ingest.upload_dem_to_db(method='agisoft',  # the method used to generate the dem.
                        dem_date='11/25/2024',  # the date the dem was generated.
                        file_name='dem.tif')

Saving to /Volumes/aps1_prod_tnau_fg_catalog_1336582592012881/tier1_raw/data/tnau-coimbatore/bets/2025:ind:maize:spring/west_field_03/section_b/drone/corn_health_assessment_flight/2024-11-25/dem/agisoft_2024-11-25/dem.tif


Uploading:  54%|█████▍    | 800M/1.45G [05:36<08:11, 1.47MB/s] 

In [None]:
# f"{drone_flight_path}/orthomosaic/{method}_{date}/{image_name}"
ingest.upload_orthomosaic_to_db(method='agisoft',  # the method used to generate the ortho.
                                ortho_date='11/25/2024', # the date the ortho was generated.
                                camera='multi-spec', # The camera that was used.
                                file_name='ms.tif')  # if RGB then you can name it rgb.tif