In [4]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
import shutil
from dotenv import load_dotenv

from collab_data.file_utils import expand_path, get_project_root
from collab_data.gcs_utils import GCSClient

### Grab API keys

In [5]:
# Read the .env file so the key locations below are available via os.environ.
load_dotenv()

# Each variable holds the location of a credentials file (None when unset).
gerbils_key = os.getenv("COLLAB_GERBILS_KEY")
data_key = os.getenv("COLLAB_DATA_KEY")

# Echo both locations, one per line, as a quick sanity check.
print(
    f"Gerbils key: {gerbils_key}",
    f"Data key: {data_key}",
    sep="\n",
)

Gerbils key: /workspace/api-keys/collab-gerbils-444419-2c6aa9a47bb5.json
Data key: /workspace/api-keys/collab-data-463313-c340ad86b28e.json


### Grab current project data

In [6]:
# Which project's credentials to use; the key file location is read from the
# environment variable "<CURRENT_PROJECT>_KEY".
CURRENT_PROJECT = "COLLAB_DATA"

key_env_var = f"{CURRENT_PROJECT}_KEY"
key_location = os.environ.get(key_env_var)
if not key_location:
    # Without this check Path(None) fails with an unhelpful TypeError.
    raise RuntimeError(
        f"Environment variable {key_env_var} is not set; "
        "define it in .env (and run load_dotenv()) or export it before running this cell."
    )
PROJECT_KEY = Path(key_location)

# The project id is the key file's stem with its last "-"-separated token removed,
# e.g. "collab-data-463313-c340ad86b28e" -> "collab-data-463313".
PROJECT_ID = PROJECT_KEY.stem.rpartition("-")[0]

# Connect to GCS
gcs_client = GCSClient(
    project_id=PROJECT_ID,
    credentials_path=expand_path(PROJECT_KEY.as_posix(), get_project_root()),
)

[32m2025-08-08 14:04:53.209[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36m__init__[0m:[36m34[0m - [1mUsing credentials from /workspace/api-keys/collab-data-463313-c340ad86b28e.json[0m
[32m2025-08-08 14:04:53.296[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36m__init__[0m:[36m40[0m - [1mUsing project collab-data-463313[0m


### Find current folders

In [7]:
# Fetch the bucket listing from the GCS client and show it.
all_buckets = gcs_client.list_buckets()
bucket_summary = f"Available buckets: {all_buckets}"
print(bucket_summary)

Available buckets: ['fieldwork_curated', 'fieldwork_processed', 'fieldwork_storage_old', 'roboflow_model']


### Start with the processed bucket (`fieldwork_processed`)

In [8]:
# NOTE: despite the "curated" naming, this points at the 'fieldwork_processed' bucket.
curated_data_dir = "fieldwork_processed"

# Glob one level below the bucket root (presumably one entry per session -- confirm).
curated_glob_pattern = curated_data_dir + "/*"
curated_dirs = gcs_client.glob(curated_glob_pattern)

#### Create a directory and push to gcloud

In [16]:
# Upload one local session directory to its matching directory on GCS.
# Files already present remotely are skipped, except anything whose local path
# contains 'mesh', which is always (re-)uploaded.

# Local session to upload and where it lives on disk.
session = '2023-11-05'
session_dir = Path("/workspace/fieldwork-data/birds/") / session
if not session_dir.is_dir():
    # os.walk on a missing directory yields nothing, which would silently upload nothing.
    raise FileNotFoundError(f"Local session directory not found: {session_dir}")

# Remote directory names use underscores in the date (e.g. 2023_11_05-session_0001),
# so match on the underscore form of the session name.
session_tag = session.replace('-', '_')
curated_session_dirs = [d for d in curated_dirs if session_tag in d]
if not curated_session_dirs:
    raise FileNotFoundError(
        f"No remote directory matching {session_tag!r} among: {curated_dirs}"
    )
# If several remote directories match, the first one is used.
curated_dir = curated_session_dirs[0]

# Files sitting directly in folders with these names are never uploaded
# (presumably downsampled image copies -- confirm).
SKIPPED_DIR_NAMES = {'images_4', 'images_8'}

# Everything already under the remote session directory; the set gives O(1) lookups
# while processed_files keeps the original listing.
processed_files = gcs_client.glob(f"{curated_dir}/**")
already_uploaded = set(processed_files)

for root, _, files in os.walk(session_dir):
    # Only this directory's own files are skipped; its subdirectories are still walked.
    if Path(root).name in SKIPPED_DIR_NAMES:
        continue

    for file in files:
        local_path = Path(root) / file
        relative_path = local_path.relative_to(session_dir)
        gcs_path = f"{curated_dir}/{relative_path.as_posix()}"

        # Skip files that already exist remotely, unless the path mentions 'mesh'.
        if 'mesh' not in local_path.as_posix() and gcs_path in already_uploaded:
            continue

        gcs_client.upload_file(str(local_path), gcs_path)

# Optional: list the remote contents to verify the upload.
# print("\nUploaded GCS paths:\n")
# for path in gcs_client.glob(f"{curated_dir}/**"):
#     print(path)

[32m2025-08-08 16:00:22.486[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36mupload_file[0m:[36m179[0m - [1mUploading file /workspace/fieldwork-data/birds/2023-11-05/environment/PXL_20231105_154956078/rade-features/mesh/transforms.pkl to fieldwork_processed/2023_11_05-session_0001/environment/PXL_20231105_154956078/rade-features/mesh/transforms.pkl.[0m
[32m2025-08-08 16:00:22.796[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36mupload_file[0m:[36m181[0m - [1mUploaded file /workspace/fieldwork-data/birds/2023-11-05/environment/PXL_20231105_154956078/rade-features/mesh/transforms.pkl to fieldwork_processed/2023_11_05-session_0001/environment/PXL_20231105_154956078/rade-features/mesh/transforms.pkl.[0m
[32m2025-08-08 16:00:22.798[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36mupload_file[0m:[36m179[0m - [1mUploading file /workspace/fieldwork-data/birds/2023-11-05/environment/PXL_20231105_154956078/rade-features/mesh/mesh.ply to fieldw

### Redownload colmap files

In [None]:
# Re-download a single COLMAP feature file from the remote session directory.
colmap_rel_path = "environment/C0043/preproc/colmap/features.h5"

# curated_dir carries no trailing slash, so the separator must be added explicitly;
# plain string concatenation produced "...session_0001environment/...".
colmap_fn = f"{curated_dir.rstrip('/')}/{colmap_rel_path}"

# NOTE(review): this destination hardcodes session 2024-02-06, while the upload cell
# currently sets session = '2023-11-05' -- confirm curated_dir points at the intended
# session before running.
out_fn = "/workspace/fieldwork-data/birds/2024-02-06/environment/C0043/preproc/colmap/features.h5"

gcs_client.gcs.get_file(colmap_fn, out_fn)

In [None]:

# file_path = gcs_client.glob(f"{all_buckets[-1]}/field/*final*")[0]