# GCS Access

In [None]:
import tempfile
import uuid
from pathlib import Path

from collab_env.data.gcs_utils import GCSClient

%load_ext autoreload
%autoreload 2

## Example 2: create and manipulate a remote bucket

In [2]:
# Random suffix so that repeated runs of this notebook do not reuse a bucket name.
BUCKET_NAME = f"test-bucket-collab-data_{uuid.uuid4()}"

# Connect to GCS with the default credentials for the collab-data project,
# then create the test bucket.
gcs_client = GCSClient()
gcs_client.create_bucket(BUCKET_NAME)

# Show the contents of the bucket right after creation.
new_bucket_listing = gcs_client.glob(f"{BUCKET_NAME}/*")
print(f"globbing the bucket {BUCKET_NAME}:\n\n{new_bucket_listing}")

[32m2025-07-23 18:46:51.274[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36m__init__[0m:[36m34[0m - [1mUsing credentials from /Users/dima/git/collab-environment/config-local/collab-data-463313-c340ad86b28e.json[0m
[32m2025-07-23 18:46:51.275[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36m__init__[0m:[36m40[0m - [1mUsing project collab-data-463313[0m
[32m2025-07-23 18:46:52.379[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mcreate_bucket[0m:[36m66[0m - [1mCreated bucket test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402.[0m


globbing the bucket test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402:

[]


In [3]:
# Fetch the bucket names visible to this client and create ours only if missing.
all_buckets = gcs_client.list_buckets()

if BUCKET_NAME in all_buckets:
    print(f"Bucket {BUCKET_NAME} already exists")
else:
    print(f"Bucket {BUCKET_NAME} does not exist, creating it...")
    gcs_client.create_bucket(BUCKET_NAME)

# List again so the output reflects any bucket created just above.
print(f"Available buckets: {gcs_client.list_buckets()}")

Bucket test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402 already exists
Available buckets: ['fieldwork_curated', 'fieldwork_processed', 'test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402']


### Create a folder for the bucket

In [4]:
# Random folder name, different on every run.
folder_name = f"test_folder_{uuid.uuid4()}"
folder_path = f"{BUCKET_NAME}/{folder_name}"

gcs_client.create_folder(folder_path)

# Pattern matching everything directly under the new folder; later cells reuse it.
glob_str = f"{folder_path}/*"
folder_listing = gcs_client.glob(glob_str)
print(f"globbing the folder {folder_name}:\n\n{folder_listing}")

[32m2025-07-23 18:46:53.340[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mcreate_folder[0m:[36m119[0m - [1mCreated folder marker at gs://test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.folder_marker[0m


globbing the folder test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04:

['test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.folder_marker']


### Adding files to bucket

Create a file locally and upload it to the GCS bucket

In [5]:
# Write a small throwaway file locally and push it into the test folder.
# The upload runs inside the `with` block because the temporary file is
# deleted as soon as the block exits (delete=True is the default).
upload_target = f"{BUCKET_NAME}/{folder_name}/"
with tempfile.NamedTemporaryFile() as temp_file:
    temp_file.write(b"test file content")
    temp_file.flush()  # push buffered bytes out so the upload sees the content
    gcs_client.upload_file(temp_file.name, upload_target)

print(f"globbing the folder {folder_name}:\n\n{gcs_client.glob(glob_str)}")

[32m2025-07-23 18:46:53.390[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mupload_file[0m:[36m179[0m - [1mUploading file /var/folders/2j/jdg5vjdx4x3_5tmlzl7nzygh0000gn/T/tmpu_ovefeb to test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.[0m
[32m2025-07-23 18:46:53.560[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mupload_file[0m:[36m181[0m - [1mUploaded file /var/folders/2j/jdg5vjdx4x3_5tmlzl7nzygh0000gn/T/tmpu_ovefeb to test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.[0m


globbing the folder test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04:

['test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.folder_marker', 'test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/tmpu_ovefeb']


### Access files from GCS bucket

In [6]:
# Read the uploaded object back via `gcs_client.gcs.open` and print its content.
# The folder listing shows the object stored under the temp file's basename, so
# build the path with `.name` (the full final path component) rather than
# `.stem`, which would drop anything after the last dot in the file name.
uploaded_object_path = f"{BUCKET_NAME}/{folder_name}/{Path(temp_file.name).name}"
with gcs_client.gcs.open(uploaded_object_path, "r") as f:
    content = f.read()
    print(f"File content:\n{content}")

File content:
test file content


### Remove file from bucket

In [7]:
# Delete the uploaded object. It is stored under the temp file's basename, so
# use `.name` rather than `.stem` (which would drop a trailing ".ext" if the
# file name had one and then point at a non-existent object).
uploaded_object_path = f"{BUCKET_NAME}/{folder_name}/{Path(temp_file.name).name}"
gcs_client.delete_path(uploaded_object_path)

# The folder listing should no longer include the uploaded file.
print(f"globbing the folder {folder_name}:\n\n{gcs_client.glob(glob_str)}")

[32m2025-07-23 18:46:53.935[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mdelete_path[0m:[36m192[0m - [1mDeleted path test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/tmpu_ovefeb.[0m


globbing the folder test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04:

['test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.folder_marker']


### Remove the created test folder

In order for a folder to be removed, it must be empty apart from its `.folder_marker` placeholder, which `remove_folder` deletes itself

In [8]:
# Remove the test folder, then glob the whole bucket to show what is left.
folder_to_remove = f"{BUCKET_NAME}/{folder_name}/"
gcs_client.remove_folder(folder_to_remove)

bucket_glob_str = f"{BUCKET_NAME}/*"
remaining_paths = gcs_client.glob(bucket_glob_str)
print(f"globbing the bucket {BUCKET_NAME}:\n\n{remaining_paths}")

[32m2025-07-23 18:46:54.379[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mremove_folder[0m:[36m155[0m - [1mRemoved folder marker at gs://test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/.folder_marker[0m
[32m2025-07-23 18:46:54.503[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mremove_folder[0m:[36m160[0m - [1mRemoved folder test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402/test_folder_be9cf0a2-4dae-4044-90fd-8cfe56889b04/[0m


globbing the bucket test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402:

[]


### Delete the test bucket

In order for a bucket to be removed, it must be empty (no folders)

In [9]:
# Delete the test bucket itself, then list the buckets that remain to confirm
# it is gone.
gcs_client.delete_bucket(BUCKET_NAME)
all_buckets = gcs_client.list_buckets()
print(f"Available buckets: {all_buckets}")

[32m2025-07-23 18:46:55.079[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36mdelete_bucket[0m:[36m86[0m - [1mDeleted bucket test-bucket-collab-data_b132886a-f1ec-4b6d-af82-4727cd9a4402.[0m


Available buckets: ['fieldwork_curated', 'fieldwork_processed']
