# GCS Access

In [1]:
import os
from pathlib import Path

from dotenv import load_dotenv

from collab_env.data.file_utils import expand_path, get_project_root
from collab_env.data.gcs_utils import GCSClient

%load_ext autoreload
%autoreload 2

## Example 1: Downloading files

### Grab API keys from environment

In this example, the project directory should contain a `.env` file that defines `COLLAB_GERBILS_KEY` and `COLLAB_DATA_KEY`, each set to the path of the credential file for the corresponding GCS project. Your specific setup may vary.

In [2]:
# Read the .env file so its variables become visible through os.environ
load_dotenv()

# Each credential path is read from its environment variable (empty string
# when unset) and handed to expand_path together with the project root.
gerbils_key = expand_path(
    os.getenv("COLLAB_GERBILS_KEY", ""),
    get_project_root(),
)
data_key = expand_path(
    os.getenv("COLLAB_DATA_KEY", ""),
    get_project_root(),
)

# Show the resolved paths
print(f"Gerbils key: {gerbils_key}")
print(f"Data key: {data_key}")

Gerbils key: /Users/dima/git/collab-environment/config-local/collab-gerbils-444419-2c6aa9a47bb5.json
Data key: /Users/dima/git/collab-environment/config-local/collab-data-463313-c340ad86b28e.json


### Connect

In [3]:
CURRENT_PROJECT = "COLLAB_GERBILS"

# Look up the credential path for the selected project and fail early with a
# readable message if the variable is missing or empty, instead of letting
# Path(None) raise an opaque TypeError.
key_env_var = f"{CURRENT_PROJECT}_KEY"
key_env_value = os.environ.get(key_env_var)
if not key_env_value:
    raise RuntimeError(
        f"Environment variable {key_env_var} is not set; "
        "define it (e.g. in the .env file) as the path to the credentials file."
    )
PROJECT_KEY = Path(key_env_value)

# The project id is the key file's stem with its last hyphen-separated token dropped
PROJECT_ID = "-".join(PROJECT_KEY.stem.split("-")[:-1])

# Connect to GCS
gcs_client = GCSClient(
    project_id=PROJECT_ID,
    credentials_path=expand_path(PROJECT_KEY.as_posix(), get_project_root()),
)

[32m2025-07-23 18:51:03.553[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36m__init__[0m:[36m34[0m - [1mUsing credentials from /Users/dima/git/collab-environment/config-local/collab-gerbils-444419-2c6aa9a47bb5.json[0m
[32m2025-07-23 18:51:03.554[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36m__init__[0m:[36m40[0m - [1mUsing project collab-gerbils-444419[0m


### Show current buckets

In [4]:
# Ask the client for its bucket listing and print it; `all_buckets` is reused
# by the download cell below.
all_buckets = gcs_client.list_buckets()
print(f"Available buckets: {all_buckets}")

Available buckets: ['collab-gerbils-444419_cloudbuild', 'collab-gerbils-raw', 'gerbil-videos']


### Grab a file from the videos bucket

In [5]:
# Choose the bucket to search.
# NOTE(review): this takes the LAST entry of the bucket listing, so it depends
# on the listing order — confirm that entry is the videos bucket.
if not all_buckets:
    raise RuntimeError("No buckets are available for the current project.")
videos_bucket = all_buckets[-1]

# Grab the first file matching the pattern; fail with a clear message when
# nothing matches instead of an IndexError on [0].
glob_pattern = f"{videos_bucket}/cohort2/*final*"
matching_files = gcs_client.glob(glob_pattern)
if not matching_files:
    raise FileNotFoundError(f"No files match {glob_pattern!r}")
file_path = matching_files[0]

# Create a local path for downloading
local_data_path = expand_path(Path("./data") / Path(file_path).name, get_project_root())

# Make sure the destination directory exists before downloading into it
Path(local_data_path).parent.mkdir(parents=True, exist_ok=True)

# Download the file
gcs_client.gcs.get_file(
    rpath=file_path,
    lpath=local_data_path.as_posix(),
)

### Remove downloaded file

In [6]:
# Delete the downloaded file. Guard on existence so the cell can be re-run
# without raising FileNotFoundError once the file is already gone.
if os.path.exists(local_data_path):
    os.remove(local_data_path)