# Example of loading GCS data

In [8]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
from dotenv import load_dotenv
import shutil

from collab_data.file_utils import expand_path, get_project_root
from collab_data.gcs_utils import GCSClient

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Grab API keys from environment

In [None]:
# Load environment variables from .env file
load_dotenv()

gerbils_key = os.environ.get("COLLAB_GERBILS_KEY")
data_key = os.environ.get("COLLAB_DATA_KEY")

print(f"Gerbils key: {gerbils_key}")
print(f"Data key: {data_key}")

Gerbils key: /Users/tommybotch/Documents/api-keys/collab-gerbils-444419-2c6aa9a47bb5.json
Data key: /Users/tommybotch/Documents/api-keys/collab-data-463313-c340ad86b28e.json


Grab for gerbils data

In [None]:
CURRENT_PROJECT = "COLLAB_GERBILS"
PROJECT_KEY = Path(os.environ.get(f"{CURRENT_PROJECT}_KEY"))
PROJECT_ID = "-".join(PROJECT_KEY.stem.split("-")[:-1])

# Connect to GCS
gcs_client = GCSClient(
    project_id=PROJECT_ID,
    credentials_path=expand_path(PROJECT_KEY.as_posix(), get_project_root()),
)

[32m2025-07-22 17:24:53.492[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36m__init__[0m:[36m34[0m - [1mUsing credentials from /Users/tommybotch/Documents/api-keys/collab-gerbils-444419-2c6aa9a47bb5.json[0m
[32m2025-07-22 17:24:53.497[0m | [1mINFO    [0m | [36mcollab_data.gcs_utils[0m:[36m__init__[0m:[36m40[0m - [1mUsing project collab-gerbils-444419[0m


Show current buckets

In [4]:
all_buckets = gcs_client.list_buckets()
print(f"Available buckets: {all_buckets}")

Available buckets: ['collab-gerbils-444419_cloudbuild', 'collab-gerbils-raw', 'gerbil-videos']


Grab a file from the videos bucket

In [None]:
# Grab a file
file_path = gcs_client.glob(f"{all_buckets[-1]}/cohort2/*final*")[0]

# Create a local path for downloading
local_data_path = Path("./data") / Path(file_path).name

# Download the file
gcs_client.gcs.get_file(
    rpath=file_path,
    lpath=local_data_path.as_posix(),
)

### Remove downloaded file

In [None]:
shutil.rmtree("./data")