In [4]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Navigate to project directory (make sure this path is correct for you)
import os
os.chdir('/content/drive/Othercomputers/My_Mac/sentinel')

# Install required packages
!pip install -q google-cloud-storage wget tqdm

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for wget (setup.py) ... [?25l[?25hdone


In [5]:
# Setup GCS credentials
from google.colab import auth
auth.authenticate_user()

# Set project ID
!gcloud config set project bug-sync-467815

Updated property [core/project].


In [None]:
from google.cloud import storage
from tqdm import tqdm
import os

def upload_to_gcs(local_dir, bucket_name, gcs_prefix=''):
    """Upload every file under a local directory to a GCS bucket.

    The tree rooted at ``local_dir`` is walked recursively. Each file is
    uploaded under an object name made of ``gcs_prefix`` followed by the
    file's path relative to ``local_dir``, joined with forward slashes.

    Args:
        local_dir: Directory whose contents are uploaded.
        bucket_name: Name of the destination bucket.
        gcs_prefix: Optional object-name prefix; '' uploads relative paths
            unchanged.

    Returns:
        None. When ``local_dir`` is not an existing directory an error
        message is printed and nothing is uploaded.
    """
    import posixpath

    # isdir (not exists): a path to a regular file would otherwise pass the
    # check and then silently upload nothing.
    if not os.path.isdir(local_dir):
        print(f"Error: Local directory '{local_dir}' does not exist. Skipping upload.")
        return

    client = storage.Client()
    bucket = client.bucket(bucket_name)

    # Walk the tree once and keep the result, so the progress-bar total is
    # exactly the set of files that gets uploaded.
    local_paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(local_dir)
        for name in names
    ]

    with tqdm(total=len(local_paths), desc=f"Uploading from {local_dir}") as pbar:
        for local_path in local_paths:
            relative_path = os.path.relpath(local_path, local_dir)
            # Object names are always '/'-separated, whatever the local
            # path separator is.
            gcs_path = posixpath.join(gcs_prefix, *relative_path.split(os.sep))

            blob = bucket.blob(gcs_path)
            blob.upload_from_filename(local_path)
            pbar.update(1)

In [None]:
# --- 1. DOWNLOAD VELODYNE DATA ---
print("--- Processing Velodyne Data (Step 1/2) ---")
velodyne_url = "https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_velodyne.zip"
!wget --user-agent="Mozilla/5.0" -O data_odometry_velodyne.zip "{velodyne_url}"

# --- 2. EXTRACT VELODYNE DATA ---
print("\nExtracting velodyne data...")
!unzip -o -q data_odometry_velodyne.zip -d /content/kitti_data
print("Extraction complete.")



--- Processing Velodyne Data (Step 1/2) ---
--2025-10-10 07:33:56--  https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_velodyne.zip
Resolving s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)... 3.5.139.209, 3.5.139.170, 52.219.171.65, ...
Connecting to s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)|3.5.139.209|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84786535790 (79G) [application/zip]
Saving to: ‘data_odometry_velodyne.zip’


2025-10-10 08:51:00 (17.5 MB/s) - ‘data_odometry_velodyne.zip’ saved [84786535790/84786535790]


Extracting velodyne data...
Extraction complete.


In [None]:
# --- 3. UPLOAD VELODYNE DATA TO GCS ---
# The zip extracts into /content/kitti_data/dataset/sequences
upload_dir = '/content/kitti_data/dataset'
upload_to_gcs(upload_dir, 'sentinel-kitti-data', '')



# --- 4. CLEAN UP LOCAL VELODYNE FILES ---
print("\nCleaning up local velodyne files...")
!rm data_odometry_velodyne.zip
!rm -rf /content/kitti_data
print("Cleanup complete.")

Uploading from /content/kitti_data/dataset: 100%|██████████| 43552/43552 [2:28:41<00:00,  4.88it/s]


In [None]:
from google.colab import drive

# 1. Mount your Google Drive
drive.mount('/content/drive')

# --- THIS IS THE FIX ---
# 2. Create the destination directory before copying
!mkdir -p /content/temp_kitti

# 3. Define your source and destination paths
source_path_in_drive = "/content/drive/Othercomputers/My_Mac/sentinel/data_odometry_labels.zip"
destination_path_in_colab = "/content/temp_kitti/data_odometry_labels.zip"

print(f"Copying file from Drive to Colab...")
# 4. Now the copy command will succeed
!cp "{source_path_in_drive}" "{destination_path_in_colab}"

print("\nCopy complete. Verifying file size in Colab:")
!ls -lh /content/temp_kitti/data_odometry_labels.zip

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Copying file from Drive to Colab...

Copy complete. Verifying file size in Colab:
-rw------- 1 root root 171M Oct 10 12:29 /content/temp_kitti/data_odometry_labels.zip


In [None]:
import os

# Define the directory where files will be extracted
EXTRACT_DIR = '/content/kitti_data'
os.makedirs(EXTRACT_DIR, exist_ok=True)

# Define the names of the downloaded files
files_to_unzip = ['data_odometry_velodyne.zip', 'data_odometry_labels.zip']

print("Extracting all zip files using system unzip command...")

for file in files_to_unzip:
    if os.path.exists(file):
        print(f"Extracting {file}...")
        # Use the robust system command !unzip
        # -o: overwrite files without asking
        # -q: quiet mode to prevent printing thousands of filenames
        !unzip -o -q {file} -d {EXTRACT_DIR}
    else:
        print(f"Warning: {file} not found. Skipping extraction.")

print("\nAll files extracted.")

Extracting all zip files using system unzip command...
Extracting data_odometry_velodyne.zip...
Extracting data_odometry_labels.zip...
error [data_odometry_labels.zip]:  missing 16777216 bytes in zipfile
  (attempting to process anyway)
error: invalid zip file with overlapped components (possible zip bomb)

All files extracted.


In [None]:
from google.cloud import storage

client = storage.Client()
bucket = client.bucket('sentinel-kitti-data')

# List the first 10 files to verify they are in the 'sequences' folder
blobs = bucket.list_blobs(prefix='sequences/', max_results=10)

print("First 10 files in GCS bucket under 'sequences/':")
for blob in blobs:
    print(f"  - {blob.name}")

# Clean up local temporary files
print("\nCleaning up local files...")
!rm -rf /content/temp_kitti
!rm -rf /content/kitti_data

print("Data setup complete!")

First 10 files in GCS bucket under 'sequences/':

Cleaning up local files...
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
Data setup complete!


In [2]:
import os

# Define the directory where the file will be extracted
EXTRACT_DIR = '/content/kitti_data'
os.makedirs(EXTRACT_DIR, exist_ok=True)

# The name of the zip file you copied to Colab
labels_zip_file = '/content/drive/Othercomputers/My_Mac/sentinel/data_odometry_labels.zip'

print(f"Extracting {labels_zip_file}...")
# Use the robust system command !unzip
# -o: overwrite files without asking
# -q: quiet mode to prevent printing thousands of filenames
!unzip -o -q {labels_zip_file} -d {EXTRACT_DIR}

print("\nExtraction complete.")

Extracting /content/drive/Othercomputers/My_Mac/sentinel/data_odometry_labels.zip...

Extraction complete.


In [6]:
from google.cloud import storage
from tqdm import tqdm
import os

def upload_to_gcs(local_dir, bucket_name, gcs_prefix=''):
    """Upload every file under a local directory to a GCS bucket.

    The tree rooted at ``local_dir`` is walked recursively. Each file is
    uploaded under an object name made of ``gcs_prefix`` followed by the
    file's path relative to ``local_dir``, joined with forward slashes.

    Args:
        local_dir: Directory whose contents are uploaded.
        bucket_name: Name of the destination bucket.
        gcs_prefix: Optional object-name prefix; '' uploads relative paths
            unchanged.

    Returns:
        None. When ``local_dir`` is not an existing directory an error
        message is printed and nothing is uploaded.
    """
    import posixpath

    # isdir (not exists): a path to a regular file would otherwise pass the
    # check and then silently upload nothing.
    if not os.path.isdir(local_dir):
        print(f"Error: Local directory '{local_dir}' does not exist. Skipping upload.")
        return

    client = storage.Client()
    bucket = client.bucket(bucket_name)

    # Walk the tree once and keep the result, so the progress-bar total is
    # exactly the set of files that gets uploaded.
    local_paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(local_dir)
        for name in names
    ]

    with tqdm(total=len(local_paths), desc=f"Uploading from {local_dir}") as pbar:
        for local_path in local_paths:
            relative_path = os.path.relpath(local_path, local_dir)
            # Object names are always '/'-separated, whatever the local
            # path separator is.
            gcs_path = posixpath.join(gcs_prefix, *relative_path.split(os.sep))

            blob = bucket.blob(gcs_path)
            blob.upload_from_filename(local_path)
            pbar.update(1)

# Root of the extracted archive. Its contents are uploaded with an empty
# object-name prefix.
upload_dir = '/content/kitti_data/dataset'

print("Uploading contents of " + upload_dir + " to GCS bucket 'sentinel-kitti-data'")
upload_to_gcs(local_dir=upload_dir, bucket_name='sentinel-kitti-data', gcs_prefix='')

Uploading contents of /content/kitti_data/dataset to GCS bucket 'sentinel-kitti-data'


Uploading from /content/kitti_data/dataset: 100%|██████████| 23223/23223 [4:25:31<00:00,  1.46it/s]
