In [1]:
import boto3
from botocore.exceptions import ClientError
from datetime import datetime

In [2]:
# Configuration
# Names of the bucket to read from and the bucket to copy into, plus the AWS
# region handed to the client below.
SOURCE_BUCKET = 'valencialistings'
DEST_BUCKET = 'prod-vlc-real-estate-analytics-listings'
REGION = 'eu-central-1'

# Initialize S3 client
# One module-level client; list_all_objects and copy_objects both use it.
s3_client = boto3.client('s3', region_name=REGION)

In [3]:
# List all objects in source bucket using pagination
def list_all_objects(bucket_name):
    """Collect the listing records of every object in ``bucket_name``.

    Walks all pages produced by the ``list_objects_v2`` paginator of the
    module-level ``s3_client`` and flattens their ``'Contents'`` entries into
    one list. Pages that carry no ``'Contents'`` key contribute nothing.

    Args:
        bucket_name: Name of the bucket to list.

    Returns:
        A list with the ``'Contents'`` entries of all pages, in page order.
    """
    pages = s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name)
    return [
        entry
        for page in pages
        if 'Contents' in page
        for entry in page['Contents']
    ]

# Build the source listing once; later cells reuse `source_objects` for the
# preview, the copy and the verification.
print(f"Listing objects in {SOURCE_BUCKET}...")
source_objects = list_all_objects(SOURCE_BUCKET)
print(f"Found {len(source_objects)} objects")

Listing objects in valencialistings...
Found 2373 objects


In [4]:
# Preview the first few source objects before copying anything
preview = source_objects[:10]

print("Sample files to be copied:")
for obj in preview:
    print(f"  - {obj['Key']} ({obj['Size']} bytes, modified: {obj['LastModified']})")

# Only mention a remainder when the listing is longer than the preview
remaining = len(source_objects) - len(preview)
if remaining > 0:
    print(f"  ... and {remaining} more files")

Sample files to be copied:
  - rent_20230409_120044_1.json (101265 bytes, modified: 2023-04-09 12:00:55+00:00)
  - rent_20230409_120044_2.json (100865 bytes, modified: 2023-04-09 12:00:55+00:00)
  - rent_20230409_120044_3.json (111871 bytes, modified: 2023-04-09 12:00:56+00:00)
  - rent_20230409_120044_4.json (115963 bytes, modified: 2023-04-09 12:00:56+00:00)
  - rent_20230409_120044_5.json (101650 bytes, modified: 2023-04-09 12:00:57+00:00)
  - rent_20230409_120044_6.json (16372 bytes, modified: 2023-04-09 12:00:57+00:00)
  - rent_20230416_120044_1.json (100502 bytes, modified: 2023-04-16 12:00:56+00:00)
  - rent_20230416_120044_2.json (96886 bytes, modified: 2023-04-16 12:00:56+00:00)
  - rent_20230416_120044_3.json (115956 bytes, modified: 2023-04-16 12:00:57+00:00)
  - rent_20230416_120044_4.json (110205 bytes, modified: 2023-04-16 12:00:57+00:00)
  ... and 2363 more files


In [5]:
# Copy all objects from source to destination
def copy_objects(source_bucket, dest_bucket, objects):
    """Copy objects from source bucket to destination bucket.

    Every record in ``objects`` is copied under the same key with
    ``s3_client.copy_object``. A ``ClientError`` on one key is printed and
    counted, and the loop continues with the next key.

    Args:
        source_bucket: Name of the bucket to copy from.
        dest_bucket: Name of the bucket to copy into.
        objects: Sequence of listing records; only each record's ``'Key'`` is
            read. Must support ``len()``.

    Returns:
        Tuple ``(copied, failed)`` with the number of keys in each outcome.
    """
    copied = 0
    failed = 0
    total = len(objects)

    for i, obj in enumerate(objects, 1):
        key = obj['Key']

        try:
            # NOTE(review): copy_object is a single-request copy; S3 documents
            # a size ceiling for it (5 GB) - confirm no object exceeds that.
            s3_client.copy_object(
                CopySource={'Bucket': source_bucket, 'Key': key},
                Bucket=dest_bucket,
                Key=key
            )
        except ClientError as e:
            print(f"Error copying {key}: {e}")
            failed += 1
        else:
            copied += 1

        # Progress update every 100 files. Reported outside the try block so
        # the line still appears when the 100th, 200th, ... object is the one
        # that failed.
        if i % 100 == 0:
            print(f"Progress: {i}/{total} files processed ({copied} copied, {failed} failed)")

    return copied, failed

# Run the copy over the full source listing and time it
print("\nStarting copy operation...")

start_time = datetime.now()
copied_count, failed_count = copy_objects(SOURCE_BUCKET, DEST_BUCKET, source_objects)
end_time = datetime.now()

duration = (end_time - start_time).total_seconds()

# Summary of the run, emitted as one multi-line message
print(
    "\nCopy operation completed!\n"
    f"  - Successfully copied: {copied_count} files\n"
    f"  - Failed: {failed_count} files\n"
    f"  - Duration: {duration:.2f} seconds"
)


Starting copy operation...
Progress: 100/2373 files processed (100 copied, 0 failed)
Progress: 200/2373 files processed (200 copied, 0 failed)
Progress: 300/2373 files processed (300 copied, 0 failed)
Progress: 400/2373 files processed (400 copied, 0 failed)
Progress: 500/2373 files processed (500 copied, 0 failed)
Progress: 600/2373 files processed (600 copied, 0 failed)
Progress: 700/2373 files processed (700 copied, 0 failed)
Progress: 800/2373 files processed (800 copied, 0 failed)
Progress: 900/2373 files processed (900 copied, 0 failed)
Progress: 1000/2373 files processed (1000 copied, 0 failed)
Progress: 1100/2373 files processed (1100 copied, 0 failed)
Progress: 1200/2373 files processed (1200 copied, 0 failed)
Progress: 1300/2373 files processed (1300 copied, 0 failed)
Progress: 1400/2373 files processed (1400 copied, 0 failed)
Progress: 1500/2373 files processed (1500 copied, 0 failed)
Progress: 1600/2373 files processed (1600 copied, 0 failed)
Progress: 1700/2373 files proc

In [6]:
# Verify: every source key must be present in the destination listing
print(f"\nVerifying files in {DEST_BUCKET}...")
dest_objects = list_all_objects(DEST_BUCKET)
print(f"Found {len(dest_objects)} objects in destination bucket")

# Compare keys rather than object counts: equal counts do not prove the same
# objects are present (the destination may hold unrelated keys while some
# source keys are missing).
dest_keys = {obj['Key'] for obj in dest_objects}
missing_keys = [obj['Key'] for obj in source_objects if obj['Key'] not in dest_keys]

if not missing_keys:
    print("✅ Success! All files copied.")
else:
    found_count = len(source_objects) - len(missing_keys)
    print(f"⚠️ Warning: Expected {len(source_objects)} files, but found {found_count}")
    # Show a bounded sample so a large gap does not flood the cell output
    for missing_key in missing_keys[:10]:
        print(f"  - missing: {missing_key}")
    if len(missing_keys) > 10:
        print(f"  ... and {len(missing_keys) - 10} more missing files")


Verifying files in prod-vlc-real-estate-analytics-listings...
Found 2373 objects in destination bucket
✅ Success! All files copied.


In [7]:
# Optional: peek at the most recently modified objects in the destination
print("\nLatest 10 files in destination bucket:")

# Sort a copy so the destination listing itself keeps its original order
sorted_dest_objects = list(dest_objects)
sorted_dest_objects.sort(key=lambda record: record['LastModified'], reverse=True)

for obj in sorted_dest_objects[:10]:
    print(f"  - {obj['Key']} (modified: {obj['LastModified']})")


Latest 10 files in destination bucket:
  - sale_20251228_120045_9.json (modified: 2025-12-29 18:13:58+00:00)
  - sale_20251228_120045_3.json (modified: 2025-12-29 18:13:57+00:00)
  - sale_20251228_120045_4.json (modified: 2025-12-29 18:13:57+00:00)
  - sale_20251228_120045_5.json (modified: 2025-12-29 18:13:57+00:00)
  - sale_20251228_120045_6.json (modified: 2025-12-29 18:13:57+00:00)
  - sale_20251228_120045_7.json (modified: 2025-12-29 18:13:57+00:00)
  - sale_20251228_120045_8.json (modified: 2025-12-29 18:13:57+00:00)
  - sale_20251221_120045_5.json (modified: 2025-12-29 18:13:56+00:00)
  - sale_20251221_120045_6.json (modified: 2025-12-29 18:13:56+00:00)
  - sale_20251221_120045_7.json (modified: 2025-12-29 18:13:56+00:00)
