In [6]:
import boto3
import os

In [20]:
access_key = 'your_access_key'
secret_key = 'your_secret_key'
bucket_name = 'my-cfa-pdfs'
folder_key = 'pypdf/'
output_folder = './metadata_output/'

In [21]:
# Create an S3 client
# s3 = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key)
s3 = boto3.client('s3')

In [22]:
try:
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_key)
    objects = response.get('Contents', [])

    if not objects:
        print("No files found in the specified folder.")
    else:
        # Create output folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)

        # Save metadata to individual files
        print("Saving metadata to individual files:")
        for obj in objects:
            file_key = obj['Key']
            try:
                # Retrieve file metadata
                file_metadata = s3.head_object(Bucket=bucket_name, Key=file_key)
                
                # Extract metadata values
                last_modified = file_metadata['LastModified']
                etag = file_metadata['ETag']
                content_type = file_metadata.get('ContentType', 'N/A')
                content_length = file_metadata['ContentLength']
                s3_link = f"s3://{bucket_name}/{file_key}"

                # Create output file path
                output_file_path = os.path.join(output_folder, f"{file_key.replace('/', '_')}_metadata.txt")

                # Save metadata to file
                with open(output_file_path, 'w') as output_file:
                    output_file.write(f"Metadata for file: {file_key}\n")
                    output_file.write(f"  Last Modified: {last_modified}\n")
                    output_file.write(f"  ETag: {etag}\n")
                    output_file.write(f"  Content Type: {content_type}\n")
                    output_file.write(f"  Content Length: {content_length} bytes\n")
                    output_file.write(f"  S3 Link: {s3_link}\n")

                print(f"Metadata saved to {output_file_path}")

            except Exception as e:
                print(f"Error retrieving metadata for {file_key}: {e}")

except Exception as e:
    print(f"Error listing objects: {e}")

Saving metadata to individual files:
Metadata saved to ./metadata_output/pypdf__metadata.txt
Metadata saved to ./metadata_output/pypdf_2024-l1-topics-combined-2.txt_metadata.txt
Metadata saved to ./metadata_output/pypdf_2024-l1-topics-combined-2_extracted_text.txt_metadata.txt
Metadata saved to ./metadata_output/pypdf_2024-l2-topics-combined-2.txt_metadata.txt
Metadata saved to ./metadata_output/pypdf_2024-l2-topics-combined-2_extracted_text.txt_metadata.txt
Metadata saved to ./metadata_output/pypdf_2024-l3-topics-combined-2.txt_metadata.txt
Metadata saved to ./metadata_output/pypdf_2024-l3-topics-combined-2_extracted_text.txt_metadata.txt
Metadata saved to ./metadata_output/pypdf_coveo_links.txt_metadata.txt
