# 下载整个文件夹

In [8]:
import boto3
import os
from botocore.exceptions import NoCredentialsError

# Configure AWS credentials (if needed)
aws_access_key_id = 'YOUR_ACCESS_KEY_ID'
aws_secret_access_key = 'YOUR_SECRET_ACCESS_KEY'
region_name = 'eu-west-2'  # e.g. 'us-west-2'

# Create the S3 client from the settings above
s3 = boto3.client(
    's3',
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

def download_directory_from_s3(bucket_name, s3_prefix, local_dir):
    """
    Download every object under an S3 prefix into a local directory.

    Objects are listed page by page with the ``list_objects_v2`` paginator of
    the module-level ``s3`` client and downloaded one at a time. The local
    path of each object is ``local_dir`` joined with the key's path relative
    to ``s3_prefix``; missing parent directories are created on demand.

    Keys ending in ``/`` are skipped: they are folder placeholder objects
    (often the prefix itself) and would otherwise resolve to a directory path
    that ``download_file`` cannot write to.

    Download failures are reported with ``print`` and do not stop the loop.

    :param bucket_name: name of the S3 bucket
    :param s3_prefix: key prefix of the S3 "folder", e.g. 'data/folder/'
    :param local_dir: local directory that receives the files
    """
    # An empty string means "current directory"; os.makedirs('') would raise.
    if local_dir:
        os.makedirs(local_dir, exist_ok=True)

    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix):
        # A page without 'Contents' means nothing matched the prefix.
        for obj in page.get('Contents', []):
            key = obj['Key']

            # Folder placeholders carry no file content; nothing to download.
            if key.endswith('/'):
                continue

            local_file_path = os.path.join(local_dir, os.path.relpath(key, s3_prefix))
            local_file_dir = os.path.dirname(local_file_path)

            # exist_ok avoids the check-then-create race; the guard covers a
            # file that lands directly in the current directory.
            if local_file_dir:
                os.makedirs(local_file_dir, exist_ok=True)

            try:
                s3.download_file(bucket_name, key, local_file_path)
                print(f"File downloaded successfully to {local_file_path}")
            except NoCredentialsError:
                print("Credentials not available")
            except Exception as e:
                # Best effort: report the failure and carry on with the next key.
                print(f"Error occurred: {e}")

# Example usage
bucket_name = 'shareddatasetllm'
s3_prefix = 'train_test_data/'  # e.g. 'data/folder/'
local_dir = 'train_test_data/'  # e.g. '/home/user/folder/'

download_directory_from_s3(
    bucket_name=bucket_name,
    s3_prefix=s3_prefix,
    local_dir=local_dir,
)

File downloaded successfully to train_test_data/template.json
File downloaded successfully to train_test_data/test.jsonl
File downloaded successfully to train_test_data/train.jsonl


# 下载单个文件

In [None]:
import boto3
from botocore.exceptions import NoCredentialsError

# Configure AWS credentials (if needed)
aws_access_key_id = 'YOUR_ACCESS_KEY_ID'
aws_secret_access_key = 'YOUR_SECRET_ACCESS_KEY'
region_name = 'YOUR_REGION'  # e.g. 'us-west-2'

# Create the S3 client from the settings above
s3 = boto3.client(
    's3',
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# S3 bucket name, object key and local destination
bucket_name = 'your-s3-bucket-name'
object_key = 'path/to/your/object.ext'  # e.g. 'data/file.txt'
local_file_path = 'local/path/to/save/object.ext'  # e.g. '/home/user/file.txt'

# Download the object and report the outcome
try:
    s3.download_file(Bucket=bucket_name, Key=object_key, Filename=local_file_path)
    print(f"File downloaded successfully to {local_file_path}")
except NoCredentialsError:
    print("Credentials not available")
except Exception as error:
    print(f"Error occurred: {error}")


In [None]:
import boto3
from botocore.exceptions import NoCredentialsError

# Create the S3 client using the credentials from the default configuration
s3 = boto3.client('s3')

# S3 bucket name, object key and local destination
bucket_name = 'my-s3-bucket'
object_key = 'data/myfile.txt'  # object key: the file's path inside the bucket
local_file_path = '/home/user/myfile.txt'  # local file path

# Download the object and report the outcome
try:
    s3.download_file(Bucket=bucket_name, Key=object_key, Filename=local_file_path)
    print(f"File downloaded successfully to {local_file_path}")
except NoCredentialsError:
    print("Credentials not available")
except Exception as error:
    print(f"Error occurred: {error}")


# 上传文件到S3

In [None]:
import boto3

# Configure AWS credentials
aws_access_key_id = 'YOUR_ACCESS_KEY_ID'
aws_secret_access_key = 'YOUR_SECRET_ACCESS_KEY'
region_name = 'YOUR_REGION'  # e.g. 'us-west-2'

# Create the S3 client from the settings above
s3 = boto3.client(
    's3',
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# Example operation: upload a local file to S3
bucket_name = 'your-bucket-name'
object_key = 'path/to/object'
local_file_path = 'path/to/local/file'

s3.upload_file(Filename=local_file_path, Bucket=bucket_name, Key=object_key)
