In [50]:
import json
import os
from pathlib import Path
from dotenv import load_dotenv
import boto3
from botocore.exceptions import ClientError
import random

def serialize_json(folder, filename, data):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``folder/filename``.

    The destination folder (including missing parents) is created on demand.
    Values the ``json`` module cannot encode natively (e.g. ``datetime``
    objects) are stored through ``str()``.

    Args:
        folder: Directory that receives the file.
        filename: Name of the JSON file inside ``folder``.
        data: JSON-compatible object to serialize.
    """
    # exist_ok=True already tolerates an existing folder, so a preliminary
    # os.path.exists() check (and the race window it opens) is unnecessary.
    os.makedirs(folder, exist_ok=True)
    # The context manager closes the file; no explicit close() is needed.
    with open(os.path.join(folder, filename), 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4, default=str)

def read_json(path):
    """Return the parsed JSON document stored at ``path``.

    An empty dict is returned when nothing exists at ``path``.
    """
    if not os.path.exists(path):
        return {}
    with open(path, "r", encoding="utf8") as source:
        return json.load(source)

# Section 1 - Environment variables loading

In [51]:
# Load the key/value pairs of the `.env` file found in the current working
# directory into the process environment.
env_path = Path('.env')
load_dotenv(dotenv_path=env_path)

# Every setting is None when the corresponding variable is not defined.

# AWS account and SDK credentials.
aws_account_id = os.environ.get('aws_account_id')
aws_sdk_access_id = os.environ.get('aws_sdk_access_id')
aws_sdk_access_secret = os.environ.get('aws_sdk_access_secret')

# AWS region and names of the two S3 buckets.
aws_region = os.environ.get('aws_region')
aws_private_bucket = os.environ.get('aws_private_bucket')
aws_deploy_bucket = os.environ.get('aws_deploy_bucket')

bing_api_key = os.environ.get('bing_api_key')

task_name = os.environ.get('task_name')
batch_name = os.environ.get('batch_name')

# Debugging tip: printing the values above helps to check the .env file, but
# the output contains secrets (access secret, API key) — clear it before
# saving or sharing the notebook.

# Section 2 - Setting up IAM policies and identity

In [63]:
# Creates (or retrieves) the IAM policy and user for the crowd workers,
# attaches the policy to the user and loads one access key pair of that user
# into aws_worker_access_id / aws_worker_access_secret.
#
# NOTE(review): this client relies on boto3's default credential chain, while
# the S3 client of Section 3 is given aws_sdk_access_id / aws_sdk_access_secret
# explicitly — confirm that both resolve to the same account.
iam = boto3.client('iam')

# Identity-based policy: read/write/list on both buckets and their objects.
crowd_workers_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "allowWorkerInteraction",
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                f"arn:aws:s3:::{aws_private_bucket}",
                f"arn:aws:s3:::{aws_private_bucket}/*",
                f"arn:aws:s3:::{aws_deploy_bucket}",
                f"arn:aws:s3:::{aws_deploy_bucket}/*"
            ]
        }
    ]
}

# --- Policy: create it, or fetch the existing one when already defined ---
policy = None
try:
    policy = iam.create_policy(
      PolicyName='crowd-workers-dev',
      PolicyDocument=json.dumps(crowd_workers_policy)
    )
    print(f"Crowd workers IAM policy with ARN {policy['Policy']['Arn']} creation completed, HTTP STATUS CODE: {policy['ResponseMetadata']['HTTPStatusCode']}.")
except (iam.exceptions.NoSuchEntityException, iam.exceptions.EntityAlreadyExistsException):
    policy = iam.get_policy(PolicyArn=f"arn:aws:iam::{aws_account_id}:policy/crowd-workers-dev")
    print(f"Crowd workers IAM policy with ARN {policy['Policy']['Arn']} retrieved, HTTP STATUS CODE: {policy['ResponseMetadata']['HTTPStatusCode']}.")
serialize_json("aws", f"policy_{policy['Policy']['PolicyName']}.json", policy)

# --- User: create it, or fetch the existing one ---
user = None
try:
    user = iam.create_user(UserName="worker-dev")
    print(f"Crowd workers user {user['User']['UserName']} created, HTTP STATUS CODE: {user['ResponseMetadata']['HTTPStatusCode']}.")
except (iam.exceptions.NoSuchEntityException, iam.exceptions.EntityAlreadyExistsException):
    user = iam.get_user(UserName="worker-dev")
    print(f"Crowd worker user {user['User']['UserName']} retrieved, HTTP STATUS CODE: {user['ResponseMetadata']['HTTPStatusCode']}.")
serialize_json("aws", f"user_{user['User']['UserName']}_data.json", user)

# --- Attach the policy to the user, then refresh the policy description ---
response = iam.attach_user_policy(UserName=user['User']['UserName'], PolicyArn=policy['Policy']['Arn'])
policy = iam.get_policy(PolicyArn=policy['Policy']['Arn'])
# Report the status of the attach call itself (previously the status of the
# unrelated `user` response was printed).
print(f"Policy with ARN {policy['Policy']['Arn']} attached to user {user['User']['UserName']}, HTTP STATUS CODE: {response['ResponseMetadata']['HTTPStatusCode']}")

# --- Access keys ---
# `keys` holds one metadata dict per key, always with a top-level
# 'AccessKeyId' entry, whichever branch below filled it.
keys = []
try:
    key = iam.create_access_key(UserName=user['User']['UserName'])
    # The secret is only returned at creation time, so the full response is
    # persisted (in plain text — keep the `aws` folder out of version control).
    serialize_json("aws", f"user_{user['User']['UserName']}_access_key_{key['AccessKey']['AccessKeyId']}.json", key)
    # Keep metadata only, mirroring the entries returned by list_access_keys.
    keys.append({field: value for field, value in key['AccessKey'].items() if field != 'SecretAccessKey'})
    print(f"Access key with for user {user['User']['UserName']} created, HTTP STATUS CODE: {key['ResponseMetadata']['HTTPStatusCode']}.")
except iam.exceptions.LimitExceededException:
    print(f"Two access keys for user {user['User']['UserName']} already present and retrieved")
    paginator = iam.get_paginator('list_access_keys')
    for found_keys in paginator.paginate(UserName=user['User']['UserName']):
        for key in found_keys['AccessKeyMetadata']:
            keys.append(key)
serialize_json("aws", f"user_{user['User']['UserName']}_access_keys.json", keys)

# Only keys whose secret was saved locally by a previous creation are usable:
# IAM never returns the secret of an existing key.
key_candidates = []
for key_metadata in keys:
    stored_key = read_json(f"aws/user_{user['User']['UserName']}_access_key_{key_metadata['AccessKeyId']}.json")
    if 'AccessKey' in stored_key:
        key_candidates.append((key_metadata, stored_key))
if not key_candidates:
    raise RuntimeError(
        f"No locally stored secret found for the access keys of user {user['User']['UserName']}; "
        "delete one of its keys in IAM and run this cell again to generate a new one."
    )

key_selected, key_data = random.choice(key_candidates)

aws_worker_access_id = key_data['AccessKey']['AccessKeyId']
aws_worker_access_secret = key_data['AccessKey']['SecretAccessKey']

Crowd workers IAM policy with ARN arn:aws:iam::269559900417:policy/crowd-workers-dev retrieved, HTTP STATUS CODE: 200.
Crowd worker user worker-dev retrieved, HTTP STATUS CODE: 200.
Policy with ARN arn:aws:iam::269559900417:policy/crowd-workers-dev attached to user worker-dev, HTTP STATUS CODE: 200
Two access keys for user worker-dev already present and retrieved


# Section 3 - Private and deploy bucket creation

In [53]:
# Creates (or retrieves) the private and the deploy bucket and configures
# them: public access block + bucket policy + CORS for the private bucket,
# bucket policy for the deploy bucket. Each description is saved under `aws/`.

# Client and resource share credentials and region so that they address the
# same account; falsy values let boto3 fall back to its default configuration.
s3_settings = {
    'aws_access_key_id': aws_sdk_access_id,
    'aws_secret_access_key': aws_sdk_access_secret,
    'region_name': aws_region or None
}
s3_client = boto3.client('s3', **s3_settings)
s3_resource = boto3.resource('s3', **s3_settings)

buckets = []
for bucket in s3_resource.buckets.all():
    buckets.append(bucket.name)

# S3 rejects an explicit LocationConstraint of 'us-east-1' (the default
# region), therefore the configuration is sent for other regions only.
if aws_region and aws_region != 'us-east-1':
    bucket_location = {'CreateBucketConfiguration': {'LocationConstraint': aws_region}}
else:
    bucket_location = {}

print(f"---------- INITIALIZING PRIVATE BUCKET {aws_private_bucket} ----------")

try:
    private_bucket = s3_client.create_bucket(Bucket=aws_private_bucket, **bucket_location)
    print(f"Bucket creation completed, HTTP STATUS CODE: {private_bucket['ResponseMetadata']['HTTPStatusCode']}.")
except s3_client.exceptions.BucketAlreadyOwnedByYou as error:
    # Here private_bucket is a Bucket resource rather than a response dict;
    # serialize_json stores it through its str() representation.
    private_bucket = s3_resource.Bucket(aws_private_bucket)
    print(f"Bucket already present, HTTP STATUS CODE: {error.response['ResponseMetadata']['HTTPStatusCode']}.")
serialize_json("aws", f"bucket_{aws_private_bucket}.json", private_bucket)

response = s3_client.put_public_access_block(
    Bucket=aws_private_bucket,
    PublicAccessBlockConfiguration={
        'BlockPublicAcls': True,
        'IgnorePublicAcls': True,
        'BlockPublicPolicy': True,
        'RestrictPublicBuckets': True
    },
)
print(f"Public access blocked, HTTP STATUS CODE: {response['ResponseMetadata']['HTTPStatusCode']}.")

# Resource-based policy granting the worker user access to the private bucket.
private_bucket_policy = {
    "Version": "2012-10-17",
    "Id": "private-bucket-policy",
    "Statement": [
        {
            "Sid": "allow-bucket-interaction",
            "Effect": "Allow",
            "Principal": {
                "AWS": f"arn:aws:iam::{aws_account_id}:user/{user['User']['UserName']}"
            },
            "Action": [
                "s3:GetObject",
                "s3:PutObject",
                "s3:ListBucket"
            ],
            "Resource": [
                f"arn:aws:s3:::{aws_private_bucket}",
                f"arn:aws:s3:::{aws_private_bucket}/*"
            ]
        }
    ]
}

try:
    policy = s3_client.get_bucket_policy(Bucket=aws_private_bucket)
    policy['Policy'] = json.loads(policy['Policy'])
    # Status of the lookup itself, not of an unrelated earlier call.
    print(f"Policy already present, HTTP STATUS CODE: {policy['ResponseMetadata']['HTTPStatusCode']}.")
except ClientError as e:
    # Only a missing policy is expected; anything else (e.g. access denied)
    # must surface instead of being retried blindly.
    if e.response['Error']['Code'] != 'NoSuchBucketPolicy':
        raise
    response = s3_client.put_bucket_policy(Bucket=aws_private_bucket, Policy=json.dumps(private_bucket_policy))
    print(f"Policy configuration completed, HTTP STATUS CODE: {response['ResponseMetadata']['HTTPStatusCode']}.")
    policy = s3_client.get_bucket_policy(Bucket=aws_private_bucket)
    policy['Policy'] = json.loads(policy['Policy'])
serialize_json("aws", f"bucket_{aws_private_bucket}_policy.json", policy)

# Desired CORS rules; the name is rebound below to the configuration actually
# stored on the bucket.
cors_configuration = {
    'CORSRules': [{
        'AllowedHeaders': ['*'],
        'AllowedMethods': ['GET', 'HEAD', 'PUT'],
        'AllowedOrigins': ['*'],
        'ExposeHeaders': [],
        'MaxAgeSeconds': 3000
    }]
}

try:
    cors_configuration = s3_client.get_bucket_cors(Bucket=aws_private_bucket)
    print(f"CORS Configuration already present, HTTP STATUS CODE: {cors_configuration['ResponseMetadata']['HTTPStatusCode']}.")
except ClientError as e:
    if e.response['Error']['Code'] != 'NoSuchCORSConfiguration':
        raise
    response = s3_client.put_bucket_cors(Bucket=aws_private_bucket, CORSConfiguration=cors_configuration)
    print(f"CORS configuration completed, HTTP STATUS CODE: {response['ResponseMetadata']['HTTPStatusCode']}.")
    # 'CORSRules' comes back as a list already; it must not go through
    # json.loads (doing so raised TypeError on the first configuration).
    cors_configuration = s3_client.get_bucket_cors(Bucket=aws_private_bucket)
serialize_json("aws", f"bucket_{aws_private_bucket}_cors.json", cors_configuration)

print(f"---------- INITIALIZATION COMPLETED ----------")

print(f"---------- INITIALIZING DEPLOY BUCKET {aws_deploy_bucket} ----------")

try:
    deploy_bucket = s3_client.create_bucket(Bucket=aws_deploy_bucket, **bucket_location)
    print(f"Bucket creation completed, HTTP STATUS CODE: {deploy_bucket['ResponseMetadata']['HTTPStatusCode']}.")
except s3_client.exceptions.BucketAlreadyOwnedByYou as error:
    deploy_bucket = s3_resource.Bucket(aws_deploy_bucket)
    print(f"Bucket already present, HTTP STATUS CODE: {error.response['ResponseMetadata']['HTTPStatusCode']}.")
serialize_json("aws", f"bucket_{aws_deploy_bucket}.json", deploy_bucket)

# Resource-based policy granting the worker user access to the deploy bucket.
deploy_bucket_policy = {
    "Version": "2012-10-17",
    "Id": "deploy-bucket-policy",
    "Statement": [
        {
            "Sid": "allow-bucket-interaction",
            "Effect": "Allow",
            "Principal": {
                "AWS": f"arn:aws:iam::{aws_account_id}:user/{user['User']['UserName']}"
            },
            "Action": [
                "s3:GetObject",
                "s3:PutObject",
                "s3:ListBucket"
            ],
            "Resource": [
                f"arn:aws:s3:::{aws_deploy_bucket}",
                f"arn:aws:s3:::{aws_deploy_bucket}/*"
            ]
        }
    ]
}

try:
    policy = s3_client.get_bucket_policy(Bucket=aws_deploy_bucket)
    policy['Policy'] = json.loads(policy['Policy'])
    print(f"Policy already present, HTTP STATUS CODE: {policy['ResponseMetadata']['HTTPStatusCode']}.")
except ClientError as e:
    if e.response['Error']['Code'] != 'NoSuchBucketPolicy':
        raise
    response = s3_client.put_bucket_policy(Bucket=aws_deploy_bucket, Policy=json.dumps(deploy_bucket_policy))
    print(f"Policy configuration completed, HTTP STATUS CODE: {response['ResponseMetadata']['HTTPStatusCode']}.")
    policy = s3_client.get_bucket_policy(Bucket=aws_deploy_bucket)
    policy['Policy'] = json.loads(policy['Policy'])
# Saved under the deploy bucket's own name (it used to overwrite the private
# bucket's policy file).
serialize_json("aws", f"bucket_{aws_deploy_bucket}_policy.json", policy)

print(f"---------- INITIALIZATION COMPLETED ----------")

---------- INITIALIZING PRIVATE BUCKET private-bucket-test-fdssfsdfdssdds ----------
Bucket already present, HTTP STATUS CODE: 409.
Public access blocked, HTTP STATUS CODE: 200.
Policy already present, HTTP STATUS CODE: 200.
CORS Configuration already present, HTTP STATUS CODE: 200.
---------- INITIALIZATION COMPLETED ----------
---------- INITIALIZING DEPLOY BUCKET deploy-bucket-test-fsfddfsfdsfsfsffssfdfsdfsfsdfsd ----------
Bucket already present, HTTP STATUS CODE: 409.
Policy already present, HTTP STATUS CODE: 200.
---------- INITIALIZATION COMPLETED ----------


# Section 4 - Environment file generation