In [1]:
import configparser
import json
from urllib.parse import quote

import boto3
from botocore.exceptions import ClientError

# Cluster Setup Procedures
#### load credentials and parameters
from the `dwh` config file

In [2]:
# Load AWS credentials and cluster parameters from the local `dwh.cfg` file.
config = configparser.ConfigParser()

# A context manager guarantees the file handle is closed once parsing is done
# (the bare `open()` previously left it to the garbage collector).
with open('dwh.cfg') as config_file:
    config.read_file(config_file)

# --- AWS credentials ---------------------------------------------------------
ACCESS_KEY_ID = config.get('aws_credentials', 'access_key_id')
SECRET_ACCESS_KEY = config.get('aws_credentials', 'secret_access_key')

# --- Redshift cluster settings ----------------------------------------------
# Every value is read as a string; numeric settings (NUMBER_OF_NODES, PORT)
# are converted with int() at the point of use.
REDSHIFT_ROLE_NAME = config.get('cluster_settings', 'redshift_role_name')
CLUSTER_TYPE = config.get('cluster_settings', 'cluster_type')
NUMBER_OF_NODES = config.get('cluster_settings', 'number_of_nodes')
NODE_TYPE = config.get('cluster_settings', 'node_type')
CLUSTER_IDENTIFIER = config.get('cluster_settings', 'cluster_identifier')
DB_NAME = config.get('cluster_settings', 'db_name')
MASTER_USER_NAME = config.get('cluster_settings', 'master_user_name')
MASTER_USER_PASSWORD = config.get('cluster_settings', 'master_user_password')
PORT = config.get('cluster_settings', 'port')

#### create clients/resources
for the IAM, EC2, S3 and Redshift services we'll use.  

For demonstration purposes, the US West (Oregon) region (`us-west-2`) is set as the default region for all of our AWS services.

In [3]:

# Single source of truth for the region hosting every service used below.
AWS_REGION = 'us-west-2'

# Connection settings shared by all clients/resources; defined once so the
# region and credentials cannot drift apart between services.
aws_connection_settings = {
    'region_name': AWS_REGION,
    'aws_access_key_id': ACCESS_KEY_ID,
    'aws_secret_access_key': SECRET_ACCESS_KEY,
}

# Low-level clients (IAM, Redshift) and object-oriented resources (EC2, S3).
iam = boto3.client('iam', **aws_connection_settings)
ec2 = boto3.resource('ec2', **aws_connection_settings)
s3 = boto3.resource('s3', **aws_connection_settings)
redshift = boto3.client('redshift', **aws_connection_settings)

#### Create an IAM role
granting Read-Only access for Redshift to an S3 bucket

> [IAM Roles documentation](<https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html>)
  
> [IAM objects guidelines](<https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-limits.html>)
  
> [boto3 IAM `create_role()` method](<https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/iam.html#IAM.Client.create_role>)

In [4]:
# Trust policy allowing the Redshift service to assume the role.
assume_role_policy = {
    'Statement': [{
        'Action': 'sts:AssumeRole',
        'Effect': 'Allow',
        'Principal': {
            'Service': 'redshift.amazonaws.com',
        },
    }],
    'Version': '2012-10-17',
}

# Create the IAM role. AWS API errors (e.g. the role already exists from a
# previous run) are reported and the cell carries on; anything that is not an
# AWS error is allowed to surface instead of being silently printed.
try:
    iam.create_role(
        Path='/',
        RoleName=REDSHIFT_ROLE_NAME,
        Description='Allows Redshift access to S3 Buckets',
        AssumeRolePolicyDocument=json.dumps(assume_role_policy),
    )
    print(f"'{REDSHIFT_ROLE_NAME}' IAM role successfully created.")

except ClientError as exception:
    print(exception)

# Grant the role read-only access to S3.
iam.attach_role_policy(
    RoleName=REDSHIFT_ROLE_NAME,
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess',
)
print(f"'AmazonS3ReadOnlyAccess' policy successfully attached to '{REDSHIFT_ROLE_NAME}'")

REDSHIFT_ROLE_ARN = iam.get_role(RoleName=REDSHIFT_ROLE_NAME)['Role']['Arn']
print(f"'{REDSHIFT_ROLE_NAME}' ARN successfully retrieved.")

# role ARN is stored within ConfigParser for later storage in config file
config.set('cluster_settings', 'redshift_role_arn', REDSHIFT_ROLE_ARN)

'dwhRole' IAM role succesfully created.
'AmazonS3ReadOnlyAccess' policy successfully attached to 'dwhRole'
'dwhRole' ARN successfully retrieved.


#### spin up a Redshift cluster instance:
for demonstration purposes it is defined as publicly accessible.
> [Redshift's boto3 `create_cluster()` documentation](<https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.create_cluster>)

In [5]:

# Provision the Redshift cluster. AWS API errors (e.g. the cluster already
# exists from a previous run) are reported and the notebook carries on;
# non-AWS errors are allowed to surface.
try:
    redshift.create_cluster(
        # cluster settings
        ClusterIdentifier=CLUSTER_IDENTIFIER,
        ClusterType=CLUSTER_TYPE,
        NodeType=NODE_TYPE,
        NumberOfNodes=int(NUMBER_OF_NODES),
        IamRoles=[REDSHIFT_ROLE_ARN],
        PubliclyAccessible=True,
        # The cluster must listen on the configured port: the ingress rule
        # and the connection string built later both use PORT. Without this
        # argument Redshift falls back to its own default port.
        Port=int(PORT),

        # database settings
        DBName=DB_NAME,
        MasterUsername=MASTER_USER_NAME,
        MasterUserPassword=MASTER_USER_PASSWORD,
    )

except ClientError as exception:
    print(exception)

#### use the `describe_clusters()` method
to check cluster creation status and retrieve useful data on the cluster
> [Redshift's boto3 `describe_clusters()` documentation](<https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.describe_clusters>)

In [7]:
# Ask Redshift for the cluster's description and report its lifecycle state.
cluster_status = redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)

current_state = cluster_status['Clusters'][0]['ClusterStatus']
print(current_state)

available


In [8]:
# Block until the cluster reports 'available'. Previously this cell silently
# did nothing when run too early, leaving ENDPOINT undefined for later cells.
# The waiter raises if the cluster does not become available in time.
redshift.get_waiter('cluster_available').wait(
    ClusterIdentifier=CLUSTER_IDENTIFIER
)

# Refresh the description now that the endpoint has been assigned; later
# cells also read `cluster_status`.
cluster_status = redshift.describe_clusters(
    ClusterIdentifier=CLUSTER_IDENTIFIER
)

ENDPOINT = cluster_status['Clusters'][0]['Endpoint']['Address']

config.set('cluster_settings', 'endpoint', ENDPOINT)

# new data within ConfigParser is fed back to the credentials file
with open('dwh.cfg', 'w') as config_file:
    config.write(config_file)

#### Open a TCP port
to make the Cluster accessible via its Endpoint address:
> [EC2 `Vpc` class documentation](<https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#vpc>)  
  
> [AWS Docs on Virtual Private Cloud's (VPC) Security Groups](<https://docs.aws.amazon.com/vpc/latest/userguide/VPC_SecurityGroups.html>)
  
> [EC2 `Security Groups` boto3 examples](<https://boto3.amazonaws.com/v1/documentation/api/latest/guide/ec2-example-security-group.html>)

In [None]:
# Open the cluster's TCP port on the security groups attached to the cluster.
# Only those groups are touched: authorizing every security group found in
# the VPC would also expose unrelated resources to 0.0.0.0/0.
# The group ids are read from the "cluster_status" object created above.
cluster_description = cluster_status['Clusters'][0]

for attached_group in cluster_description['VpcSecurityGroups']:

    sec_group = ec2.SecurityGroup(attached_group['VpcSecurityGroupId'])

    try:
        print('Authorizing security group:', sec_group.group_name)

        # The group id is supplied by the resource object itself, so no
        # GroupName argument is needed.
        # NOTE: 0.0.0.0/0 admits traffic from any address; acceptable for a
        # demonstration only.
        sec_group.authorize_ingress(
            CidrIp='0.0.0.0/0',
            IpProtocol='TCP',
            FromPort=int(PORT),
            ToPort=int(PORT),
        )

    except ClientError as exception:
        # AWS API errors (e.g. the rule already exists from a previous run)
        # are reported without aborting the remaining groups.
        print(exception)


#### IPython-SQL is used
to check whether a connection to the Cluster is possible  
> [IPython-SQL github repo and documentation](<https://github.com/catherinedevlin/ipython-sql>)

In [10]:
%load_ext sql

conn_string = f'postgresql://{MASTER_USER_NAME}:{MASTER_USER_PASSWORD}@{ENDPOINT}:{PORT}/{DB_NAME}'
        
%sql $conn_string

'Connected: sparkify@sparkify_dw'

# Cluster Take Down Procedures
#### the Redshift Cluster instance
is unprovisioned using its `delete_cluster()` method

In [None]:
# Deprovision the cluster without keeping a final snapshot.
# The call is left as the cell's last expression so its response is displayed.
deletion_request = {
    'ClusterIdentifier': CLUSTER_IDENTIFIER,
    'SkipFinalClusterSnapshot': True,
}
redshift.delete_cluster(**deletion_request)

#### use Redshift's `describe_clusters()` method
to monitor Cluster status

In [None]:
# Report the cluster's state while it is being deleted. Once deletion has
# finished the cluster can no longer be described, so that outcome is
# reported as a message instead of ending the cell with a traceback.
try:
    cluster_status = redshift.describe_clusters(
        ClusterIdentifier=CLUSTER_IDENTIFIER
    )

    print(cluster_status['Clusters'][0]['ClusterStatus'])

except redshift.exceptions.ClusterNotFoundFault:
    print(f"Cluster '{CLUSTER_IDENTIFIER}' no longer exists.")

#### IAM Roles are also cleared
by first detaching their previously attached policies and then effectively deleting them:

In [14]:
# the S3 Read-Only policy is detached
iam.detach_role_policy(
     RoleName=REDSHIFT_ROLE_NAME
    ,PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
)

# the Role is then deleted
iam.delete_role(RoleName=REDSHIFT_ROLE_NAME)

{'ResponseMetadata': {'RequestId': 'a7dfd354-5279-4675-9c2a-ba3130b670eb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a7dfd354-5279-4675-9c2a-ba3130b670eb',
   'content-type': 'text/xml',
   'content-length': '200',
   'date': 'Sat, 04 Apr 2020 19:55:13 GMT'},
  'RetryAttempts': 0}}