# Create Redshift Cluster using the AWS Python SDK

In [None]:
import configparser
import json
from urllib.parse import quote

import boto3
import pandas as pd
from botocore.exceptions import ClientError
# Load AWS credentials and cluster settings from the local dwh.cfg file.
# The file is opened in a `with` block so the handle is closed once parsed.
config = configparser.ConfigParser()
with open('dwh.cfg') as configFile:
    config.read_file(configFile)

# AWS credentials ([AWS] section)
KEY = config.get('AWS', 'KEY')
SECRET = config.get('AWS', 'SECRET')

# Cluster hardware ([DWH] section); every value is read as a string
DWH_CLUSTER_TYPE = config.get('DWH', 'DWH_CLUSTER_TYPE')
DWH_NUM_NODES = config.get('DWH', 'DWH_NUM_NODES')
DWH_NODE_TYPE = config.get('DWH', 'DWH_NODE_TYPE')

# Cluster identity, database name, credentials and port
DWH_CLUSTER_IDENTIFIER = config.get('DWH', 'DWH_CLUSTER_IDENTIFIER')
DWH_DB = config.get('DWH', 'DWH_DB')
DWH_DB_USER = config.get('DWH', 'DWH_DB_USER')
DWH_DB_PASSWORD = config.get('DWH', 'DWH_DB_PASSWORD')
DWH_PORT = config.get('DWH', 'DWH_PORT')

# Name of the IAM role that is created further down
DWH_IAM_ROLE_NAME = config.get('DWH', 'DWH_IAM_ROLE_NAME')



# Create Clients for IAM, EC2, S3 and Redshift

In [None]:
# Region and credentials shared by every AWS handle created in this cell.
awsConnectionArgs = dict(
    region_name = 'us-west-2',
    aws_access_key_id = KEY,
    aws_secret_access_key = SECRET
)

# EC2 and S3 are used through the resource interface
ec2 = boto3.resource('ec2', **awsConnectionArgs)
S3 = boto3.resource('s3', **awsConnectionArgs)

# IAM and Redshift are used through the client interface
iam = boto3.client('iam', **awsConnectionArgs)
redshift = boto3.client('redshift', **awsConnectionArgs)

# Create an IAM role that lets Redshift access the S3 bucket

In [None]:
# Trust policy: lets the Redshift service assume this role via sts:AssumeRole.
assumeRolePolicy = {
    'Version': '2012-10-17',
    'Statement': [{
        'Action': 'sts:AssumeRole',
        'Effect': 'Allow',
        'Principal': {'Service': 'redshift.amazonaws.com'}
    }]
}

# Create the role. An AWS-side error (for example the role already existing
# from an earlier run) is printed and the cell carries on; anything else,
# such as a parameter validation error, is allowed to surface.
try:
    print('Creating a new IAM role')
    dwhRole = iam.create_role(
        Path = '/',
        RoleName = DWH_IAM_ROLE_NAME,
        Description = 'Allows Redshift clusters to call AWS services on your behalf',
        AssumeRolePolicyDocument = json.dumps(assumeRolePolicy)
    )
except ClientError as e:
    print(e)

print('Attaching Policy')

# Grant the role read-only access to S3 and show the HTTP status of the call.
attachStatus = iam.attach_role_policy(
    RoleName = DWH_IAM_ROLE_NAME,
    PolicyArn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
)['ResponseMetadata']['HTTPStatusCode']
print(attachStatus)

print('Get the IAM role ARN')
# The response stores the ARN under the key 'Arn'.
roleArn = iam.get_role(RoleName = DWH_IAM_ROLE_NAME)['Role']['Arn']

print(roleArn)

# Create the Redshift Cluster

In [None]:
# Launch the Redshift cluster described by the DWH_* settings.
# An AWS-side error (for example the cluster already existing) is printed so
# the cell can be re-run; other exceptions are allowed to surface.
try:
    response = redshift.create_cluster(
        # hardware
        ClusterType = DWH_CLUSTER_TYPE,
        NodeType = DWH_NODE_TYPE,
        NumberOfNodes = int(DWH_NUM_NODES),

        # identifiers and credentials
        DBName = DWH_DB,
        ClusterIdentifier = DWH_CLUSTER_IDENTIFIER,
        MasterUsername = DWH_DB_USER,
        MasterUserPassword = DWH_DB_PASSWORD,

        # listen on the same port that the ingress rule and the connection
        # string take from the config
        Port = int(DWH_PORT),

        # roles (for s3 access)
        IamRoles = [roleArn]
    )

except ClientError as e:
    print(e)

# Describe the cluster to see its status

In [None]:
def prettyRedshiftProps(props):
    """Summarise selected cluster properties as a two-column DataFrame.

    Args:
        props: Mapping of cluster property names to values, such as one
            entry of the 'Clusters' list returned by describe_clusters.

    Returns:
        A pandas DataFrame with columns 'key' and 'value' holding only the
        properties listed in keysToShow, in the order they occur in props.
    """
    # Show long values (such as the endpoint) without truncation.
    pd.set_option('display.max_colwidth', None)
    keysToShow = ['ClusterIdentifier', 'NodeType', 'ClusterStatus', 'MasterUsername', 'DBName', 'Endpoint', 'NumberOfNodes', 'VpcId']
    rows = [(key, value) for key, value in props.items() if key in keysToShow]
    return pd.DataFrame(data=rows, columns=['key', 'value'])

# Fetch the description of our cluster; the response lists matching clusters
# under 'Clusters', and we keep the first entry.
describeResponse = redshift.describe_clusters(ClusterIdentifier = DWH_CLUSTER_IDENTIFIER)
myClusterProps = describeResponse['Clusters'][0]

# Last expression of the cell: render the summary table.
prettyRedshiftProps(myClusterProps)

In [None]:
# 'Endpoint' is missing from the description while the cluster is still being
# created, so block until it is available and read a fresh description.
redshift.get_waiter('cluster_available').wait(ClusterIdentifier = DWH_CLUSTER_IDENTIFIER)
availableClusterProps = redshift.describe_clusters(
    ClusterIdentifier = DWH_CLUSTER_IDENTIFIER
)['Clusters'][0]

# Host name used to connect, and the ARN of the first IAM role attached to the cluster.
DWH_ENDPOINT = availableClusterProps['Endpoint']['Address']
DWH_ROLE_ARN = availableClusterProps['IamRoles'][0]['IamRoleArn']

print(f'DWH_ENDPOINT : {DWH_ENDPOINT}')
print(f'DWH_ROLE_ARN : {DWH_ROLE_ARN}')

# Open an incoming TCP port to access the cluster endpoint

In [None]:
# Allow inbound TCP traffic on the cluster port.
# An AWS-side error (for example the rule already existing after a re-run) is
# printed and ignored; other exceptions are allowed to surface.
try:
    vpc = ec2.Vpc(id = myClusterProps['VpcId'])
    securityGroups = list(vpc.security_groups.all())
    # The collection has no guaranteed order, so prefer the group that is
    # actually named 'default' and only fall back to the first one.
    defaultSg = next(
        (sg for sg in securityGroups if sg.group_name == 'default'),
        securityGroups[0]
    )
    print(defaultSg)
    # NOTE(review): 0.0.0.0/0 exposes the port to every IPv4 address --
    # confirm this is acceptable or restrict the CIDR range.
    defaultSg.authorize_ingress(
        GroupName = defaultSg.group_name,
        CidrIp = '0.0.0.0/0',
        IpProtocol = 'TCP',
        FromPort = int(DWH_PORT),
        ToPort = int(DWH_PORT)
    )

except ClientError as e:
    print(e)

# Connect to the Cluster

In [None]:
# Load the `sql` IPython extension; the %sql / %%sql magics in the cells below rely on it.
%load_ext sql

In [None]:
conn_string = "postgresql://{}:{}@{}:{}/{}".format(
    DWH_DB_USER,
    DWH_DB_PASSWORD,
    DWH_ENDPOINT,
    DWH_PORT,
    DWH_DB
)
%sql $conn_string

In [None]:
# Preview up to 10 rows of the artists table.
%sql SELECT * FROM artists limit 10;


In [None]:
# Preview up to 10 rows of the time table.
%sql SELECT * FROM time limit 10;


In [None]:
# Preview up to 10 rows of the songs table.
%sql SELECT * FROM songs limit 10;


In [None]:
# Preview the first 10 songplays rows when ordered by songplay_id.
%sql SELECT * FROM songplays order by songplay_id limit 10;


In [None]:
%%sql
/* Remove every row from the two staging tables and the five remaining tables; the tables themselves are kept. */
delete from staging_events;
delete from staging_songs;
delete from songplays;
delete from users;
delete from songs;
delete from artists;
delete from time;

In [None]:
#### CAREFUL!!
#-- Set DELETE_RESOURCES = True and run this cell to delete the IAM role created above
# Off by default so that "Restart & Run All" never tears anything down.
DELETE_RESOURCES = False

if DELETE_RESOURCES:
    # The managed policy has to be detached before the role can be deleted.
    iam.detach_role_policy(RoleName=DWH_IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")
    iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)
# NOTE(review): this cell does not delete the Redshift cluster -- confirm it is removed elsewhere.
#### CAREFUL!!