In [19]:
import json
import os

import boto3
import psycopg2

In [9]:
# AWS credentials and network settings shared by every client in this notebook.
# The standard AWS environment variables take precedence so real keys never
# have to be typed into (and saved with) the notebook; the placeholder strings
# are used only when those variables are unset.
access_key = os.environ.get('AWS_ACCESS_KEY_ID', 'Your-access-key')
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY', 'Your-secret-key')
# ID of the VPC in which the security group is created.
vpc_id = 'Your default VPC Id'

In [10]:
#create security group by passing vpc_id and group name
from botocore.exceptions import ClientError

# EC2 client used for all security-group operations in this notebook.
ec2_client = boto3.client('ec2',
                          region_name='us-east-2',
                          aws_access_key_id = access_key,
                          aws_secret_access_key=secret_key)

group_name = 'my-redshift-security-group'
group_description = 'Security group for redshift cluster access'

# Create the group, or look up the existing one when the name is already taken
# in this VPC. Either way security_group_id is set for the cells that follow.
try:
    response = ec2_client.create_security_group(
        GroupName=group_name,
        Description=group_description,
        VpcId=vpc_id
    )
    security_group_id = response['GroupId']
    print('Created security group with ID: ', security_group_id)
except ClientError as e:
    if e.response['Error']['Code'] == 'InvalidGroup.Duplicate':
        #The security group already exists
        response = ec2_client.describe_security_groups(
            Filters=[
                {'Name':'group-name', 'Values':[group_name]},
                {'Name':'vpc-id', 'Values':[vpc_id]}
            ]
        )
        security_group_id = response['SecurityGroups'][0]['GroupId']
        print('Security group already exists. Using existing security group with ID: ', security_group_id)
    else:
        # Any other failure leaves security_group_id undefined, so stop here
        # instead of letting a later cell fail with a confusing NameError.
        print('Error creating security group:', e)
        raise

Created security group with ID:  sg-03103c2cd4713cca3


In [11]:
#create inbound rule for security group

port = 5439 #redshift port
# NOTE(review): 0.0.0.0/0 admits every IPv4 address. Narrow this to a known
# CIDR (e.g. your own address with /32) for anything beyond a throwaway demo.
ip_range = '0.0.0.0/0'

try:
    # Add the inbound rule to the security group
    response = ec2_client.authorize_security_group_ingress(
        GroupId=security_group_id,
        IpPermissions=[
            {
                # TCP ingress on the Redshift port from ip_range.
                "IpProtocol": "tcp",
                "FromPort": port,
                "ToPort": port,
                "IpRanges": [{"CidrIp": ip_range}],
            }
        ]
    )
    print('Inbound rule added to the security group.')
except ClientError as e:
    if e.response['Error']['Code'] == 'InvalidPermission.Duplicate':
        # Re-running the cell is harmless: the rule is already in place.
        print('Inbound rule already exists for the specified port and IP range')
    else:
        # Without the rule the cluster is unreachable on this port, so surface
        # the failure rather than continuing silently.
        print('Error adding the inbound rule', e)
        raise

Inbound rule added to the security group.


In [12]:
# Create the IAM role
iam = boto3.client('iam',
                       region_name='us-east-2',
                       aws_access_key_id=access_key,
                       aws_secret_access_key=secret_key)
DWH_IAM_ROLE_NAME = 'redshifttest'

# Trust policy that lets the Redshift service assume the role.
assume_role_policy = {
    'Statement': [{'Action': 'sts:AssumeRole',
                   'Effect': 'Allow',
                   'Principal': {'Service': 'redshift.amazonaws.com'}}],
    'Version': '2012-10-17',
}

try:
    print('1.1 Creating a new IAM Role')
    dwh_role = iam.create_role(
        Path = '/',
        RoleName = DWH_IAM_ROLE_NAME,
        Description = 'Allows Redshift cluster to call AWS service on your behalf.',
        AssumeRolePolicyDocument = json.dumps(assume_role_policy)
    )
except iam.exceptions.EntityAlreadyExistsException:
    # Re-running the notebook: reuse the existing role so role_arn is still
    # defined for the cluster-creation cell. Any other error propagates.
    print('IAM role already exists. Using the existing role.')
    dwh_role = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)

# Attach Policy (done for new and pre-existing roles alike)
iam.attach_role_policy(RoleName=DWH_IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3FullAccess")

# Both create_role and get_role return the role description under 'Role'.
role_arn = dwh_role['Role']['Arn']
print(role_arn)

1.1 Creating a new IAM Role
arn:aws:iam::339713044034:role/redshifttest


In [16]:
#Define the cluster parameters used to create cluster in redshift
# The dict is passed verbatim as keyword arguments to create_cluster.
cluster_parameters = {
    #HW
    'ClusterType':'single-node',
    'NodeType':'dc2.large',
    'NumberOfNodes':1,

    #Identifiers & Credentials
    'DBName':'mydatabase',
    'ClusterIdentifier':'my-redshift-cluster',
    'MasterUsername':'myawsuser',
    'MasterUserPassword':'Password123',
    'PubliclyAccessible': True,

    #Network: attach the security group created above; without this the
    #group and its port-5439 inbound rule are never applied to the cluster
    'VpcSecurityGroupIds':[security_group_id],

    #Roles (for s3 access)
    'IamRoles':[role_arn]
}

In [17]:
# Redshift client built with the notebook's region and access keys.
redshift_client_options = {
    'region_name': 'us-east-2',
    'aws_access_key_id': access_key,
    'aws_secret_access_key': secret_key,
}
redshift_client = boto3.client('redshift', **redshift_client_options)

In [18]:
# Request the cluster; an already-existing cluster is not treated as an error.
try:
    response = redshift_client.create_cluster(**cluster_parameters)
except redshift_client.exceptions.ClusterAlreadyExistsFault:
    print('Cluster already exists. Skipping cluster creation')
else:
    print('Redshift cluster creation intiated')

# Block on the 'cluster_available' waiter before continuing.
cluster_available_waiter = redshift_client.get_waiter('cluster_available')
cluster_available_waiter.wait(
    ClusterIdentifier=cluster_parameters['ClusterIdentifier']
)

print('Redshift cluster is now available')

Redshift cluster creation intiated
Redshift cluster is now available


In [22]:
# create a connection to redshift using psycopg2
# Connection settings are derived from the cluster itself and from
# cluster_parameters so they cannot drift from what was actually provisioned.
cluster_info = redshift_client.describe_clusters(
    ClusterIdentifier=cluster_parameters['ClusterIdentifier']
)['Clusters'][0]
redshift_endpoint = cluster_info['Endpoint']['Address']
redshift_port = cluster_info['Endpoint']['Port']
redshift_user = cluster_parameters['MasterUsername']
redshift_password = cluster_parameters['MasterUserPassword']
redshift_database = cluster_parameters['DBName']
redshift_table = 'product_table'

conn = psycopg2.connect(host=redshift_endpoint,
                        port=redshift_port,
                        database=redshift_database,
                        user=redshift_user,
                        password=redshift_password)

# create the table if it does not exist
create_table_command = """
CREATE TABLE IF NOT EXISTS product_table(
marketplace varchar(50),
customer_id varchar(50),
product_id varchar(50),
seller_id varchar(50),
sell_date varchar(50),
quantity integer
)
"""

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cursor:
        try:
            # Execute the create table command
            cursor.execute(create_table_command)
            conn.commit()
            print('product_table table created successfully or already exists.')
        except psycopg2.Error as e:
            print('Error creating table:', e)
finally:
    # Always release the connection, even on an unexpected error.
    conn.close()

product_table table created successfully or already exists.


In [21]:
# Describe the security group
# Uses the security_group_id set by the creation cell. Re-assigning a literal
# ID here would point this cell, and the later delete cell, at a stale group.
try:
    response = ec2_client.describe_security_groups(GroupIds=[security_group_id])
    security_group = response['SecurityGroups'][0]
    print('Security Group Inbound Rules:')
    for rule in security_group['IpPermissions']:
        print(rule)
except ClientError as e:
    # Purely diagnostic cell: report the problem and carry on.
    print('Error describing security group:', e)

Security Group Inbound Rules:
{'FromPort': 5439, 'IpProtocol': 'tcp', 'IpRanges': [{'CidrIp': '0.0.0.0/0'}], 'Ipv6Ranges': [], 'PrefixListIds': [], 'ToPort': 5439, 'UserIdGroupPairs': []}


In [24]:
#Copy the data from s3 to redshift

conn = psycopg2.connect(host=redshift_endpoint,
                        port=redshift_port,
                        database=redshift_database,
                        user=redshift_user,
                        password=redshift_password)

# Authorize the load with the IAM role attached to the cluster (it carries the
# S3 access policy) instead of embedding the access keys in the SQL text.
copy_command = f"""
COPY public.product_table
FROM 's3://div-redshift-test/product_data.csv'
IAM_ROLE '{role_arn}'
DELIMITER ',' IGNOREHEADER 1;
"""

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cursor:
        try:
            # Execute the COPY command
            cursor.execute(copy_command)
            conn.commit()
            print('COPY command executed successfully.')
        except psycopg2.Error as e:
            print('Error executing COPY command:', e)
finally:
    # Always release the connection, even on an unexpected error.
    conn.close()

COPY command executed successfully.


In [25]:
# Read every row of product_table and print it.
conn = psycopg2.connect(host=redshift_endpoint,
                      port=redshift_port,
                      database=redshift_database,
                      user=redshift_user,
                      password=redshift_password)

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        # Execute the SELECT query
        cur.execute("SELECT * FROM product_table")

        # Fetch all the rows returned by the query
        rows = cur.fetchall()
finally:
    # Release the connection even if the query raises.
    conn.close()

# Process the retrieved rows
for row in rows:
    print(row)

('US', '49033728', 'A6302503213', '1111', '31-08-2021', 10)
('US', '17857748', 'B000059PET1', '2222', '20-09-2021', 20)
('US', '25551507', 'S7888128071', '3333', '31-08-2021', 10)
('US', '21025041', 'W630250993', '4444', '20-09-2021', 20)
('US', '40943563', 'B00JENS2BI', '5555', '31-08-2021', 10)
('US', '17013969', 'J6305761302', '6666', '05-09-2021', 30)
('US', '47611685', 'K6300157555', '7777', '06-09-2021', 30)
('US', '35680737', 'H6300189570', 'xxxx', '07-09-2021', 40)
('US', '10747909', 'B000SXQ5US', 'yyyy', '08-09-2021', 20)


In [26]:
# Connect to the Redshift cluster
conn = psycopg2.connect(
    host=redshift_endpoint,
    port=redshift_port,
    database=redshift_database,
    user=redshift_user,
    password=redshift_password
)

# List of tables to drop
tables_to_drop = ['product_table']

try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        # Drop the tables. Names come from the fixed list above, not from
        # user input, so plain interpolation is acceptable here.
        for table_name in tables_to_drop:
            cur.execute(f"DROP TABLE IF EXISTS {table_name}")

    # Commit first, so success is reported only once the drop is durable.
    conn.commit()
    print('Drop table successfully.')
finally:
    # Release the connection even if a statement or the commit raises.
    conn.close()

Drop table successfully.


In [27]:
# Tear down the cluster; no final snapshot is taken.
cluster_identifier = 'my-redshift-cluster'

delete_request = {
    'ClusterIdentifier': cluster_identifier,
    'SkipFinalClusterSnapshot': True,
}
redshift_client.delete_cluster(**delete_request)

# Block on the 'cluster_deleted' waiter before reporting success.
cluster_deleted_waiter = redshift_client.get_waiter('cluster_deleted')
cluster_deleted_waiter.wait(ClusterIdentifier=cluster_identifier)

print("delete cluster successfully")

delete cluster successfully


In [28]:
# Delete the security group
# The response is left as the cell's final expression so the notebook shows it.
delete_group_response = ec2_client.delete_security_group(GroupId=security_group_id)
delete_group_response

{'ResponseMetadata': {'RequestId': '2caf94b6-17a8-4b62-93c0-e585f4e6dfed',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2caf94b6-17a8-4b62-93c0-e585f4e6dfed',
   'cache-control': 'no-cache, no-store',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'content-type': 'text/xml;charset=UTF-8',
   'content-length': '283',
   'date': 'Fri, 07 Jun 2024 21:59:11 GMT',
   'server': 'AmazonEC2'},
  'RetryAttempts': 0}}