### Redshift Data Warehouse Project
This notebook contains the code needed to connect to the database in the Redshift cluster and run a few sample queries.

In [None]:
import json
from urllib.parse import quote

import boto3
import pandas as pd

In [None]:
# Load the connection parameters from the dwh.cfg file in the working directory.
import configparser

config = configparser.ConfigParser()
# Open the file in a context manager so the handle is closed as soon as
# parsing finishes; open() raises if dwh.cfg is missing, so a missing file
# is reported here instead of surfacing later as a missing-section error.
with open('dwh.cfg') as config_file:
    config.read_file(config_file)

# AWS credentials used to create API clients.
KEY                    = config.get('AWS','KEY')
SECRET                 = config.get('AWS','SECRET')

# Cluster identifier plus database name, user, password and port.
# config.get returns strings, so DWH_PORT is a string as well.
DWH_CLUSTER_IDENTIFIER = config.get("DWH","DWH_CLUSTER_IDENTIFIER")
DWH_DB                 = config.get("CLUSTER","DB_NAME")
DWH_DB_USER            = config.get("CLUSTER","DB_USER")
DWH_DB_PASSWORD        = config.get("CLUSTER","DB_PASSWORD")
DWH_PORT               = config.get("CLUSTER","DB_PORT")

# Host name of the cluster endpoint, used as the host part of the connection URL.
DWH_ENDPOINT           = config.get("DWH","DWH_ENDPOINT")

In [None]:
# Build the boto3 Redshift client for us-east-1, authenticating with KEY / SECRET.
import boto3

redshift = boto3.client(
    'redshift',
    region_name='us-east-1',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

---
### Check the cluster status before accessing the database

In [None]:
# helper that condenses a cluster description into a small table
def prettyRedshiftProps(props):
    """Return the most relevant cluster properties as a two-column DataFrame.

    Parameters
    ----------
    props : dict
        Mapping of property name to value, e.g. one entry of the
        ``'Clusters'`` list returned by ``describe_clusters``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Key`` and ``Value``, one row per property in ``props``
        whose name is listed in ``keysToShow``, in the iteration order of
        ``props``. Empty (but with both columns) if nothing matches.

    Notes
    -----
    Sets the global pandas option ``display.max_colwidth`` so long values
    are shown without truncation.
    """
    # None means "do not truncate". The former -1 sentinel was deprecated in
    # pandas 1.0 and is rejected by newer pandas releases.
    pd.set_option('display.max_colwidth', None)
    keysToShow = ["ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername", "DBName", "Endpoint", "NumberOfNodes", 'VpcId']
    rows = [(key, value) for key, value in props.items() if key in keysToShow]
    return pd.DataFrame(data=rows, columns=["Key", "Value"])

In [None]:
# Describe the cluster by its identifier and display the first (only requested) entry.
cluster_description = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)
myClusterProps = cluster_description['Clusters'][0]
prettyRedshiftProps(myClusterProps)

---

### Connect to the database

In [None]:
# Load the IPython extension named "sql"; the %sql magic used in the cells below
# is presumably provided by it (e.g. the ipython-sql package) - confirm it is installed.
%load_ext sql

In [None]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DWH_DB_USER, DWH_DB_PASSWORD, DWH_ENDPOINT, DWH_PORT, DWH_DB)
print(conn_string)
%sql $conn_string

---

### Run SQL queries

In [None]:
# Number of rows in the staging_songs table.
%sql select count(*) from staging_songs

In [None]:
# Number of rows in the staging_events table.
%sql select count(*) from staging_events

In [None]:
# Number of rows in the songplays table.
%sql select count(*) from songplays

In [None]:
# Number of rows in the users table.
%sql select count(*) from users

In [None]:
# Titles of the staging_songs rows whose artist_id is 'AR73AIO1187B9AD57B'.
%sql select title from staging_songs where artist_id = 'AR73AIO1187B9AD57B'

In [None]:
# artist_id of every staging_songs row where num_songs equals '1'.
# NOTE(review): num_songs is compared against a string literal - confirm the
# column type; if it is numeric this relies on an implicit cast.
%sql select artist_id from staging_songs where num_songs = '1'

In [None]:
# Artists from the staging_events rows whose firstName matches 'Sylvie'.
# NOTE(review): the LIKE pattern has no wildcard, so it only matches that exact value.
%sql select artist from staging_events where firstName like 'Sylvie'