# Sample Analysis for new DB

In [3]:
import configparser
import psycopg2
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [4]:
config = configparser.ConfigParser()
config.read('dwh.cfg')

HOST=config.get("DWH","DWH_HOST")
DB_NAME=config.get("DWH","DWH_DB")
DB_USER=config.get("DWH","DWH_DB_USER")
DB_PASSWORD=config.get("DWH","DWH_DB_PASSWORD")
DB_PORT=config.get("DWH","DWH_PORT")

conn = psycopg2.connect(dbname=DB_NAME, user=DB_USER, password=DB_PASSWORD, host=HOST, port =DB_PORT)
cur = conn.cursor()

conn_string="postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD, HOST, DB_PORT, DB_NAME)
print(conn_string)
%sql $conn_string

postgresql://dwhuser:***@dwhcluster.cwsikrn4kajq.us-west-2.redshift.amazonaws.com:5439/dwh


'Connected: dwhuser@dwh'

![](SparkifyERD.png)

### Find the five most active users

In [5]:
query1 = '''
SELECT 
    T1.user_id, 
    U.first_name, 
    U.last_name, 
    SongsPlayed
FROM (
    SELECT TOP 5
        S.user_id,
        COUNT(*) AS SongsPlayed --DISTINCT song_id
    FROM 
        songplayfact S
    GROUP BY
        S.user_id
    ) T1
    LEFT JOIN usersdim U ON T1.user_id = U.user_id
ORDER BY
    SongsPlayed DESC
'''

%sql $query1

 * postgresql://dwhuser:***@dwhcluster.cwsikrn4kajq.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


user_id,first_name,last_name,songsplayed
8,Kaylee,Summers,3
12,Austin,Rosales,2
90,Andrea,Butler,1
26,Ryan,Smith,1
98,Jordyn,Powell,1


### Most listened-to songs and their artists, ranked by number of unique listeners

In [6]:
# Top five songs ranked by how many distinct users played them.
# The subquery counts DISTINCT user_id per song_id in songplayfact; the outer
# query looks up the song title in songdim and the artist name in artistdim,
# then keeps the five rows with the highest UniqueListeners.
# Both lookups are LEFT JOINs, so a song_id with no match in songdim (or an
# artist_id with no match in artistdim) still appears, with NULL title/artist.
query2 = '''

SELECT TOP 5
    S.title AS SongName, 
    A.name AS Artist, 
    UniqueListeners
FROM (
    SELECT
        SP.song_id, 
        COUNT(DISTINCT SP.user_id) AS UniqueListeners
    FROM
        songplayfact SP
    GROUP BY 
        SP.song_id
    ) T1
    LEFT JOIN songdim S ON T1.song_id = S.song_id
    LEFT JOIN artistdim A ON S.artist_id = A.artist_id
ORDER BY
    UniqueListeners DESC
'''

# Run the query through the ipython-sql magic and display the result.
%sql $query2

 * postgresql://dwhuser:***@dwhcluster.cwsikrn4kajq.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


songname,artist,uniquelisteners
You're The One,Dwight Yoakam,22
Catch You Baby (Steve Pitron & Max Sanna Radio Edit),Lonnie Gordon,9
Nothin' On You [feat. Bruno Mars] (Album Version),B.o.B,7
I CAN'T GET STARTED,Ron Carter,6
Make Her Say,Kid Cudi / Kanye West / Common,5


In [7]:
### When do users listen to music? (song plays per hour of day)

In [8]:
# Number of song plays per hour, using the hour column of timedim.
# Each songplayfact row is matched to timedim on start_time, then rows are
# counted per T.hour and listed in ascending hour order.
# Because this is a LEFT JOIN, plays with no matching timedim row are counted
# under a NULL hour rather than dropped.
# NOTE(review): assumes timedim has at most one row per start_time; duplicate
# start_time rows would inflate the counts - TODO confirm.
query3 = '''
SELECT
    T.hour
    , COUNT(*)
FROM
    songplayfact SP
    LEFT JOIN timedim T on SP.start_time = T.start_time
GROUP BY 
    T.hour
ORDER BY 
    T.hour
'''

# Run the query through the ipython-sql magic and display the result.
%sql $query3

 * postgresql://dwhuser:***@dwhcluster.cwsikrn4kajq.us-west-2.redshift.amazonaws.com:5439/dwh
24 rows affected.


hour,count
0,6
1,11
2,3
3,2
4,7
5,7
6,9
7,13
8,18
9,9


In [22]:
# Total number of rows in timedim.
# NOTE(review): presumably a sanity check on the time dimension load; the
# notebook does not say what count is expected - TODO confirm.
query4 = '''
SELECT
    COUNT(*)
FROM
    timedim
'''

# Run the query through the ipython-sql magic and display the result.
%sql $query4

 * postgresql://dwhuser:***@dwhcluster.cwsikrn4kajq.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


count
333
