# Denormalization

## With denormalization, we want to think about the queries we are running and how we can reduce the number of JOINs, even if that means duplicating data

### Though JOINs are flexible, they are slow; if we have a read-heavy workload that requires low-latency queries, denormalizing the tables lets us trade duplicated data for faster reads

In [1]:
import os

import psycopg2

In [2]:
# Connect to the PostgreSQL database used throughout this notebook.
#
# The connection settings can be overridden with the PGDATABASE, PGUSER and
# PGPASSWORD environment variables so that real credentials never have to be
# written into the notebook. The fallbacks are the original tutorial values,
# so an existing local setup keeps working unchanged.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "udacity"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "pwrd"),
)

cur = conn.cursor()

# Autocommit: each statement is committed as soon as it runs, so the cells
# below never need an explicit conn.commit().
conn.set_session(autocommit=True)

In [6]:
# Optional reset: uncomment these lines to drop the four normalized tables
# (album_library, song_library, artist_library, song_length).
# NOTE(review): the statements have no "if exists", so each one would raise
# when its table is absent — presumably why they are left disabled; confirm.
#cur.execute("drop table album_library")
#cur.execute("drop table song_library")
#cur.execute("drop table artist_library")
#cur.execute("drop table song_length")

### create tables

![alt text](pic14.png "Image 1 Display")


In [7]:
# Build the four normalized tables (album_library, song_library,
# artist_library, song_length) and load the sample data.
#
# Each table is dropped first so that re-running this cell (or Restart & Run
# All) rebuilds it from scratch. With only "create table if not exists", every
# re-run appended another copy of the rows, and the later joins then returned
# duplicated results.

# Table: album_library
cur.execute("drop table if exists album_library")
cur.execute("create table album_library(album_id int, album_name varchar, artist_id int, year int)")

album_rows = [
    (1, "Rubber Soul", 1, 1965),
    (2, "Let It Be", 1, 1970),
]
cur.executemany(
    "insert into album_library(album_id, album_name, artist_id, year) values (%s, %s, %s, %s)",
    album_rows,
)

# Table: song_library
cur.execute("drop table if exists song_library")
cur.execute("create table song_library(song_id int, album_id int, song_name varchar)")

song_rows = [
    (1, 1, "Michelle"),
    (2, 1, "Think For Yourself"),
    (3, 1, "In My Life"),
    (4, 2, "Let It Be"),
    (5, 2, "Across The Universe"),
]
cur.executemany(
    "insert into song_library(song_id, album_id, song_name) values (%s, %s, %s)",
    song_rows,
)

# Table: artist_library
cur.execute("drop table if exists artist_library")
cur.execute("create table artist_library(artist_id int, artist_name varchar)")

artist_rows = [
    (1, "The Beatles"),
]
cur.executemany(
    "insert into artist_library(artist_id, artist_name) values (%s, %s)",
    artist_rows,
)

# Table: song_length
cur.execute("drop table if exists song_length")
cur.execute("create table song_length (song_id int, song_length int)")

song_length_rows = [
    (1, 163),
    (2, 137),
    (3, 145),
    (4, 240),
    (5, 227),
]
cur.executemany(
    "insert into song_length (song_id, song_length) values (%s, %s)",
    song_length_rows,
)
            


In [8]:
# Show the full contents of each normalized table, one after another.
# Every entry pairs the heading to print with the query whose rows follow it.
table_dumps = (
    ("Table: album_library \n", "select * from album_library"),
    ("\n \n Table: song_library \n", "select * from song_library"),
    ("\n \n Table: artist_library \n", "select * from artist_library"),
    ("\n \n Table: song_length \n", "select * from song_length"),
)

for heading, query in table_dumps:
    print(heading)
    cur.execute(query)
    # Print each fetched row on its own line, in the order returned.
    for record in cur.fetchall():
        print(record)


Table: album_library 

(1, 'Rubber Soul', 1, 1965)
(2, 'Let It Be', 1, 1970)

 
 Table: song_library 

(1, 1, 'Michelle')
(2, 1, 'Think For Yourself')
(3, 1, 'In My Life')
(4, 2, 'Let It Be')
(5, 2, 'Across The Universe')

 
 Table: artist_library 

(1, 'The Beatles')

 
 Table: song_length 

(1, 163)
(2, 137)
(3, 145)
(4, 240)
(5, 227)


### Let's imagine we need a query that returns these columns

![alt text](pic15.png "Image 1 Display")


In [9]:
# Rebuild the full artist / album / song / length view from the normalized
# tables. This takes three joins: artist -> album -> song -> song_length.
full_catalog_query = "select artist_library.artist_id, artist_name, album_library.album_id, \
            album_name, year, song_library.song_id, song_name, song_length.song_length \
            from ((artist_library join album_library on artist_library.artist_id = album_library.artist_id) \
                  join song_library on album_library.album_id = song_library.album_id) \
            join song_length on song_library.song_id = song_length.song_id"
cur.execute(full_catalog_query)

# Print every joined row, one per line.
for record in cur.fetchall():
    print(record)


(1, 'The Beatles', 1, 'Rubber Soul', 1965, 1, 'Michelle', 163)
(1, 'The Beatles', 1, 'Rubber Soul', 1965, 2, 'Think For Yourself', 137)
(1, 'The Beatles', 1, 'Rubber Soul', 1965, 3, 'In My Life', 145)
(1, 'The Beatles', 2, 'Let It Be', 1970, 4, 'Let It Be', 240)
(1, 'The Beatles', 2, 'Let It Be', 1970, 5, 'Across The Universe', 227)


### Denormalization

![alt text](re1.png "Image 1 Display")


### Query 1

![alt text](re2.png "Image 1 Display")


## Create the two new tables by combining the needed data

In [17]:
# Build the two denormalized tables for Query 1:
#   album_library1 - album rows with the artist name stored directly
#   song_library1  - song rows with the song length stored directly
#
# Each table is dropped first so that re-running this cell rebuilds it from
# scratch. With only "create table if not exists", every re-run appended
# another copy of the rows and the join below returned duplicated results.

cur.execute("drop table if exists album_library1")
cur.execute("create table album_library1 (album_id int, album_name varchar, artist_name varchar, year int)")

album_rows_denormalized = [
    (1, "Rubber Soul", "The Beatles", 1965),
    (2, "Let It Be", "The Beatles", 1970),
]
cur.executemany(
    "insert into album_library1 (album_id, album_name, artist_name, year) values (%s, %s, %s, %s)",
    album_rows_denormalized,
)

cur.execute("drop table if exists song_library1")
cur.execute("create table song_library1 (song_id int, album_id int, song_name varchar, song_length int)")

song_rows_denormalized = [
    (1, 1, "Michelle", 163),
    (2, 1, "Think For Yourself", 137),
    (3, 1, "In My Life", 145),
    (4, 2, "Let It Be", 240),
    (5, 2, "Across The Universe", 227),
]
cur.executemany(
    "insert into song_library1 (song_id, album_id, song_name, song_length) values (%s, %s, %s, %s)",
    song_rows_denormalized,
)


### Now we can do a simplified query to get the information we need. Only one Join is needed

In [18]:
# Query 1 against the denormalized tables: a single join on album_id now
# returns the artist, album, year, song and length columns.
single_join_query = "select artist_name, album_name, year, song_name, song_length from song_library1 join album_library1 on song_library1.album_id = album_library1.album_id"
cur.execute(single_join_query)

# Print every result row, one per line.
for record in cur.fetchall():
    print(record)



('The Beatles', 'Rubber Soul', 1965, 'Michelle', 163)
('The Beatles', 'Rubber Soul', 1965, 'Think For Yourself', 137)
('The Beatles', 'Rubber Soul', 1965, 'In My Life', 145)
('The Beatles', 'Let It Be', 1970, 'Let It Be', 240)
('The Beatles', 'Let It Be', 1970, 'Across The Universe', 227)


### Query 2

![alt text](re4.png "Image 1 Display")


## Create the new table

In [19]:


# Build the denormalized table for Query 2: one row per song carrying the
# album name and the song length, so album totals need no join at all.
#
# The table is dropped first so that re-running this cell rebuilds it from
# scratch. With only "create table if not exists", every re-run appended
# another copy of the rows and the SUM per album below was inflated.
cur.execute("drop table if exists album_length1")
cur.execute("create table album_length1 (song_id int, album_name varchar, song_length int)")

album_length_rows = [
    (1, "Rubber Soul", 163),
    (2, "Rubber Soul", 137),
    (3, "Rubber Soul", 145),
    (4, "Let It Be", 240),
    (5, "Let It Be", 227),
]
cur.executemany(
    "insert into album_length1 (song_id, album_name, song_length) values (%s, %s, %s)",
    album_length_rows,
)



In [20]:
# Query 2: total song length per album, read from the single denormalized
# table with no join.
album_total_query = "select album_name, SUM(song_length) from album_length1 GROUP BY album_name"
cur.execute(album_total_query)

# Print one (album_name, total_length) row per album.
for record in cur.fetchall():
    print(record)



('Rubber Soul', 445)
('Let It Be', 467)


### We have successfully taken normalized tables and denormalized them in order to speed up our reads and allow simpler queries to be executed

In [21]:
# Optional cleanup: uncomment to remove the denormalized tables created above.
#cur.execute("drop table album_library1")
#cur.execute("drop table song_library1")
#cur.execute("drop table album_length1")

# Release the database resources now that the notebook is finished; the
# cursor and connection were otherwise never closed explicitly.
cur.close()
conn.close()