In this notebook I will walk through the basics of modeling data in normalized form. I will create tables, insert rows of data, and do a simple JOIN SQL query to show how these tables can work together.

In [2]:
import psycopg2

In [3]:
# Open a connection to the database named "udacity". A connection failure is
# reported on stdout rather than propagated.
try:
    conn = psycopg2.connect("dbname=udacity")
except psycopg2.Error as err:
    print("Error: Could not connect to database")
    print(err)
    

In [4]:
# Obtain a cursor on the open connection; it is used to run every SQL
# statement in the rest of the notebook.
try:
    cur = conn.cursor()
except psycopg2.Error as err:
    print("Error: Could not get cursor to the database")
    print(err)

# Switch the session to autocommit mode.
conn.set_session(autocommit=True)

Let's imagine we have a table called Music Library.<br>
<br>
Table Name: music_library<br>
column 0: Album Id<br>
column 1: Album Name<br>
column 2: Artist Name<br>
column 3: Year<br>
column 4: List of songs

In [4]:
# Create the denormalized starting table. The songs column is a text array,
# so one album row holds its whole track list.
try:
    cur.execute(
        "CREATE TABLE IF NOT EXISTS music_library (album_id int, album_name varchar, artist_name varchar, year int, songs text[]);"
    )
except psycopg2.Error as err:
    print("Error: Issue creating table")
    print(err)

In [7]:
# Insert "Rubber Soul"; the Python list is passed as the value for the
# songs column.
try:
    cur.execute(
        "INSERT INTO music_library (album_id, album_name, artist_name, year, songs) VALUES (%s, %s, %s, %s, %s)",
        (1, "Rubber Soul", "The Beatles", 1965, ["Michelle", "Think For Yourself", "In My Life"]),
    )
except psycopg2.Error as err:
    print("Error: Inserting rows")
    print(err)

In [8]:
# Insert "Let It Be"; the Python list is passed as the value for the
# songs column.
try:
    cur.execute(
        "INSERT INTO music_library (album_id, album_name, artist_name, year, songs) VALUES (%s, %s, %s, %s, %s)",
        (2, "Let It Be", "The Beatles", 1970, ["Let It Be", "Across The Universe"]),
    )
except psycopg2.Error as err:
    print("Error: Inserting rows")
    print(err)

In [9]:
# Test to see if we can fetch newly inserted rows:
try:
    cur.execute("SELECT * FROM music_library;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Read rows only when the query succeeded: after a failed execute the
    # cursor has no result set, and fetching from it would raise a second
    # error on top of the one already reported.
    for row in cur:
        print(row)

(1, 'Rubber Soul', 'The Beatles', 1965, ['Michelle', 'Think For Yourself', 'In My Life'])
(2, 'Let It Be', 'The Beatles', 1970, ['Let It Be', 'Across The Universe'])


### Moving to 1st Normal Form (1NF)

This data has not been normalized. To get this data into 1NF, we need to remove any collections or lists of data. We need to break up the list of songs into individual rows:

Table Name: music_library_2<br>
column 0: Album Id<br>
column 1: Album Name<br>
column 2: Artist Name<br>
column 3: Year<br>
column 4: Song Name

In [10]:
# Create the 1NF table: the songs array is replaced by a single song_name
# column, so each row holds exactly one song.
try:
    cur.execute(
        "CREATE TABLE IF NOT EXISTS music_library_2 (album_id int, album_name varchar, artist_name varchar, year int, song_name varchar);"
    )
except psycopg2.Error as err:
    print("Error: Issue creating table")
    print(err)

In [11]:
# Insert one row per song into music_library_2. Album details are repeated on
# every row, which is the redundancy the later normal forms remove.
# Each insert has its own try/except so a failing row is reported and the
# remaining rows are still attempted.
for song_row in (
    (1, "Rubber Soul", "The Beatles", 1965, "Michelle"),
    (1, "Rubber Soul", "The Beatles", 1965, "Think For Yourself"),
    (1, "Rubber Soul", "The Beatles", 1965, "In My Life"),
    (2, "Let It Be", "The Beatles", 1970, "Let It Be"),
    (2, "Let It Be", "The Beatles", 1970, "Across The Universe"),
):
    try:
        cur.execute(
            "INSERT INTO music_library_2 (album_id, album_name, artist_name, year, song_name) VALUES (%s, %s, %s, %s, %s)",
            song_row,
        )
    except psycopg2.Error as e:
        print("Error: Inserting rows")
        print(e)

In [12]:
# Test to see if we can fetch newly inserted rows:
try:
    cur.execute("SELECT * FROM music_library_2;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Read rows only when the query succeeded: after a failed execute the
    # cursor has no result set, and fetching from it would raise a second
    # error on top of the one already reported.
    for row in cur:
        print(row)

(1, 'Rubber Soul', 'The Beatles', 1965, 'Michelle')
(1, 'Rubber Soul', 'The Beatles', 1965, 'Think For Yourself')
(1, 'Rubber Soul', 'The Beatles', 1965, 'In My Life')
(2, 'Let It Be', 'The Beatles', 1970, 'Let It Be')
(2, 'Let It Be', 'The Beatles', 1970, 'Across The Universe')


### Moving to 2nd Normal Form (2NF)

I have just moved the data to be in 1NF, which is the first step in moving to 2nd Normal Form. But we're not quite there yet. While each of our records in our table is unique, our Primary key (album id) is not unique. We need to break this up into two tables, album library and song library.

Table Name: album_library<br>
column 0: Album Id<br>
column 1: Album Name<br>
column 2: Artist Name<br>
column 3: Year<br>
<br>
Table Name: song_library<br>
column 0: Song Id<br>
column 1: Album Id<br>
column 2: Song Name<br>
<br>

In [5]:
# Create the two 2NF tables: album_library holds one row per album and
# song_library holds one row per song, referring to its album by album_id.
# Each statement has its own try/except so a failure on one table does not
# stop the other from being created.
for create_stmt in (
    "CREATE TABLE IF NOT EXISTS album_library (album_id int, album_name varchar, artist_name varchar, year int);",
    "CREATE TABLE IF NOT EXISTS song_library (song_id int, album_id int, song_name varchar);",
):
    try:
        cur.execute(create_stmt)
    except psycopg2.Error as e:
        print("Error: Issue creating table")
        print(e)

In [7]:
# Insert one row per album into album_library.
# Each insert has its own try/except so a failing row is reported and the
# remaining rows are still attempted.
for album_row in (
    (1, "Rubber Soul", "The Beatles", 1965),
    (2, "Let It Be", "The Beatles", 1970),
):
    try:
        cur.execute(
            "INSERT INTO album_library (album_id, album_name, artist_name, year) VALUES (%s, %s, %s, %s)",
            album_row,
        )
    except psycopg2.Error as e:
        print("Error: Inserting rows")
        print(e)

In [8]:
# Insert one row per song into song_library as (song_id, album_id, song_name).
# Each insert has its own try/except so a failing row is reported and the
# remaining rows are still attempted.
for song_row in (
    (1, 1, "Michelle"),
    (2, 1, "Think For Yourself"),
    (3, 1, "In My Life"),
    (4, 2, "Let It Be"),
    (5, 2, "Across the Universe"),
):
    try:
        cur.execute(
            "INSERT INTO song_library (song_id, album_id, song_name) VALUES (%s, %s, %s)",
            song_row,
        )
    except psycopg2.Error as e:
        print("Error: Inserting rows")
        print(e)

In [12]:
# Print the contents of both 2NF tables, each under its own heading.
for heading, query in (
    ("Table: album_library\n", "SELECT * FROM album_library;"),
    ("\nTable: song_library\n", "SELECT * FROM song_library;"),
):
    print(heading)
    try:
        cur.execute(query)
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
    else:
        # Read rows only when the query succeeded; a failed query is
        # reported above and the next table is still printed.
        for row in cur:
            print(row)

Table: album_library

(1, 'Rubber Soul', 'The Beatles', 1965)
(2, 'Let It Be', 'The Beatles', 1970)

Table: song_library

(1, 1, 'Michelle')
(2, 1, 'Think For Yourself')
(3, 1, 'In My Life')
(4, 2, 'Let It Be')
(5, 2, 'Across the Universe')


Let's do a JOIN on these two tables so we can get all the information we had in our first table:

In [11]:
# Join albums to their songs on album_id. SELECT * returns the columns of
# both tables, so album_id appears twice in every printed row.
try:
    cur.execute("SELECT * FROM album_library JOIN song_library ON album_library.album_id = song_library.album_id ;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Read rows only when the query succeeded: after a failed execute the
    # cursor has no result set, and fetching from it would raise a second
    # error on top of the one already reported.
    for row in cur:
        print(row)

(1, 'Rubber Soul', 'The Beatles', 1965, 1, 1, 'Michelle')
(1, 'Rubber Soul', 'The Beatles', 1965, 2, 1, 'Think For Yourself')
(1, 'Rubber Soul', 'The Beatles', 1965, 3, 1, 'In My Life')
(2, 'Let It Be', 'The Beatles', 1970, 4, 2, 'Let It Be')
(2, 'Let It Be', 'The Beatles', 1970, 5, 2, 'Across the Universe')


### Moving to 3rd Normal Form (3NF)

For 3NF, we cannot have any transitive dependencies. We can move artist_name out of album_library into its own table, called artist_library, which will leave us with 3 tables:

Table Name: album_library_2<br>
column 0: Album Id<br>
column 1: Album Name<br>
column 2: Artist Id<br>
column 3: Year<br>
<br>
Table Name: song_library<br>
column 0: Song Id<br>
column 1: Album Id<br>
column 2: Song Name<br>
<br>
Table Name: artist_library<br>
column 0: Artist Id<br>
column 1: Artist Name<br>
<br>

In [13]:
# Create the two new 3NF tables: album_library_2 stores an artist_id instead
# of the artist name, and artist_library maps artist_id to artist_name.
# Each statement has its own try/except so a failure on one table does not
# stop the other from being created.
for create_stmt in (
    "CREATE TABLE IF NOT EXISTS album_library_2 (album_id int, album_name varchar, artist_id int, year int);",
    "CREATE TABLE IF NOT EXISTS artist_library (artist_id int, artist_name varchar);",
):
    try:
        cur.execute(create_stmt)
    except psycopg2.Error as e:
        print("Error: Issue creating table")
        print(e)

In [14]:
# Insert one row per album into album_library_2 as
# (album_id, album_name, artist_id, year).
# Each insert has its own try/except so a failing row is reported and the
# remaining rows are still attempted.
for album_row in (
    (1, "Rubber Soul", 1, 1965),
    (2, "Let It Be", 1, 1970),
):
    try:
        cur.execute(
            "INSERT INTO album_library_2 (album_id, album_name, artist_id, year) VALUES (%s, %s, %s, %s)",
            album_row,
        )
    except psycopg2.Error as e:
        print("Error: Inserting rows")
        print(e)
    

In [15]:
# Insert the single artist row; both album_library_2 rows use artist_id 1.
try:
    cur.execute(
        "INSERT INTO artist_library (artist_id, artist_name) VALUES (%s, %s)",
        (1, "The Beatles"),
    )
except psycopg2.Error as err:
    print("Error: Inserting rows")
    print(err)

In [16]:
# Print the contents of the three 3NF tables, each under its own heading.
for heading, query in (
    ("Table: album_library_2\n", "SELECT * FROM album_library_2;"),
    ("\nTable: song_library\n", "SELECT * FROM song_library;"),
    ("\nTable: artist_library\n", "SELECT * FROM artist_library;"),
):
    print(heading)
    try:
        cur.execute(query)
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
    else:
        # Read rows only when the query succeeded; a failed query is
        # reported above and the remaining tables are still printed.
        for row in cur:
            print(row)

Table: album_library_2

(1, 'Rubber Soul', 1, 1965)
(2, 'Let It Be', 1, 1970)

Table: song_library

(1, 1, 'Michelle')
(2, 1, 'Think For Yourself')
(3, 1, 'In My Life')
(4, 2, 'Let It Be')
(5, 2, 'Across the Universe')

Table: artist_library

(1, 'The Beatles')


Let's do a JOIN on these 3 tables so we can get all the information we had in our first table:

In [17]:
# Join artists to albums on artist_id, then albums to songs on album_id.
# SELECT * returns the columns of all three tables, so artist_id and album_id
# each appear twice in every printed row.
try:
    cur.execute("SELECT * FROM (artist_library JOIN album_library_2 ON artist_library.artist_id = album_library_2.artist_id) JOIN song_library ON album_library_2.album_id = song_library.album_id;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Read rows only when the query succeeded: after a failed execute the
    # cursor has no result set, and fetching from it would raise a second
    # error on top of the one already reported.
    for row in cur:
        print(row)

(1, 'The Beatles', 1, 'Rubber Soul', 1, 1965, 1, 1, 'Michelle')
(1, 'The Beatles', 1, 'Rubber Soul', 1, 1965, 2, 1, 'Think For Yourself')
(1, 'The Beatles', 1, 'Rubber Soul', 1, 1965, 3, 1, 'In My Life')
(1, 'The Beatles', 2, 'Let It Be', 1, 1970, 4, 2, 'Let It Be')
(1, 'The Beatles', 2, 'Let It Be', 1, 1970, 5, 2, 'Across the Universe')


### Done! We have now normalized our dataset!

Close our cursor and connection:

In [18]:
# Release the database resources: close the cursor first, then the
# connection it was created from.
cur.close()
conn.close()