# Multi-Table Database - Tracks

**Musical Track Database**

This application will read an iTunes export file in XML and produce a properly normalized database with this structure:

```sql
CREATE TABLE Artist (
    id  INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    name    TEXT UNIQUE
);

CREATE TABLE Genre (
    id  INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    name    TEXT UNIQUE
);

CREATE TABLE Album (
    id  INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    artist_id  INTEGER,
    title   TEXT UNIQUE
);

CREATE TABLE Track (
    id  INTEGER NOT NULL PRIMARY KEY 
        AUTOINCREMENT UNIQUE,
    title TEXT  UNIQUE,
    album_id  INTEGER,
    genre_id  INTEGER,
    len INTEGER, rating INTEGER, count INTEGER
);
```

If you run the program multiple times in testing or with different files, make sure to empty out the data before each run.

You can use this code as a starting point for your application: <http://www.py4e.com/code3/tracks.zip>. The ZIP file contains the Library.xml file to be used for this assignment. You can export your own tracks from iTunes and create a database, but for the database that you turn in for this assignment, use only the Library.xml data that is provided.

To grade this assignment, the program will run a query like this on your uploaded database and look for the data it expects to see:

```sql
SELECT Track.title, Artist.name, Album.title, Genre.name 
    FROM Track JOIN Genre JOIN Album JOIN Artist 
    ON Track.genre_id = Genre.ID and Track.album_id = Album.id 
        AND Album.artist_id = Artist.id
    ORDER BY Artist.name LIMIT 3
```

The expected result of the modified query on your database is shown below as a simple table with column titles:

|Track	                                |Artist |Album	        |Genre  |
|---------------------------------------|-------|---------------|-------|
|Chase the Ace	                        |AC/DC	|Who Made Who	|Rock   |
|D.T.	                                |AC/DC	|Who Made Who	|Rock   |
|For Those About To Rock (We Salute You)|AC/DC	|Who Made Who	|Rock   |

In [1]:
import xml.etree.ElementTree as ET
import sqlite3
import pandas as pd

# Location of the SQLite file that receives the normalized track data.
DB_PATH = './data/trackdb.sqlite'

# Script that drops the four tables if they exist and recreates them empty,
# so every run of this cell starts from a clean database.
SCHEMA_SCRIPT = '''
                    DROP TABLE IF EXISTS Artist;
                    DROP TABLE IF EXISTS Genre;
                    DROP TABLE IF EXISTS Album;
                    DROP TABLE IF EXISTS Track;

                    CREATE TABLE Artist (   id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
                                            name TEXT UNIQUE);

                    CREATE TABLE Genre (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
                                        name TEXT UNIQUE);

                    CREATE TABLE Album (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
                                        artist_id INTEGER,
                                        title TEXT UNIQUE);

                    CREATE TABLE Track (id  INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
                                        title TEXT  UNIQUE,
                                        album_id  INTEGER,
                                        genre_id  INTEGER,
                                        len INTEGER, rating INTEGER, count INTEGER);
                    '''

# Open the database and keep one cursor for all statements in this cell.
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()

# executescript() runs the whole multi-statement script in one call.
cur.executescript(SCHEMA_SCRIPT)

# Name of the iTunes XML export to import; it is read from ./data/ below.
fname = 'Library.xml'

def lookup(d, key):
    """Return the text of the element that follows a matching <key> element.

    d   -- iterable of elements exposing ``tag`` and ``text`` attributes
    key -- text of the <key> element to search for

    Returns the ``text`` of the child immediately after the first child whose
    tag is 'key' and whose text equals ``key``. Returns None when there is no
    such child, or when the matching child is the last one.
    """
    children = iter(d)
    for node in children:
        if node.tag == 'key' and node.text == key:
            # The value is whatever element comes directly after the key.
            value_node = next(children, None)
            return None if value_node is None else value_node.text
    return None

def _get_or_create_id(cursor, insert_sql, insert_params, select_sql, key):
    """Insert a row unless its unique value already exists, then return its id.

    cursor        -- sqlite3 cursor both statements are executed on
    insert_sql    -- guarded ``INSERT ... SELECT ... WHERE NOT EXISTS`` statement
    insert_params -- parameters bound to ``insert_sql``
    select_sql    -- SELECT returning the id of the row matching one placeholder
    key           -- value bound to the single placeholder of ``select_sql``

    Returns the first column of the first row produced by ``select_sql``.
    """
    cursor.execute(insert_sql, insert_params)
    cursor.execute(select_sql, (key,))
    return cursor.fetchone()[0]


# Parse the iTunes export; each track is a <dict> nested three levels deep.
stuff = ET.parse("./data/" + fname)
# Named `entries` rather than `all` so the builtin all() is not shadowed for
# the remainder of the session.
entries = stuff.findall('dict/dict/dict')
for entry in entries:
    # Skip dictionaries that are not track records.
    if lookup(entry, 'Track ID') is None:
        continue

    artist = lookup(entry, 'Artist')
    genre = lookup(entry, 'Genre')
    album = lookup(entry, 'Album')
    name = lookup(entry, 'Name')
    # These three may be None when the key is missing; None is stored as NULL.
    count = lookup(entry, 'Play Count')
    rating = lookup(entry, 'Rating')
    length = lookup(entry, 'Total Time')

    # A track is only importable when all four linking values are present.
    if artist is None or genre is None or album is None or name is None:
        continue

    artist_id = _get_or_create_id(
        cur,
        '''
        INSERT INTO Artist (name)
        SELECT ?
        WHERE NOT EXISTS (SELECT 1 FROM Artist WHERE name = ?)
        ''',
        (artist, artist),
        'SELECT id FROM Artist WHERE name = ?',
        artist,
    )

    genre_id = _get_or_create_id(
        cur,
        '''
        INSERT INTO Genre (name)
        SELECT ?
        WHERE NOT EXISTS (SELECT 1 FROM Genre WHERE name = ?)
        ''',
        (genre, genre),
        'SELECT id FROM Genre WHERE name = ?',
        genre,
    )

    # Albums are matched on title alone, so a title that was already stored
    # keeps the artist_id of the first entry that introduced it.
    album_id = _get_or_create_id(
        cur,
        '''
        INSERT INTO Album (artist_id, title)
        SELECT ?, ?
        WHERE NOT EXISTS (SELECT 1 FROM Album WHERE title = ?)
        ''',
        (artist_id, album, album),
        'SELECT id FROM Album WHERE title = ?',
        album,
    )

    # Tracks are matched on title alone as well: the first entry with a given
    # title is stored and later entries with the same title are left out.
    cur.execute(
        '''
        INSERT INTO Track (title, album_id, genre_id, len, rating, count)
        SELECT ?, ?, ?, ?, ?, ?
        WHERE NOT EXISTS (SELECT 1 FROM Track WHERE title = ?)
        ''',
        (name, album_id, genre_id, length, rating, count, name),
    )

# Verification query from the assignment. Track.title is added as a second
# sort key: with ORDER BY Artist.name alone, the order of tracks by the same
# artist is unspecified, so which three rows LIMIT 3 keeps could vary.
query = """
        SELECT Track.title, Artist.name, Album.title, Genre.name
        FROM Track
        JOIN Genre ON Track.genre_id = Genre.id
        JOIN Album ON Track.album_id = Album.id
        JOIN Artist ON Album.artist_id = Artist.id
        ORDER BY Artist.name, Track.title
        LIMIT 3
        """

try:
    # Persist the imported rows first so a failure while reading them back
    # cannot discard the import.
    conn.commit()
    df = pd.read_sql_query(query, conn)
finally:
    # Release the database file even if the commit or the read raised.
    conn.close()

df

Unnamed: 0,title,name,title.1,name.1
0,For Those About To Rock (We Salute You),AC/DC,Who Made Who,Rock
1,Hells Bells,AC/DC,Who Made Who,Rock
2,Shake Your Foundations,AC/DC,Who Made Who,Rock
