In [1]:
import sqlite3
import pandas as pd


### Step-1 : Load the Database

In [2]:
conn = sqlite3.connect('musicbrainz-cmudb2020.db')
cursor = conn.cursor()

In [3]:
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = cursor.fetchall()
print(tables)
print(len(tables))

[('area',), ('artist',), ('artist_alias',), ('artist_credit_name',), ('artist_credit',), ('artist_type',), ('gender',), ('language',), ('medium',), ('medium_format',), ('release',), ('release_info',), ('release_status',), ('work',), ('work_type',)]
15


Observe that we have 15 tables in this database.

## Q1 (q1_sample)

The purpose of this query is to make sure that the formatting of your output matches exactly the formatting of our auto-grading script.

<b>Details:</b> List all types of work ordered by type ascendingly.

In [4]:
# Lets have a look at all columns in work_type table

cursor.execute("PRAGMA table_info('work_type')")
cols = cursor.fetchall()
for col in cols:
    print(col[1])

id
name
description


### Step - 2: Check the schema


In [5]:
schema=pd.read_sql_query("""pragma table_info('artist')
""",conn)
schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,begin_date_year,INTEGER,0,,0
3,3,begin_date_month,INTEGER,0,,0
4,4,begin_date_day,INTEGER,0,,0
5,5,end_date_year,INTEGER,0,,0
6,6,end_date_month,TEXT,0,,0
7,7,end_date_day,TEXT,0,,0
8,8,type,INTEGER,0,,0
9,9,area,INTEGER,0,,0


In [6]:
area_schema=pd.read_sql_query("""pragma table_info('area')
""",conn)
area_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,comment,TEXT,0,,0


In [7]:
artist_schema=pd.read_sql_query("""pragma table_info('artist')
""",conn)
artist_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,begin_date_year,INTEGER,0,,0
3,3,begin_date_month,INTEGER,0,,0
4,4,begin_date_day,INTEGER,0,,0
5,5,end_date_year,INTEGER,0,,0
6,6,end_date_month,TEXT,0,,0
7,7,end_date_day,TEXT,0,,0
8,8,type,INTEGER,0,,0
9,9,area,INTEGER,0,,0


In [8]:
schema=pd.read_sql_query("""pragma table_info('artist_alias')
""",conn)
schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,artist,INTEGER,0,,0
2,2,name,TEXT,0,,0


In [9]:
artist_credits_schema=pd.read_sql_query("""pragma table_info('artist_credit')
""",conn)
artist_credits_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,artist_count,INTEGER,0,,0


In [10]:
artist_credits_name_schema=pd.read_sql_query("""pragma table_info('artist_credit_name')
""",conn)
artist_credits_name_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,artist_credit,INTEGER,0,,0
1,1,position,INTEGER,0,,0
2,2,artist,INTEGER,0,,0
3,3,name,TEXT,0,,0


In [11]:
artist_type_schema=pd.read_sql_query("""pragma table_info('artist_type')
""",conn)
artist_type_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0


In [12]:
gender_schema=pd.read_sql_query("""pragma table_info('gender')
""",conn)
gender_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,description,TEXT,0,,0


In [13]:
language_schema=pd.read_sql_query("""pragma table_info('language')
""",conn)
language_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0


In [14]:
medium_schema=pd.read_sql_query("""pragma table_info('medium')
""",conn)
medium_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,release,INTEGER,0,,0
2,2,position,INTEGER,0,,0
3,3,format,INTEGER,0,,0
4,4,name,TEXT,0,,0


In [15]:
medium_format_schema=pd.read_sql_query("""pragma table_info('medium_format')
""",conn)
medium_format_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,description,TEXT,0,,0


In [16]:
release_schema=pd.read_sql_query("""pragma table_info('release')
""",conn)
release_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,artist_credit,INTEGER,0,,0
3,3,status,INTEGER,0,,0
4,4,language,INTEGER,0,,0
5,5,comment,TEXT,0,,0


In [17]:
release_info_schema=pd.read_sql_query("""pragma table_info('release_info')
""",conn)
release_info_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,release,INTEGER,0,,0
1,1,area,INTEGER,0,,0
2,2,date_year,INTEGER,0,,0
3,3,date_month,INTEGER,0,,0
4,4,date_day,INTEGER,0,,0


In [18]:
release_status_schema=pd.read_sql_query("""pragma table_info('release_status')
""",conn)
release_status_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,description,TEXT,0,,0


In [19]:
work_schema=pd.read_sql_query("""pragma table_info('work')
""",conn)
work_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,type,INTEGER,0,,0
3,3,comment,TEXT,0,,0


In [20]:
work_type_schema=pd.read_sql_query("""pragma table_info('work_type')
""",conn)
work_type_schema

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,name,TEXT,0,,0
2,2,description,TEXT,0,,0


### Q1 (q1_sample):
The purpose of this query is to make sure that the formatting of your output matches exactly the formatting of our auto-grading script.<br>
* Details: List all types of work ordered by type ascendingly.


In [22]:
name = pd.read_sql_query("""
select name from work_type
order by name ASC
""",conn)
name

Unnamed: 0,name
0,Aria
1,Audio drama
2,Ballet
3,Beijing opera
4,Cantata
5,Concerto
6,Incidental music
7,Madrigal
8,Mass
9,Motet


### Q2 (q2_long_name):
List works with the longest name of each type.<br>
* Details: For each work type, find works that have the longest names. There might be cases where there is a tie for the longest names - in that case, return all of them. Display work names and corresponding type names, and order it according to work type (ascending) and use work name (ascending) as tie-breaker.


In [23]:
join = pd.read_sql_query(""" Select work.id,work.name, work.type, work_type.name, work_type.description 
from work
inner join work_type on work.id=work_type.id""",conn)
join

Unnamed: 0,id,name,type,name.1,description
0,17,Lately,17,Song,A song is in its origin (and still in most cas...
1,19,Safe From Harm,17,Zarzuela,A zarzuela is a Spanish lyric-dramatic work th...
2,16,Hymn of the Big Wheel,17,Symphony,"A symphony is an extended composition, almost ..."
3,15,Blue Lines,17,Song-cycle,A song cycle is a group of songs designed to b...
4,10,Daydreaming,17,Opera,An opera is a dramatised work (text + musical ...
5,11,Five Man Army,17,Oratorio,An oratorio is a large (usually sacred) musica...
6,18,One Love,17,Symphonic poem,A symphonic poem is a piece of programmatic or...


In [24]:
leagues = pd.read_sql("""SELECT *
                        FROM work
                        JOIN work_type ON work_type.id = work.id;""", conn)
leagues

Unnamed: 0,id,name,type,comment,id.1,name.1,description
0,17,Lately,17,,17,Song,A song is in its origin (and still in most cas...
1,19,Safe From Harm,17,,19,Zarzuela,A zarzuela is a Spanish lyric-dramatic work th...
2,16,Hymn of the Big Wheel,17,,16,Symphony,"A symphony is an extended composition, almost ..."
3,15,Blue Lines,17,,15,Song-cycle,A song cycle is a group of songs designed to b...
4,10,Daydreaming,17,,10,Opera,An opera is a dramatised work (text + musical ...
5,11,Five Man Army,17,,11,Oratorio,An oratorio is a large (usually sacred) musica...
6,18,One Love,17,,18,Symphonic poem,A symphonic poem is a piece of programmatic or...
