In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the existing Academy Awards database through a SQLite URI in read-write mode.
# With a bare filename, sqlite3.connect() silently creates an empty database when the
# file is missing, and the mistake only surfaces later as a "no such table" error.
# mode=rw refuses to create the file, so a wrong path raises sqlite3.OperationalError here.
conn = sqlite3.connect("file:academy_awards.db?mode=rw", uri=True)

In [3]:
# Preview up to 20 rows of the nominations table.
# Each row describes an individual actor's nomination.
pd.read_sql_query(sql="SELECT * FROM nominations LIMIT 20;", con=conn)

Unnamed: 0,id,ceremony_id,category,nominee,movie,character,won
0,1,10,Actor -- Leading Role,Javier Bardem,Biutiful,Uxbal,0
1,2,10,Actor -- Leading Role,Jeff Bridges,True Grit,Rooster Cogburn,0
2,3,10,Actor -- Leading Role,Jesse Eisenberg,The Social Network,Mark Zuckerberg,0
3,4,10,Actor -- Leading Role,Colin Firth,The King's Speech,King George VI,1
4,5,10,Actor -- Leading Role,James Franco,127 Hours,Aron Ralston,0
5,6,10,Actor -- Supporting Role,Christian Bale,The Fighter,Dicky Eklund,1
6,7,10,Actor -- Supporting Role,John Hawkes,Winter's Bone,Teardrop,0
7,8,10,Actor -- Supporting Role,Jeremy Renner,The Town,James Coughlin,0
8,9,10,Actor -- Supporting Role,Mark Ruffalo,The Kids Are All Right,Paul,0
9,10,10,Actor -- Supporting Role,Geoffrey Rush,The King's Speech,Lionel Logue,0


**ceremony_id** - integer field, foreign key reference to the id column from the ceremonies table.  

In [4]:
# Show the schema of nominations: one row per column, with its declared type
# and whether it is part of the primary key.
pd.read_sql_query(sql='PRAGMA table_info(nominations);', con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,integer,0,,1
1,1,ceremony_id,integer,0,,0
2,2,category,text,0,,0
3,3,nominee,text,0,,0
4,4,movie,text,0,,0
5,5,character,text,0,,0
6,6,won,boolean,0,,0


In [5]:
# Load every row of the ceremonies table.
# Each row describes an individual Academy Awards ceremony.
pd.read_sql_query(sql="SELECT * FROM ceremonies;", con=conn)

Unnamed: 0,id,year,host
0,0,2000,Billy Crystal
1,1,2001,Steve Martin
2,2,2002,Whoopi Goldberg
3,3,2003,Steve Martin
4,4,2004,Billy Crystal
5,5,2005,Chris Rock
6,6,2006,Jon Stewart
7,7,2007,Ellen DeGeneres
8,8,2008,Jon Stewart
9,9,2009,Hugh Jackman


In [6]:
# Show the schema of ceremonies: one row per column, with its declared type
# and whether it is part of the primary key.
pd.read_sql_query(sql='PRAGMA table_info(ceremonies);', con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,integer,0,,1
1,1,year,integer,0,,0
2,2,host,text,0,,0


In [7]:
# Pair every nomination with its ceremony by matching the ceremony_id foreign key
# against ceremonies.id. The nominations columns come first in the result.
query = (
    'SELECT * from nominations '
    'INNER JOIN ceremonies ON nominations.ceremony_id==ceremonies.id;'
)
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,id,ceremony_id,category,nominee,movie,character,won,id.1,year,host
0,1,10,Actor -- Leading Role,Javier Bardem,Biutiful,Uxbal,0,10,2010,Steve Martin
1,2,10,Actor -- Leading Role,Jeff Bridges,True Grit,Rooster Cogburn,0,10,2010,Steve Martin
2,3,10,Actor -- Leading Role,Jesse Eisenberg,The Social Network,Mark Zuckerberg,0,10,2010,Steve Martin
3,4,10,Actor -- Leading Role,Colin Firth,The King's Speech,King George VI,1,10,2010,Steve Martin
4,5,10,Actor -- Leading Role,James Franco,127 Hours,Aron Ralston,0,10,2010,Steve Martin
5,6,10,Actor -- Supporting Role,Christian Bale,The Fighter,Dicky Eklund,1,10,2010,Steve Martin
6,7,10,Actor -- Supporting Role,John Hawkes,Winter's Bone,Teardrop,0,10,2010,Steve Martin
7,8,10,Actor -- Supporting Role,Jeremy Renner,The Town,James Coughlin,0,10,2010,Steve Martin
8,9,10,Actor -- Supporting Role,Mark Ruffalo,The Kids Are All Right,Paul,0,10,2010,Steve Martin
9,10,10,Actor -- Supporting Role,Geoffrey Rush,The King's Speech,Lionel Logue,0,10,2010,Steve Martin


In [8]:
# Same join condition with the tables swapped: the ceremonies columns now come first
# in the result, followed by the nominations columns.
query = (
    'SELECT * from ceremonies '
    'INNER JOIN nominations ON nominations.ceremony_id==ceremonies.id;'
)
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,id,year,host,id.1,ceremony_id,category,nominee,movie,character,won
0,10,2010,Steve Martin,1,10,Actor -- Leading Role,Javier Bardem,Biutiful,Uxbal,0
1,10,2010,Steve Martin,2,10,Actor -- Leading Role,Jeff Bridges,True Grit,Rooster Cogburn,0
2,10,2010,Steve Martin,3,10,Actor -- Leading Role,Jesse Eisenberg,The Social Network,Mark Zuckerberg,0
3,10,2010,Steve Martin,4,10,Actor -- Leading Role,Colin Firth,The King's Speech,King George VI,1
4,10,2010,Steve Martin,5,10,Actor -- Leading Role,James Franco,127 Hours,Aron Ralston,0
5,10,2010,Steve Martin,6,10,Actor -- Supporting Role,Christian Bale,The Fighter,Dicky Eklund,1
6,10,2010,Steve Martin,7,10,Actor -- Supporting Role,John Hawkes,Winter's Bone,Teardrop,0
7,10,2010,Steve Martin,8,10,Actor -- Supporting Role,Jeremy Renner,The Town,James Coughlin,0
8,10,2010,Steve Martin,9,10,Actor -- Supporting Role,Mark Ruffalo,The Kids Are All Right,Paul,0
9,10,2010,Steve Martin,10,10,Actor -- Supporting Role,Geoffrey Rush,The King's Speech,Lionel Logue,0


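Storing each ceremony's year and host on every nomination row would repeat the same values many times. We instead chose to **normalize** the data, which involves separating it into smaller tables with less redundant information and creating relations between the appropriate tables.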

You can read more about the benefits of **database normalization** [here](https://en.wikipedia.org/wiki/Database_normalization#Objectives).

There are many types of relations you can create between tables to represent the links between columns. In this mission, we'll focus on the 2 most common relations:
* one-to-many.
* many-to-many.

A **one-to-many** relation exists whenever many rows in one table need to relate to one row in the other table.

Here are some other examples of **one-to-many** relations:
* a car insurance policy can have multiple people on it, but each person can only belong to one policy.
* a mother can have many children, but each child can only have one birth mother.
* a reporter can have many articles, but each article can only have one associated reporter.

In [9]:
# Movies with a winning nomination at the 2010 ceremony.
# The result has one row per winning nomination, so a film with several wins
# appears more than once.
query = (
    'SELECT movie FROM nominations '
    'INNER JOIN ceremonies '
    'ON nominations.ceremony_id==ceremonies.id '
    'WHERE ceremonies.year==2010 AND nominations.won==1;'
)
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,movie
0,The King's Speech
1,The Fighter
2,Black Swan
3,The Fighter


In [10]:
# For the 2010 ceremony, list the year and host (from ceremonies) next to the
# movie and nominee (from nominations) of each nomination.
query = (
    'SELECT ceremonies.year,ceremonies.host,nominations.movie,nominations.nominee '
    'FROM nominations '
    'INNER JOIN ceremonies '
    'ON nominations.ceremony_id==ceremonies.id '
    'WHERE ceremonies.year==2010;'
)
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,year,host,movie,nominee
0,2010,Steve Martin,Biutiful,Javier Bardem
1,2010,Steve Martin,True Grit,Jeff Bridges
2,2010,Steve Martin,The Social Network,Jesse Eisenberg
3,2010,Steve Martin,The King's Speech,Colin Firth
4,2010,Steve Martin,127 Hours,James Franco
5,2010,Steve Martin,The Fighter,Christian Bale
6,2010,Steve Martin,Winter's Bone,John Hawkes
7,2010,Steve Martin,The Town,Jeremy Renner
8,2010,Steve Martin,The Kids Are All Right,Mark Ruffalo
9,2010,Steve Martin,The King's Speech,Geoffrey Rush


In [11]:
# Find every ceremony year (and the film) for which Natalie Portman was nominated.
#
# The nominee name is passed as a bound parameter instead of being written into the
# SQL text. A double-quoted token such as "Natalie Portman" is an identifier in
# standard SQL; SQLite only treats it as a string literal through a legacy fallback
# that can be switched off at build time. Binding the value avoids relying on that
# fallback and also sidesteps quoting problems for names containing quotes.
query = (
    'SELECT ceremonies.year, nominations.movie FROM nominations '
    'INNER JOIN ceremonies '
    'ON nominations.ceremony_id = ceremonies.id '
    'WHERE nominations.nominee = ?;'
)

# fetchall() returns a list of (year, movie) tuples.
portman_movies = conn.execute(query, ("Natalie Portman",)).fetchall()
print(portman_movies)

[(2010, 'Black Swan'), (2004, 'Closer')]


To model a **many-to-many** relationship, we need to create a separate table that holds a foreign key to each of the two tables being related.

This table is called a **join table**, but is often referred to by [many other names](https://en.wikipedia.org/wiki/Associative_entity).

The rows in the join table contain the foreign keys to the 2 other tables.

Creating a join table is similar to creating a regular table, except that it needs 2 foreign key columns, one referencing each of the 2 tables in the many-to-many relationship:

```sql
CREATE TABLE movies_actors (
    id INTEGER PRIMARY KEY,
    movie_id INTEGER REFERENCES movies(id),
    actor_id INTEGER REFERENCES actors(id)
);
```

In [12]:
# Preview up to 10 rows of the movies_actors join table (id, movie_id, actor_id).
pd.read_sql_query(sql="SELECT * FROM movies_actors LIMIT 10;", con=conn)

Unnamed: 0,id,movie_id,actor_id
0,1,1,1
1,2,2,2
2,3,3,3
3,4,4,4
4,5,5,5
5,6,6,6
6,7,7,7
7,8,8,8
8,9,9,9
9,10,4,10


In [13]:
# Preview up to 10 rows of the actors table (id, actor).
pd.read_sql_query(sql="SELECT * FROM actors LIMIT 10;", con=conn)

Unnamed: 0,id,actor
0,1,Javier Bardem
1,2,Jeff Bridges
2,3,Jesse Eisenberg
3,4,Colin Firth
4,5,James Franco
5,6,Christian Bale
6,7,John Hawkes
7,8,Jeremy Renner
8,9,Mark Ruffalo
9,10,Geoffrey Rush


In [14]:
# Preview up to 10 rows of the movies table (id, movie).
pd.read_sql_query(sql="SELECT * FROM movies LIMIT 10;", con=conn)

Unnamed: 0,id,movie
0,1,Biutiful
1,2,True Grit
2,3,The Social Network
3,4,The King's Speech
4,5,127 Hours
5,6,The Fighter
6,7,Winter's Bone
7,8,The Town
8,9,The Kids Are All Right
9,10,Crazy Heart


In [15]:
# Fetch the first 5 rows of the movies_actors join table as a list of tuples.
query = 'SELECT * FROM movies_actors LIMIT 5;'

# Iterating the cursor yields the same rows that fetchall() would collect.
five_join_table = list(conn.execute(query))
five_join_table

[(1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4), (5, 5, 5)]

In [16]:
# Fetch the first 5 rows of the actors table as a list of (id, actor) tuples.
query = 'SELECT * FROM actors LIMIT 5;'

# Iterating the cursor yields the same rows that fetchall() would collect.
five_actors = list(conn.execute(query))
five_actors

[(1, 'Javier Bardem'),
 (2, 'Jeff Bridges'),
 (3, 'Jesse Eisenberg'),
 (4, 'Colin Firth'),
 (5, 'James Franco')]

In [17]:
# Fetch the first 5 rows of the movies table as a list of (id, movie) tuples.
query = 'SELECT * FROM movies LIMIT 5;'

# Iterating the cursor yields the same rows that fetchall() would collect.
five_movies = list(conn.execute(query))
five_movies

[(1, 'Biutiful'),
 (2, 'True Grit'),
 (3, 'The Social Network'),
 (4, "The King's Speech"),
 (5, '127 Hours')]