1. Load and Explore the Data

    - Import the necessary libraries: pandas, numpy, and sqlite3.
    - Load the CSV file into a pandas dataframe.
    - Display the first few rows of the dataframe to understand the structure of the data.
    - Print the column names of the dataframe.

In [2]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra
import sqlite3 #`SQLite` database library

In [5]:
# Read the Netflix titles CSV into a DataFrame.
df = pd.read_csv('netflix_titles.csv')

# Preview the first five rows. `display` is not imported in this notebook;
# it is the rich-output helper the notebook environment provides.
display(df.head(n=5))

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [6]:
# Print the DataFrame's column labels (shown as a pandas Index of names).
print(df.columns)

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')


2. Create SQLite Database and Export DataFrame

    - Create a SQLite database using Python's built-in sqlite3 module.
    - Export the dataframe as a table to the SQLite database.

In [7]:
# Open the SQLite database file 'netflix.db' with the standard-library
# sqlite3 driver (the file is created if it does not exist yet).
conn = sqlite3.connect('netflix.db')

# Write the DataFrame to a table called "netflix": an existing table of that
# name is replaced, and the DataFrame index is not stored as a column.
df.to_sql(name='netflix', con=conn, if_exists='replace', index=False)

8807

3. Query 1: Select *


In [9]:
# Query 1: fetch every column of every row in the netflix table.
query = """
SELECT *
FROM netflix
"""

# Run the statement on the open connection and preview the first five rows.
# Note that this rebinds `df` to the query result.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


4. Query 2: Select Distinct

    


In [10]:
# Query 2: list each show_id value once (DISTINCT drops repeated values).
query = """
SELECT DISTINCT show_id FROM netflix
"""

# Run the statement and preview the first five ids.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,show_id
0,s1
1,s2
2,s3
3,s4
4,s5


5. Query 3: Select Where

    


In [11]:
# Query 3: distinct titles whose release_year is strictly greater than 2020.
query = """
SELECT DISTINCT title FROM netflix WHERE release_year > 2020
"""

# Run the statement and preview the first five titles.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title
0,Blood & Water
1,Ganglands
2,Jailbirds New Orleans
3,Kota Factory
4,Midnight Mass


6. Query 4: Select and, or, not

    


In [13]:
# Query 4: distinct titles released after 2020 that are TV shows rated TV-MA.
# All three conditions are combined with AND, so a row must satisfy each one.
query = """
SELECT DISTINCT title
FROM netflix
WHERE release_year > 2020
AND type = 'TV Show'
AND rating = 'TV-MA'
"""

# Run the statement and preview the first five titles.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title
0,Blood & Water
1,Ganglands
2,Jailbirds New Orleans
3,Kota Factory
4,Midnight Mass


7. Query 5: Order By

    


In [14]:
# Query 5: every title, sorted by title (ORDER BY defaults to ascending).
query = """
SELECT title
FROM netflix
ORDER BY title
"""

# Run the statement and preview the first five sorted titles.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title
0,#Alive
1,#AnneFrank - Parallel Stories
2,#FriendButMarried
3,#FriendButMarried 2
4,#Roxy


8. Query 6: Limit Values

    


In [15]:
# Query 6: let the database cap the result at five rows with LIMIT.
query = """
SELECT *
FROM netflix
LIMIT 5
"""

# Run the statement; the result already holds at most five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


9. Query 7: Min, Max, Count, Avg, Sum

    


In [16]:
# Query 7: aggregate release_year over the whole table -- smallest, largest,
# number of non-NULL values, mean, and total -- returned as a single row.
query = """
SELECT MIN(release_year) AS min_release_year, 
       MAX(release_year) AS max_release_year, 
       COUNT(release_year) AS count_release_year, 
       AVG(release_year) AS avg_release_year, 
       SUM(release_year) AS sum_release_year
FROM netflix
"""

# Run the statement and show the one-row summary.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,min_release_year,max_release_year,count_release_year,avg_release_year,sum_release_year
0,1925,2021,8807,2014.180198,17738885


10. Query 8: Like

    


In [17]:
# Query 8: distinct country values containing the text "United" anywhere
# (the % wildcards on both sides match any leading/trailing characters).
query = """
SELECT DISTINCT country
FROM netflix
WHERE country LIKE '%United%'
"""

# Run the statement and preview the first five matches.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,country
0,United States
1,"United States, Ghana, Burkina Faso, United Kin..."
2,United Kingdom
3,"United States, India, France"
4,"China, Canada, United States"


11. Query 9: In

    


In [18]:
# Query 9: rows whose country is exactly one of the three listed values.
# IN compares the whole field, so combined values such as
# "United States, India, France" are not matched.
query = """
SELECT *
FROM netflix
WHERE country IN ('United States', 'United Kingdom', 'India')
"""

# Run the statement and preview the first five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
2,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
3,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...
4,s16,TV Show,Dear White People,,"Logan Browning, Brandon P. Bell, DeRon Horton,...",United States,"September 22, 2021",2021,TV-MA,4 Seasons,"TV Comedies, TV Dramas",Students of color navigate the daily slights a...


12. Query 10: Between

    


In [19]:
# Query 10: rows released in 2020 or 2021 (BETWEEN includes both endpoints).
query = """
SELECT *
FROM netflix
WHERE release_year BETWEEN 2020 AND 2021
"""

# Run the statement and preview the first five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


13. Query 11: Joins

    


In [20]:
# Query 11: self-join -- the netflix table is aliased twice (n1, n2) and
# joined on show_id, returning the title from each side of the join.
# In the preview each row pairs with itself, so title1 equals title2.
query = """
SELECT n1.title AS title1, n2.title AS title2
FROM netflix n1
JOIN netflix n2 ON n1.show_id = n2.show_id
"""

# Run the statement and preview the first five joined rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title1,title2
0,Dick Johnson Is Dead,Dick Johnson Is Dead
1,Blood & Water,Blood & Water
2,Ganglands,Ganglands
3,Jailbirds New Orleans,Jailbirds New Orleans
4,Kota Factory,Kota Factory


14. Query 12: Unions

    


In [21]:
# Query 12: stack (title, type) on top of (director, type) with UNION.
# UNION removes duplicate rows, and the result takes its column names from
# the first SELECT, so director values appear under the "title" column.
query = """
SELECT title, type
FROM netflix
UNION
SELECT director, type
FROM netflix
"""

# Run the statement and preview the first five combined rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,type
0,,Movie
1,,TV Show
2,#Alive,Movie
3,#AnneFrank - Parallel Stories,Movie
4,#FriendButMarried,Movie


15. Query 13: Case Statements

    


In [22]:
# Query 13: derive a show_type label with CASE -- 'TV Show' when type is
# 'TV Show', and 'Movie' for every other row.
query = """
SELECT title,
    CASE
        WHEN type = 'TV Show' THEN 'TV Show'
        ELSE 'Movie'
    END AS show_type
    FROM netflix
"""

# Run the statement and preview the first five labelled rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,show_type
0,Dick Johnson Is Dead,Movie
1,Blood & Water,TV Show
2,Ganglands,TV Show
3,Jailbirds New Orleans,TV Show
4,Kota Factory,TV Show


16. Query 14: Sub Queries

    


In [23]:
# Query 14: keep rows whose release_year appears in the set of release years
# produced by the inner query (the release years of all TV shows).
query = """
SELECT title, type
FROM netflix
WHERE release_year IN (SELECT release_year FROM netflix WHERE type = 'TV Show')
"""

# Run the statement and preview the first five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,type
0,Dick Johnson Is Dead,Movie
1,Blood & Water,TV Show
2,Ganglands,TV Show
3,Jailbirds New Orleans,TV Show
4,Kota Factory,TV Show


17. Query 15: Coalesce

    


In [24]:
# Query 15: COALESCE substitutes 'No Director' when director is NULL.
# The WHERE clause keeps only the NULL-director rows, so every returned row
# shows the substituted text.
query = """
SELECT title, COALESCE(director, 'No Director') AS director
FROM netflix
WHERE director IS NULL
"""

# Run the statement and preview the first five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,director
0,Blood & Water,No Director
1,Jailbirds New Orleans,No Director
2,Kota Factory,No Director
3,"Vendetta: Truth, Lies and The Mafia",No Director
4,Crime Stories: India Detectives,No Director


18. Query 16: Convert

    


In [27]:
# Query 16: type conversion -- CAST turns release_year into TEXT.
query = """
SELECT title, CAST(release_year AS TEXT) AS release_year
FROM netflix
"""

# Run the statement and preview the first five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,release_year
0,Dick Johnson Is Dead,2020
1,Blood & Water,2021
2,Ganglands,2021
3,Jailbirds New Orleans,2021
4,Kota Factory,2021


19. Query 17: Lag/Lead

    


In [28]:
# Query 17: window functions LAG and LEAD over rows ordered by release_year.
# LAG yields the previous row's release_year and LEAD the next row's; the
# first row has no predecessor, so its prev_release_year is NULL.
# NOTE(review): release_year has ties (e.g. several 1942 titles), so the
# order of rows within a year is not pinned down by this ORDER BY.
query = """
SELECT title, release_year,
       LAG(release_year) OVER (ORDER BY release_year) AS prev_release_year,
       LEAD(release_year) OVER (ORDER BY release_year) AS next_release_year
FROM netflix
"""

# Run the statement and preview the first five rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,release_year,prev_release_year,next_release_year
0,Pioneers: First Women Filmmakers*,1925,,1942.0
1,Prelude to War,1942,1925.0,1942.0
2,The Battle of Midway,1942,1942.0,1943.0
3,Undercover: How to Operate Behind Enemy Lines,1943,1942.0,1943.0
4,Why We Fight: The Battle of Russia,1943,1943.0,1943.0


20. Query 18: Row Number

    


In [30]:
# Query 18: number the rows 1, 2, 3, ... in release_year order with the
# ROW_NUMBER window function.
# NOTE(review): release_year has ties, so which tied row gets which number
# is not pinned down by this ORDER BY.
query = """
SELECT title, release_year,
       ROW_NUMBER() OVER (ORDER BY release_year) AS row_number
FROM netflix
"""

# Run the statement and preview the first five numbered rows.
df = pd.read_sql_query(sql=query, con=conn)
df.head(n=5)

Unnamed: 0,title,release_year,row_number
0,Pioneers: First Women Filmmakers*,1925,1
1,Prelude to War,1942,2
2,The Battle of Midway,1942,3
3,Undercover: How to Operate Behind Enemy Lines,1943,4
4,Why We Fight: The Battle of Russia,1943,5
