## Create a New Database [(tutorial)](https://www.sqlitetutorial.net/sqlite-python/creating-database/)
The current directory contains CSV files; this notebook transforms them into SQLite3 databases.

In [17]:
import sqlite3
import pandas as pd

def csv_to_sqlite(csv_path, db_path, table, columns):
    """Load a header-less CSV file into a freshly (re)created SQLite table.

    Parameters
    ----------
    csv_path : str
        CSV file to read. It is read with ``names=columns``, so its first
        line is treated as data, not as a header.
    db_path : str
        SQLite database file to open (created if it does not exist).
    table : str
        Name of the table to (re)create inside ``db_path``.
    columns : list of str
        Column names to assign, in file order.

    Raises
    ------
    FileNotFoundError
        If ``csv_path`` does not exist (raised by ``pd.read_csv``).
    """
    # Read first so a missing CSV does not leave an empty database file behind.
    frame = pd.read_csv(csv_path, names=columns)
    con = sqlite3.connect(db_path)
    try:
        # 'replace' drops an existing table before writing, so re-running the
        # cell rebuilds the table instead of appending duplicate rows.
        # index=False: the DataFrame index is not written as a column.
        frame.to_sql(table, con, if_exists='replace', index=False, chunksize=10000)
    finally:
        # Always release the file handle, even if the write fails.
        con.close()


csv_to_sqlite(
    "films.csv", "films.db", "films",
    ['id', 'title', 'release_year', 'country', 'duration', 'language', 'certification', 'gross', 'budget'],
)

csv_to_sqlite(
    "people.csv", "people.db", "people",
    ['id', 'name', 'birthdate', 'deathdate'],
)

csv_to_sqlite(
    "reviews.csv", "reviews.db", "reviews",
    ['film_id', 'num_user', 'num_critic', 'imdb_score', 'num_votes', 'facebook_likes'],
)


## Queries to database people.db

In [36]:
# Open people.db and keep a cursor on that connection
con = sqlite3.connect(database="people.db")
cur = con.cursor()

COUNT(*) tells you how many records are in a table.  
However, if you want to count the number of non-missing values in a particular field, 
you can call COUNT() on just that field.

Looking at the differences between the counts of individual fields and the count of all records can provide useful insights into your data.

In [26]:
# Count the number of records in the people table.
# COUNT(*) counts every row; COUNT(id) would skip any row whose id is NULL.
pd.read_sql("""SELECT COUNT(*) AS count_records
               FROM people;""",
            con)


Unnamed: 0,count_records
0,8397


In [37]:
# Number of rows in people whose birthdate is not missing
birthdate_count_sql = """SELECT COUNT(birthdate) AS count_birthdate
               FROM people;"""
pd.read_sql(sql=birthdate_count_sql, con=con)


Unnamed: 0,count_birthdate
0,6152


In [30]:
# Close the current connection before `con` is rebound to the next database
con.close()

## Queries to database films.db

In [23]:
# Open films.db and keep a cursor on that connection
con = sqlite3.connect(database="films.db")
cur = con.cursor()

### COUNT(*) tells you how many records are in a table. However, if you want to count the number of non-missing values in a particular field, you can call COUNT() on just that field.

In [45]:
# Count the languages and countries represented in the films table.
#
# Looking at the differences between the counts of individual fields
# and the count of all records can provide useful insights into your data:
# COUNT(*) counts every row, while COUNT(field) counts only the rows where
# that field is not missing.
pd.read_sql("""SELECT COUNT(*) AS count_all_records,
               COUNT(language) AS count_languages,
               COUNT(country) AS count_countries
               FROM films;""",
            con)

Unnamed: 0,count_all_records,count_languages,count_countries
0,4968,4957,4966


### Often query results will include many duplicate values. You can use the DISTINCT keyword to select the unique values from a field.

In [46]:
# Unique country values that appear in the films table
distinct_country_sql = """SELECT DISTINCT country 
               FROM films;"""
pd.read_sql(sql=distinct_country_sql, con=con)

Unnamed: 0,country
0,USA
1,Germany
2,Japan
3,Denmark
4,UK
...,...
60,Kenya
61,Slovenia
62,Pakistan
63,Chile


In [48]:
# How many different country values appear in the films table
pd.read_sql(
    con=con,
    sql="""SELECT COUNT(DISTINCT country) AS count_distinct_countries
               FROM films;""",
)

Unnamed: 0,count_distinct_countries
0,64


### Filtering with WHERE allows you to analyze your data better.  WHERE can also filter string values.

In [24]:
# Number of films whose language column equals 'Spanish'
spanish_count_sql = """SELECT COUNT(*) AS count_spanish
               FROM films
               WHERE language = 'Spanish';"""
pd.read_sql(spanish_count_sql, con=con)

Unnamed: 0,count_spanish
0,40


In [34]:
# Close the current connection before `con` is rebound to the next database
con.close()

## Queries to database reviews.db

In [18]:
# Open reviews.db and keep a cursor on that connection
con = sqlite3.connect(database="reviews.db")
cur = con.cursor()

### Filtering with WHERE allows you to analyze your data better. WHERE can filter numeric values with comparison operators such as `>` and `<`, and it can also filter string values.

In [19]:
# film_id and imdb_score for every review scored above 7.0
high_score_sql = """SELECT film_id, imdb_score
               FROM reviews
               WHERE imdb_score > 7.0;"""
pd.read_sql(sql=high_score_sql, con=con)

Unnamed: 0,film_id,imdb_score
0,3934,7.1
1,74,7.6
2,1254,8.0
3,4841,8.1
4,3252,7.2
...,...,...
1531,199,8.0
1532,1814,7.2
1533,4158,8.0
1534,4086,7.1


In [20]:
# Ten film_id / facebook_likes pairs where the like count is below 1000
pd.read_sql(
    sql="""SELECT film_id, facebook_likes
               FROM reviews
               WHERE facebook_likes < 1000
               LIMIT 10;""",
    con=con,
)

Unnamed: 0,film_id,facebook_likes
0,3405,0
1,478,491
2,74,930
3,740,0
4,2869,689
5,1181,0
6,2020,0
7,2312,912
8,1820,872
9,831,975


In [21]:
# Count the records with at least 100,000 votes.
# "At least" is inclusive, so the filter uses >= (a film with exactly
# 100,000 votes is counted). The existing column alias is kept so the
# result's column name does not change.
pd.read_sql("""SELECT COUNT(num_votes) AS films_over_100K_votes
               FROM reviews
               WHERE num_votes >= 100000;""",
            con)

Unnamed: 0,films_over_100K_votes
0,1211


In [10]:
# Close the current database connection now that the queries are done
con.close()