We've often needed to count the number of records that matched a particular SQL query.

In [1]:
# Import sqlite3.
import sqlite3

In [2]:
# Open the factbook.db SQLite database file and keep the connection in conn.
conn = sqlite3.connect(database="factbook.db")

In [3]:
# Run SELECT * against the facts table and materialize every row
# as a list of tuples by iterating over the returned cursor.
facts = list(conn.execute("SELECT * FROM facts;"))

# Show the raw rows.
print(facts)

# Count the rows on the Python side by measuring the list, then report the total.
facts_count = len(facts)
print("facts_count = %d" % (facts_count,))

[(1, 'af', 'Afghanistan', 652230, 652230, 0, 32564342, 2.32, 38.57, 13.89, 1.51, '2015-11-01 13:19:49.461734', '2015-11-01 13:19:49.461734'), (2, 'al', 'Albania', 28748, 27398, 1350, 3029278, 0.3, 12.92, 6.58, 3.3, '2015-11-01 13:19:54.431082', '2015-11-01 13:19:54.431082'), (3, 'ag', 'Algeria', 2381741, 2381741, 0, 39542166, 1.84, 23.67, 4.31, 0.92, '2015-11-01 13:19:59.961286', '2015-11-01 13:19:59.961286'), (4, 'an', 'Andorra', 468, 468, 0, 85580, 0.12, 8.13, 6.96, 0.0, '2015-11-01 13:20:03.659945', '2015-11-01 13:20:03.659945'), (5, 'ao', 'Angola', 1246700, 1246700, 0, 19625353, 2.78, 38.78, 11.49, 0.46, '2015-11-01 13:20:08.625072', '2015-11-01 13:20:08.625072'), (6, 'ac', 'Antigua and Barbuda', 442, 442, 0, 92436, 1.24, 15.85, 5.69, 2.21, '2015-11-01 13:20:13.049627', '2015-11-01 13:20:13.049627'), (7, 'ar', 'Argentina', 2780400, 2736690, 43710, 43431886, 0.93, 16.64, 7.33, 0.0, '2015-11-01 13:20:18.233063', '2015-11-01 13:20:18.233063'), (8, 'am', 'Armenia', 29743, 28203, 1540, 

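SQL has a **COUNT** aggregation function that allows us to count the number of records in a table. `COUNT(*)` counts every row, while `COUNT(column)` counts only the rows where that column is not null.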

In [4]:
# Count every row in the facts table with COUNT(*).
# fetchall() returns a list of row tuples, so the count comes back as [(count,)].
conn.execute("SELECT COUNT(*) FROM facts;").fetchall()

[(261,)]

In [5]:
# Count the non-null values in the area_water column.
# COUNT(column) skips NULLs, so the result can be lower than the table's row count.
conn.execute("SELECT COUNT(area_water) FROM facts;").fetchall()

[(243,)]

In [6]:
# Ask SQLite how many rows of the facts table have a non-null birth_rate.
result = conn.execute("SELECT COUNT(birth_rate) FROM facts;").fetchall()

# result has the shape [(count,)]; unpack that single row and column.
[(birth_rate_count,)] = result

# Show the extracted integer.
print(birth_rate_count)

228


SQL has other aggregation functions. **MIN** and **MAX**, for example, find the minimum and maximum values in a column.

In [7]:
# Find the highest value in the birth_rate column of the facts table.
# The aggregate produces one row with one column, returned as [(value,)].
conn.execute("SELECT MAX(birth_rate) FROM facts;").fetchall()

[(45.45,)]

In [8]:
# Smallest value in the population_growth column, computed by SQL's MIN aggregate.
# The aggregate query yields a single one-column row, which is unpacked directly.
(min_population_growth,) = conn.execute(
    "SELECT MIN(population_growth) FROM facts;"
).fetchone()

print(min_population_growth)

0.0


In [9]:
# Largest value in the death_rate column, computed by SQL's MAX aggregate.
# fetchone() returns the single result row; index 0 is its only column.
max_death_rate = conn.execute(
    "SELECT MAX(death_rate) FROM facts;"
).fetchone()[0]

print(max_death_rate)

14.89


**SUM** finds the total of all of the non-null values in a numeric column. **AVG** finds the mean of all of the non-null values in a column.

In [10]:
# Add up every non-null value in the birth_rate column of the facts table.
conn.execute("SELECT SUM(birth_rate) FROM facts;").fetchall()

[(4406.909999999998,)]

In [11]:
# Compute the mean of the non-null values in the birth_rate column of the facts table.
conn.execute("SELECT AVG(birth_rate) FROM facts;").fetchall()

[(19.32855263157894,)]

In [12]:
# Total of the area_land column, computed by SQL's SUM aggregate.
# fetchall() yields [(total,)]; unpack the single row and column in one step.
[(total_land_area,)] = conn.execute(
    "SELECT SUM(area_land) FROM facts;"
).fetchall()

print(total_land_area)

128584834


In [13]:
# Mean of the area_water column, computed by SQL's AVG aggregate.
# The single result row holds one column, which is unpacked into avg_water_area.
(avg_water_area,) = conn.execute(
    "SELECT AVG(area_water) FROM facts;"
).fetchone()

print(avg_water_area)

19067.59259259259


In [14]:
# Combine multiple aggregation functions into a single query.
# Each aggregate becomes one column of the single result row, in the order listed in the SELECT.
conn.execute("SELECT COUNT(*), SUM(death_rate), AVG(population_growth) FROM facts;").fetchall()

[(261, 1783.2500000000002, 1.2009745762711865)]

In [15]:
# Compute three aggregates in a single query: the mean and the total of the
# population column, and the largest birth_rate. fetchall()[0] keeps only the
# single result row, as a tuple.
result_turple = conn.execute("SELECT AVG(population), SUM(population), MAX(birth_rate) FROM facts;").fetchall()[0]

print(result_turple)

# Unpack the row positionally; the order matches the SELECT list.
mean_pop, sum_pop, max_birth_rate = result_turple

(62094928.32231405, 15026972654, 45.45)


In [16]:
# Count the number of rows where population is greater than 5000000.
# The WHERE clause filters the rows before COUNT(*) aggregates them.
conn.execute("SELECT COUNT(*) FROM facts WHERE population>5000000;").fetchall()

[(122,)]

In [17]:
# Mean population_growth restricted to rows whose population exceeds 10000000.
# fetchone() returns the single aggregate row; index 0 is its only column.
population_growth = conn.execute(
    "SELECT AVG(population_growth) FROM facts WHERE population>10000000;"
).fetchone()[0]

print(population_growth)

1.4572222222222226


In [18]:
# This query selects the unique values in the name column of facts, with duplicates removed.
# fetchmany(10) retrieves only the first 10 rows of that result.
conn.execute("SELECT DISTINCT name FROM facts;").fetchmany(10)

[('Afghanistan',),
 ('Albania',),
 ('Algeria',),
 ('Andorra',),
 ('Angola',),
 ('Antigua and Barbuda',),
 ('Argentina',),
 ('Armenia',),
 ('Australia',),
 ('Austria',)]

In [19]:
# The query selects the unique combinations of values in the name and population columns from facts;
# two rows are duplicates only when both columns match.
# fetchmany(10) retrieves only the first 10 rows of that result.
conn.execute("SELECT DISTINCT name, population FROM facts;").fetchmany(10)

[('Afghanistan', 32564342),
 ('Albania', 3029278),
 ('Algeria', 39542166),
 ('Andorra', 85580),
 ('Angola', 19625353),
 ('Antigua and Barbuda', 92436),
 ('Argentina', 43431886),
 ('Armenia', 3056382),
 ('Australia', 22751014),
 ('Austria', 8665550)]

In [20]:
# Query the distinct values of the birth_rate column of the facts table.
# Only the first 10 rows of the result are fetched.
unique_birth_rates = conn.execute(
    "SELECT DISTINCT birth_rate FROM facts;"
).fetchmany(size=10)

print(unique_birth_rates)

[(38.57,), (12.92,), (23.67,), (8.13,), (38.78,), (15.85,), (16.64,), (13.61,), (12.15,), (9.41,)]


In [21]:
# The query counts the distinct non-null values in the population column,
# so a value that appears in several rows is counted once.
conn.execute("SELECT COUNT(DISTINCT population) FROM facts;").fetchall()

[(241,)]

In [22]:
# This query finds the mean of the distinct values in the birth_rate column,
# so a value that appears in several rows contributes to the average only once.
conn.execute("SELECT AVG(DISTINCT birth_rate) FROM facts;").fetchall()

[(19.563009259259253,)]

In [23]:
# Mean of the distinct birth_rate values among rows whose population exceeds 20000000.
# The single aggregate row holds one column, which is unpacked into average_birth_rate.
(average_birth_rate,) = conn.execute(
    "SELECT AVG(DISTINCT birth_rate) FROM facts WHERE population>20000000;"
).fetchone()

print(average_birth_rate)

20.43473684210527


In [24]:
# Total of the distinct population values among rows whose area_land exceeds 1000000.
# fetchall() yields [(total,)]; unpack the single row and column in one step.
[(sum_population,)] = conn.execute(
    "SELECT SUM(DISTINCT population) FROM facts WHERE area_land>1000000;"
).fetchall()

print(sum_population)

4233873015


In [25]:
# The query divides each value in the population column by 1000000.
# Because the population column contains integers and the divisor is an integer literal,
# SQLite performs integer division and the fractional part is discarded.
# fetchmany(10) retrieves only the first 10 rows.
conn.execute("SELECT population/1000000 FROM facts;").fetchmany(10)

[(32,), (3,), (39,), (0,), (19,), (0,), (43,), (3,), (22,), (8,)]

In [26]:
# Dividing by the float literal 1000000.0 makes the query return floats,
# so the fractional part is kept instead of being truncated by integer division.
# fetchmany(10) retrieves only the first 10 rows.
conn.execute("SELECT population/1000000.0 FROM facts;").fetchmany(10)

[(32.564342,),
 (3.029278,),
 (39.542166,),
 (0.08558,),
 (19.625353,),
 (0.092436,),
 (43.431886,),
 (3.056382,),
 (22.751014,),
 (8.66555,)]

Here are the rules for what an arithmetic operation will return:
* Two floats - Returns a float (ex. SELECT birth_rate / 1000000.0 FROM facts;)
* A float and an integer - Returns a float (ex. SELECT population / 1000000.0 FROM facts;)
* Two integers - Returns an integer (ex. SELECT population / 1000000 FROM facts;)

In [27]:
# Divide each population_growth value by 1000000.0 inside the query; the float
# divisor makes every result a float. Only the first 10 rows are fetched.
population_growth_millions = conn.execute(
    "SELECT population_growth/1000000.0 FROM facts;"
).fetchmany(size=10)

print(population_growth_millions)

[(2.32e-06,), (3e-07,), (1.8400000000000002e-06,), (1.2e-07,), (2.7799999999999996e-06,), (1.24e-06,), (9.300000000000001e-07,), (1.5e-07,), (1.0700000000000001e-06,), (5.5e-07,)]


In [28]:
# The query divides each value in the birth_rate column by the death_rate value in the same row.
# fetchmany(10) retrieves only the first 10 rows.
conn.execute("SELECT birth_rate/death_rate FROM facts;").fetchmany(10)

[(2.776817854571634,),
 (1.9635258358662613,),
 (5.491879350348029,),
 (1.168103448275862,),
 (3.3751087902523933,),
 (2.785588752196836,),
 (2.27012278308322,),
 (1.4571734475374731,),
 (1.7016806722689077,),
 (0.9989384288747346,)]

In [29]:
# The query adds together the birth_rate and migration_rate columns, then divides by the death_rate column.
# The parentheses make the addition happen before the division.
# fetchmany(10) retrieves only the first 10 rows.
conn.execute("SELECT (birth_rate+migration_rate)/death_rate FROM facts;").fetchmany(10)

[(2.8855291576673863,),
 (2.465045592705167,),
 (5.705336426914155,),
 (1.168103448275862,),
 (3.4151436031331595,),
 (3.173989455184534,),
 (2.27012278308322,),
 (2.0781584582441113,),
 (2.492997198879552,),
 (1.589171974522293,)]

In [30]:
# Project each country's population one year ahead inside the query:
# population_growth is treated as a percentage, so the current population is
# scaled by (1 + population_growth/100.0). Only the first 10 rows are fetched.
next_year_population = conn.execute(
    "SELECT population*(1+population_growth/100.0) FROM facts;"
).fetchmany(size=10)

print(next_year_population)

[(33319834.734400004,), (3038365.834,), (40269741.8544,), (85682.69600000001,), (20170937.8134,), (93582.2064,), (43835802.5398,), (3060966.5730000003,), (22994449.849799998,), (8713210.525,)]


It's often advantageous to do these computations in the SQL database instead of a Python environment because it's faster to code and execute.