We've often needed to count the number of records that matched a particular SQL query.

In [1]:
# Import sqlite3.
import sqlite3

In [2]:
# Open the factbook.db SQLite database file and keep the connection in conn;
# the queries below run through this connection.
conn = sqlite3.connect(database='factbook.db')

In [3]:
# Pull every record of the facts table into Python as a list of row tuples
# and keep it in the facts variable.
facts = list(conn.execute("SELECT * FROM facts;"))

# Show the raw rows.
print(facts)

# The number of rows is simply the length of that list; store it in
# facts_count and report it.
facts_count = len(facts)
print("facts_count = %d" % facts_count)

[(1, 'af', 'Afghanistan', 652230, 652230, 0, 32564342, 2.32, 38.57, 13.89, 1.51, '2015-11-01 13:19:49.461734', '2015-11-01 13:19:49.461734'), (2, 'al', 'Albania', 28748, 27398, 1350, 3029278, 0.3, 12.92, 6.58, 3.3, '2015-11-01 13:19:54.431082', '2015-11-01 13:19:54.431082'), (3, 'ag', 'Algeria', 2381741, 2381741, 0, 39542166, 1.84, 23.67, 4.31, 0.92, '2015-11-01 13:19:59.961286', '2015-11-01 13:19:59.961286'), (4, 'an', 'Andorra', 468, 468, 0, 85580, 0.12, 8.13, 6.96, 0.0, '2015-11-01 13:20:03.659945', '2015-11-01 13:20:03.659945'), (5, 'ao', 'Angola', 1246700, 1246700, 0, 19625353, 2.78, 38.78, 11.49, 0.46, '2015-11-01 13:20:08.625072', '2015-11-01 13:20:08.625072'), (6, 'ac', 'Antigua and Barbuda', 442, 442, 0, 92436, 1.24, 15.85, 5.69, 2.21, '2015-11-01 13:20:13.049627', '2015-11-01 13:20:13.049627'), (7, 'ar', 'Argentina', 2780400, 2736690, 43710, 43431886, 0.93, 16.64, 7.33, 0.0, '2015-11-01 13:20:18.233063', '2015-11-01 13:20:18.233063'), (8, 'am', 'Armenia', 29743, 28203, 1540, 

SQL has a **COUNT** aggregation function that allows us to count the number of records in a table.

In [4]:
# COUNT(*) counts the rows of the facts table inside the database.
list(conn.execute("SELECT COUNT(*) FROM facts;"))

[(261,)]

In [5]:
# COUNT(area_water) counts only the rows whose area_water value is non-null.
list(conn.execute("SELECT COUNT(area_water) FROM facts;"))

[(243,)]

In [6]:
# Count the non-null values in the birth_rate column of the facts table.
# The result is a list holding a single one-column row.
result = list(conn.execute("SELECT COUNT(birth_rate) FROM facts;"))

# Unpack the lone integer out of that single row into birth_rate_count.
[(birth_rate_count,)] = result

# Display the count.
print(birth_rate_count)

228


SQL has other aggregation functions. **MIN** and **MAX**, for example, find the minimum and maximum values in a column.

In [7]:
# MAX picks out the largest value in the birth_rate column of facts.
list(conn.execute("SELECT MAX(birth_rate) FROM facts;"))

[(45.45,)]

In [8]:
# MIN finds the smallest value in the population_growth column. The aggregate
# comes back as one single-column row, so unpack its only value.
(min_population_growth,) = conn.execute("SELECT MIN(population_growth) FROM facts;").fetchone()

print(min_population_growth)

0.0


In [9]:
# MAX finds the largest value in the death_rate column; take the first (and
# only) column of the single row the aggregate returns.
max_death_rate = conn.execute("SELECT MAX(death_rate) FROM facts;").fetchone()[0]

print(max_death_rate)

14.89


**SUM** finds the total of all of the values in a numeric column. **AVG** finds the mean of all of the non-null values in a column.

In [10]:
# SUM adds up the values in the birth_rate column.
list(conn.execute("SELECT SUM(birth_rate) FROM facts;"))

[(4406.909999999998,)]

In [11]:
# AVG computes the mean of the non-null values in the birth_rate column.
list(conn.execute("SELECT AVG(birth_rate) FROM facts;"))

[(19.32855263157894,)]

In [12]:
# Total the area_land column with SUM and unpack the single value from the
# one-row result.
(total_land_area,) = conn.execute("SELECT SUM(area_land) FROM facts;").fetchone()

print(total_land_area)

128584834


In [13]:
# Average the area_water column with AVG; the aggregate yields one row with
# one column, which holds the mean.
avg_water_area = conn.execute("SELECT AVG(area_water) FROM facts;").fetchone()[0]

print(avg_water_area)

19067.59259259259


In [14]:
# Several aggregation functions can share one query; the single result row
# then carries one column per aggregate, in the order they were listed.
list(conn.execute("SELECT COUNT(*), SUM(death_rate), AVG(population_growth) FROM facts;"))

[(261, 1783.2500000000002, 1.2009745762711865)]

In [15]:
# Compute the mean population, the total population and the highest birth
# rate in a single query. The aggregates come back together as one row, so
# take the first (and only) row of the result.
result_turple = conn.execute("SELECT AVG(population), SUM(population), MAX(birth_rate) FROM facts;").fetchall()[0]

print(result_turple)

# Unpack the row in the same order as the columns in the SELECT list rather
# than indexing each position by hand.
mean_pop, sum_pop, max_birth_rate = result_turple

(62094928.32231405, 15026972654, 45.45)


In [16]:
# A WHERE clause restricts which rows the aggregate sees: count only the rows
# whose population is greater than 5000000.
list(conn.execute("SELECT COUNT(*) FROM facts WHERE population>5000000;"))

[(122,)]

In [17]:
# Mean population_growth over the rows whose population is greater than
# 10000000; unpack the single value from the one-row result.
(population_growth,) = conn.execute("SELECT AVG(population_growth) FROM facts WHERE population>10000000;").fetchone()

print(population_growth)

1.4572222222222226


In [18]:
# DISTINCT collapses repeated values: each value of the name column in facts
# appears only once in the result.
list(conn.execute("SELECT DISTINCT name FROM facts;"))

[('Afghanistan',),
 ('Albania',),
 ('Algeria',),
 ('Andorra',),
 ('Angola',),
 ('Antigua and Barbuda',),
 ('Argentina',),
 ('Armenia',),
 ('Australia',),
 ('Austria',),
 ('Azerbaijan',),
 ('Bahamas, The',),
 ('Bahrain',),
 ('Bangladesh',),
 ('Barbados',),
 ('Belarus',),
 ('Belgium',),
 ('Belize',),
 ('Benin',),
 ('Bhutan',),
 ('Bolivia',),
 ('Bosnia and Herzegovina',),
 ('Botswana',),
 ('Brazil',),
 ('Brunei',),
 ('Bulgaria',),
 ('Burkina Faso',),
 ('Burma',),
 ('Burundi',),
 ('Cambodia',),
 ('Cameroon',),
 ('Canada',),
 ('Cabo Verde',),
 ('Central African Republic',),
 ('Chad',),
 ('Chile',),
 ('China',),
 ('Colombia',),
 ('Comoros',),
 ('Congo, Democratic Republic of the',),
 ('Congo, Republic of the',),
 ('Costa Rica',),
 ("Cote d'Ivoire",),
 ('Croatia',),
 ('Cuba',),
 ('Cyprus',),
 ('Czech Republic',),
 ('Denmark',),
 ('Djibouti',),
 ('Dominica',),
 ('Dominican Republic',),
 ('Ecuador',),
 ('Egypt',),
 ('El Salvador',),
 ('Equatorial Guinea',),
 ('Eritrea',),
 ('Estonia',),
 ('Ethi

In [19]:
# With more than one column, DISTINCT applies to the combination: each unique
# (name, population) pair in facts is returned once.
list(conn.execute("SELECT DISTINCT name, population FROM facts;"))

[('Afghanistan', 32564342),
 ('Albania', 3029278),
 ('Algeria', 39542166),
 ('Andorra', 85580),
 ('Angola', 19625353),
 ('Antigua and Barbuda', 92436),
 ('Argentina', 43431886),
 ('Armenia', 3056382),
 ('Australia', 22751014),
 ('Austria', 8665550),
 ('Azerbaijan', 9780780),
 ('Bahamas, The', 324597),
 ('Bahrain', 1346613),
 ('Bangladesh', 168957745),
 ('Barbados', 290604),
 ('Belarus', 9589689),
 ('Belgium', 11323973),
 ('Belize', 347369),
 ('Benin', 10448647),
 ('Bhutan', 741919),
 ('Bolivia', 10800882),
 ('Bosnia and Herzegovina', 3867055),
 ('Botswana', 2182719),
 ('Brazil', 204259812),
 ('Brunei', 429646),
 ('Bulgaria', 7186893),
 ('Burkina Faso', 18931686),
 ('Burma', 56320206),
 ('Burundi', 10742276),
 ('Cambodia', 15708756),
 ('Cameroon', 23739218),
 ('Canada', 35099836),
 ('Cabo Verde', 545993),
 ('Central African Republic', 5391539),
 ('Chad', 11631456),
 ('Chile', 17508260),
 ('China', 1367485388),
 ('Colombia', 46736728),
 ('Comoros', 780971),
 ('Congo, Democratic Republic 

In [20]:
# Collect every distinct value of the birth_rate column of the facts table
# as a list of one-column rows.
unique_birth_rates = list(conn.execute("SELECT DISTINCT birth_rate FROM facts;"))

print(unique_birth_rates)

[(38.57,), (12.92,), (23.67,), (8.13,), (38.78,), (15.85,), (16.64,), (13.61,), (12.15,), (9.41,), (15.5,), (13.66,), (21.14,), (11.87,), (10.7,), (11.41,), (24.68,), (36.02,), (17.78,), (22.76,), (8.87,), (20.96,), (14.46,), (17.32,), (8.92,), (42.03,), (18.39,), (42.01,), (23.83,), (36.17,), (10.28,), (20.33,), (35.08,), (36.6,), (13.83,), (12.49,), (16.47,), (27.84,), (34.88,), (35.85,), (15.91,), (28.67,), (9.45,), (9.9,), (9.63,), (10.27,), (23.65,), (15.41,), (18.73,), (18.51,), (22.9,), (16.46,), (33.31,), (30.0,), (10.51,), (37.27,), (19.43,), (10.72,), (12.38,), (34.49,), (30.86,), (12.74,), (8.47,), (31.09,), (8.66,), (16.03,), (24.89,), (35.74,), (33.38,), (15.59,), (22.31,), (23.14,), (9.16,), (13.91,), (19.55,), (16.72,), (17.99,), (31.45,), (14.84,), (18.48,), (8.74,), (18.16,), (7.93,), (25.37,), (19.15,), (26.4,), (21.46,), (14.52,), (8.19,), (None,), (19.91,), (22.98,), (24.25,), (10.0,), (14.59,), (25.47,), (34.41,), (18.03,), (10.45,), (10.1,), (11.37,), (11.55,), (3

In [21]:
# DISTINCT can be used inside an aggregate: count how many different values
# the population column holds.
list(conn.execute("SELECT COUNT(DISTINCT population) FROM facts;"))

[(241,)]

In [22]:
# Mean of the distinct birth_rate values: each different value contributes to
# the average once, however many rows share it.
list(conn.execute("SELECT AVG(DISTINCT birth_rate) FROM facts;"))

[(19.563009259259253,)]

In [23]:
# Average the distinct birth_rate values among the rows whose population is
# greater than 20000000; the aggregate returns one single-column row.
average_birth_rate = conn.execute("SELECT AVG(DISTINCT birth_rate) FROM facts WHERE population>20000000;").fetchone()[0]

print(average_birth_rate)

20.43473684210527


In [24]:
# Add up the distinct population values among the rows whose area_land is
# greater than 1000000, then unpack the single value from the one-row result.
(sum_population,) = conn.execute("SELECT SUM(DISTINCT population) FROM facts WHERE area_land>1000000;").fetchone()

print(sum_population)

4233873015


In [25]:
# Divide every population value by 1000000 inside the query.
# Both operands are integers (the population column and the literal), so each
# result is an integer too: the fractional part is dropped.
list(conn.execute("SELECT population/1000000 FROM facts;"))

[(32,),
 (3,),
 (39,),
 (0,),
 (19,),
 (0,),
 (43,),
 (3,),
 (22,),
 (8,),
 (9,),
 (0,),
 (1,),
 (168,),
 (0,),
 (9,),
 (11,),
 (0,),
 (10,),
 (0,),
 (10,),
 (3,),
 (2,),
 (204,),
 (0,),
 (7,),
 (18,),
 (56,),
 (10,),
 (15,),
 (23,),
 (35,),
 (0,),
 (5,),
 (11,),
 (17,),
 (1367,),
 (46,),
 (0,),
 (79,),
 (4,),
 (4,),
 (23,),
 (4,),
 (11,),
 (1,),
 (10,),
 (5,),
 (0,),
 (0,),
 (10,),
 (15,),
 (88,),
 (6,),
 (0,),
 (6,),
 (1,),
 (99,),
 (0,),
 (5,),
 (66,),
 (1,),
 (1,),
 (4,),
 (80,),
 (26,),
 (10,),
 (0,),
 (14,),
 (11,),
 (1,),
 (0,),
 (10,),
 (8,),
 (9,),
 (0,),
 (1251,),
 (255,),
 (81,),
 (37,),
 (4,),
 (8,),
 (61,),
 (2,),
 (126,),
 (8,),
 (18,),
 (45,),
 (0,),
 (24,),
 (49,),
 (1,),
 (2,),
 (5,),
 (6,),
 (1,),
 (6,),
 (1,),
 (4,),
 (6,),
 (0,),
 (2,),
 (0,),
 (2,),
 (23,),
 (17,),
 (30,),
 (0,),
 (16,),
 (0,),
 (0,),
 (3,),
 (1,),
 (121,),
 (0,),
 (3,),
 (0,),
 (2,),
 (0,),
 (33,),
 (25,),
 (2,),
 (0,),
 (31,),
 (16,),
 (4,),
 (5,),
 (18,),
 (181,),
 (5,),
 (3,),
 (199,),
 (0,),
 

In [26]:
# Dividing by the float literal 1000000.0 makes every result a float, so the
# fractional part is kept instead of being cut off as in integer division.
list(conn.execute("SELECT population/1000000.0 FROM facts;"))

[(32.564342,),
 (3.029278,),
 (39.542166,),
 (0.08558,),
 (19.625353,),
 (0.092436,),
 (43.431886,),
 (3.056382,),
 (22.751014,),
 (8.66555,),
 (9.78078,),
 (0.324597,),
 (1.346613,),
 (168.957745,),
 (0.290604,),
 (9.589689,),
 (11.323973,),
 (0.347369,),
 (10.448647,),
 (0.741919,),
 (10.800882,),
 (3.867055,),
 (2.182719,),
 (204.259812,),
 (0.429646,),
 (7.186893,),
 (18.931686,),
 (56.320206,),
 (10.742276,),
 (15.708756,),
 (23.739218,),
 (35.099836,),
 (0.545993,),
 (5.391539,),
 (11.631456,),
 (17.50826,),
 (1367.485388,),
 (46.736728,),
 (0.780971,),
 (79.375136,),
 (4.755097,),
 (4.814144,),
 (23.295302,),
 (4.464844,),
 (11.031433,),
 (1.189197,),
 (10.644842,),
 (5.581503,),
 (0.828324,),
 (0.073607,),
 (10.478756,),
 (15.868396,),
 (88.487396,),
 (6.14135,),
 (0.740743,),
 (6.527689,),
 (1.26542,),
 (99.465819,),
 (0.909389,),
 (5.476922,),
 (66.553766,),
 (1.705336,),
 (1.967709,),
 (4.931226,),
 (80.854408,),
 (26.327649,),
 (10.775643,),
 (0.110694,),
 (14.918999,),
 (1

Here are the rules for what an arithmetic operation will return:
* Two floats - Returns a float (ex. SELECT birth_rate / 1000000.0 FROM facts;)
* A float and an integer - Returns a float (ex. SELECT population / 1000000.0 FROM facts;)
* Two integers - Returns an integer, with any fractional part truncated rather than rounded (ex. SELECT population / 1000000 FROM facts;)

In [27]:
# Express population_growth in terms of millions by dividing each value by
# 1000000.0 inside the SQL query; the float divisor keeps the results floats.
population_growth_millions = list(conn.execute("SELECT population_growth/1000000.0 FROM facts;"))

print(population_growth_millions)

[(2.32e-06,), (3e-07,), (1.8400000000000002e-06,), (1.2e-07,), (2.7799999999999996e-06,), (1.24e-06,), (9.300000000000001e-07,), (1.5e-07,), (1.0700000000000001e-06,), (5.5e-07,), (9.6e-07,), (8.5e-07,), (2.4100000000000002e-06,), (1.6000000000000001e-06,), (3.1e-07,), (2.0000000000000002e-07,), (7.6e-07,), (1.87e-06,), (2.7799999999999996e-06,), (1.1100000000000002e-06,), (1.56e-06,), (1.3e-07,), (1.21e-06,), (7.7e-07,), (1.6200000000000002e-06,), (5.8e-07,), (3.03e-06,), (1.01e-06,), (3.28e-06,), (1.5800000000000001e-06,), (2.5899999999999998e-06,), (7.5e-07,), (1.3600000000000001e-06,), (2.13e-06,), (1.89e-06,), (8.2e-07,), (4.5000000000000003e-07,), (1.04e-06,), (1.77e-06,), (2.4500000000000003e-06,), (2e-06,), (1.22e-06,), (1.91e-06,), (1.3e-07,), (1.5e-07,), (1.4299999999999999e-06,), (1.6e-07,), (2.2e-07,), (2.2e-06,), (2.1e-07,), (1.23e-06,), (1.35e-06,), (1.79e-06,), (2.5e-07,), (2.5099999999999997e-06,), (2.25e-06,), (5.5e-07,), (2.8900000000000003e-06,), (6.7e-07,), (4.00000

In [28]:
# Arithmetic also works between columns: for every row, divide its birth_rate
# by its own death_rate.
list(conn.execute("SELECT birth_rate/death_rate FROM facts;"))

[(2.776817854571634,),
 (1.9635258358662613,),
 (5.491879350348029,),
 (1.168103448275862,),
 (3.3751087902523933,),
 (2.785588752196836,),
 (2.27012278308322,),
 (1.4571734475374731,),
 (1.7016806722689077,),
 (0.9989384288747346,),
 (2.3536067892503536,),
 (2.198581560283688,),
 (5.078066914498142,),
 (3.768270944741533,),
 (1.4063981042654028,),
 (0.8008982035928144,),
 (1.1848390446521286,),
 (4.134003350083752,),
 (4.387332521315469,),
 (2.6576980568011956,),
 (3.49079754601227,),
 (0.9097435897435897,),
 (1.5653472740851382,),
 (2.197568389057751,),
 (4.920454545454546,),
 (0.6177285318559557,),
 (3.58617747440273,),
 (2.3103015075376887,),
 (4.531823085221143,),
 (3.102864583333333,),
 (3.577645895153314,),
 (1.2209026128266032,),
 (3.3273322422258587,),
 (2.542028985507246,),
 (2.563025210084034,),
 (2.305,),
 (1.658698539176627,),
 (3.0499999999999994,),
 (3.677675033025099,),
 (3.4637537239324727,),
 (3.585,),
 (3.4967032967032967,),
 (3.0020942408376965,),
 (0.77586206896551

In [29]:
# Parentheses control evaluation order: for every row, add birth_rate and
# migration_rate first, then divide that sum by death_rate.
list(conn.execute("SELECT (birth_rate+migration_rate)/death_rate FROM facts;"))

[(2.8855291576673863,),
 (2.465045592705167,),
 (5.705336426914155,),
 (1.168103448275862,),
 (3.4151436031331595,),
 (3.173989455184534,),
 (2.27012278308322,),
 (2.0781584582441113,),
 (2.492997198879552,),
 (1.589171974522293,),
 (2.3536067892503536,),
 (2.198581560283688,),
 (9.944237918215613,),
 (3.8502673796791442,),
 (1.4419431279620853,),
 (0.8532934131736526,),
 (1.794392523364486,),
 (4.134003350083752,),
 (4.387332521315469,),
 (2.6576980568011956,),
 (3.585889570552148,),
 (0.9487179487179487,),
 (1.905899925317401,),
 (2.218844984802432,),
 (5.610795454545454,),
 (0.6378116343490304,),
 (3.58617747440273,),
 (2.3454773869346734,),
 (4.531823085221143,),
 (3.14453125,),
 (3.5924826904055394,),
 (1.8931116389548692,),
 (3.4304418985270044,),
 (2.542028985507246,),
 (2.804621848739496,),
 (2.361666666666667,),
 (1.7171314741035855,),
 (3.168518518518518,),
 (4.011889035667107,),
 (3.4905660377358494,),
 (4.175,),
 (3.679120879120879,),
 (3.0020942408376965,),
 (0.88998357963

In [30]:
# Project each country's population a year from now in SQL: scale population
# by (1 + population_growth/100.0).
# NOTE(review): the /100.0 presumes population_growth is stored as a yearly
# percentage -- confirm against the dataset's documentation.
next_year_population = list(conn.execute("SELECT population*(1+population_growth/100.0) FROM facts;"))

print(next_year_population)

[(33319834.734400004,), (3038365.834,), (40269741.8544,), (85682.69600000001,), (20170937.8134,), (93582.2064,), (43835802.5398,), (3060966.5730000003,), (22994449.849799998,), (8713210.525,), (9874675.488,), (327356.0745,), (1379066.3733,), (171661068.92000002,), (291504.87240000005,), (9608868.378,), (11410035.1948,), (353864.8003,), (10739119.3866,), (750154.3009,), (10969375.759200001,), (3872082.1715,), (2209129.8999,), (205832612.55240002,), (436606.2652,), (7228576.979400001,), (19505316.0858,), (56889040.0806,), (11094622.6528,), (15956954.344800001,), (24354063.7462,), (35363084.77,), (553418.5048,), (5506378.780700001,), (11851290.518399999,), (17651827.732,), (1373639072.2459998,), (47222789.9712,), (794794.1867000001,), (81319826.832,), (4850198.94,), (4872876.5568,), (23740242.2682,), (4470648.2972,), (11047980.149500001,), (1206202.5171,), (10661873.747200001,), (5593782.3066,), (846547.128,), (73761.5747,), (10607644.6988,), (16082619.346,), (90071320.3884,), (6156703.37

It's often advantageous to do these computations in the SQL database rather than in a Python environment, because the query is both quicker to write and faster to execute.