# Set-Up

In [1]:
import sqlite3
import pandas as pd

In [2]:
conn = sqlite3.connect('../data/data.db')  # open the SQLite database file (path is relative to the working directory)
c = conn.cursor()  # single shared cursor; execute_statement() runs every query through it

def execute_statement(statement, params=()):
    """Run a SQL statement on the shared cursor ``c`` and return the result as a DataFrame.

    Parameters
    ----------
    statement : str
        SQL text to execute. May contain ``?`` or ``:name`` placeholders.
    params : sequence or dict, optional
        Values bound to the placeholders in ``statement``. Defaults to an
        empty tuple, i.e. a statement without placeholders.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record, with columns named after the result
        columns. If the statement produces no result set (for example
        CREATE or INSERT), an empty DataFrame without columns is returned.
    """
    c.execute(statement, params)
    # The cursor's description is None when the statement yields no result
    # set; iterating over it would raise TypeError.
    if c.description is None:
        return pd.DataFrame()
    column_names = [column[0] for column in c.description]
    return pd.DataFrame(c.fetchall(), columns=column_names)

# GROUP BY
The GROUP BY clause groups together rows that share the same values in one or more columns, so that an aggregate can be computed for each group.

In [7]:
execute_statement(
    # Count the entries for each Taxon and list the largest groups first.
    'SELECT Taxon, COUNT(Taxon) '
    'FROM cites '
    'GROUP BY Taxon '
    'ORDER BY COUNT(Taxon) DESC'
)

Unnamed: 0,Taxon,COUNT(Taxon)
0,Falco hybrid,87583
1,Falco rusticolus,47101
2,Psittacus erithacus,36359
3,Falco cherrug,18335
4,Falco peregrinus,18185
...,...,...
1730,Vidua togoensis,1
1731,Vini stepheni,1
1732,Vini ultramarina,1
1733,Vultur spp.,1


# Aggregate Functions
There are several aggregate functions that can be applied with GROUP BY, but some may depend on the flavour of SQL used. Aggregate functions include:
 - COUNT
 - AVG
 - SUM
 - MAX
 - MIN
 - STDEV / STDDEV (standard deviation; the name varies by dialect and it is not built into SQLite)

## COUNT
The COUNT aggregate function counts the rows in each group: `COUNT(*)` counts every row, while `COUNT(column)` counts only the rows where `column` is not NULL.

In [10]:
execute_statement(
    # The ten Taxon values with the most entries, in descending order.
    'SELECT Taxon, COUNT(Taxon) '
    'FROM cites '
    'GROUP BY Taxon '
    'ORDER BY COUNT(Taxon) DESC '
    'LIMIT 10'
)

Unnamed: 0,Taxon,COUNT(Taxon)
0,Falco hybrid,87583
1,Falco rusticolus,47101
2,Psittacus erithacus,36359
3,Falco cherrug,18335
4,Falco peregrinus,18185
5,Ara ararauna,15328
6,Agapornis personatus,9983
7,Grus canadensis,9324
8,Agapornis fischeri,9179
9,Amazona aestiva,8998


## AVG
The AVG aggregate function can be used to get the average value of a column in each group.

In [13]:
execute_statement(
    # Inner query: number of entries per Taxon. Family is part of the
    # GROUP BY so that it is a real grouping column instead of a bare
    # column whose value SQLite would take from an arbitrary row.
    # Outer query: average of the per-Taxon counts within each Family.
    'SELECT Family, AVG(Count) '
    'FROM (SELECT Taxon, Family, COUNT(Taxon) AS Count FROM cites GROUP BY Taxon, Family) '
    'GROUP BY Family '
    'ORDER BY AVG(Count) DESC'
)

Unnamed: 0,Family,AVG(Count)
0,Falconidae,2541.828571
1,Cacatuidae,1142.666667
2,Psittacidae,860.683077
3,Spheniscidae,807.500000
4,Gruidae,612.150000
...,...,...
56,Atrichornithidae,2.000000
57,Laridae,2.000000
58,Picidae,2.000000
59,Rallidae,1.000000


## SUM
The SUM aggregate function can be used to get the total value of a column in each group.

In [15]:
execute_statement(
    # Inner query: number of entries per Taxon. Family is part of the
    # GROUP BY so that it is a real grouping column instead of a bare
    # column whose value SQLite would take from an arbitrary row.
    # Outer query: total of the per-Taxon counts within each Family.
    'SELECT Family, SUM(Count) '
    'FROM (SELECT Taxon, Family, COUNT(Taxon) AS Count FROM cites GROUP BY Taxon, Family) '
    'GROUP BY Family '
    'ORDER BY SUM(Count) DESC'
)

Unnamed: 0,Family,SUM(Count)
0,Psittacidae,279722
1,Falconidae,177928
2,Cacatuidae,30852
3,Accipitridae,25524
4,Estrildidae,22618
...,...,...
56,Corvidae,3
57,Atrichornithidae,2
58,Laridae,2
59,Rallidae,2


## MAX
The MAX aggregate function can be used to get the maximum value of a column in each group.

In [16]:
execute_statement(
    # Inner query: number of entries per Taxon. Family is part of the
    # GROUP BY so that it is a real grouping column instead of a bare
    # column whose value SQLite would take from an arbitrary row.
    # Outer query: largest per-Taxon count within each Family.
    'SELECT Family, MAX(Count) '
    'FROM (SELECT Taxon, Family, COUNT(Taxon) AS Count FROM cites GROUP BY Taxon, Family) '
    'GROUP BY Family '
    'ORDER BY MAX(Count) DESC'
)

Unnamed: 0,Family,MAX(Count)
0,Falconidae,87583
1,Psittacidae,36359
2,Gruidae,9324
3,Cacatuidae,6830
4,Estrildidae,5654
...,...,...
56,Picidae,3
57,Atrichornithidae,2
58,Laridae,2
59,Rallidae,1


## MIN
The MIN aggregate function can be used to get the minimum value of a column in each group.

In [17]:
execute_statement(
    # Inner query: number of entries per Taxon. Family is part of the
    # GROUP BY so that it is a real grouping column instead of a bare
    # column whose value SQLite would take from an arbitrary row.
    # Outer query: smallest per-Taxon count within each Family.
    'SELECT Family, MIN(Count) '
    'FROM (SELECT Taxon, Family, COUNT(Taxon) AS Count FROM cites GROUP BY Taxon, Family) '
    'GROUP BY Family '
    'ORDER BY MIN(Count) DESC'
)

Unnamed: 0,Family,MIN(Count)
0,Spheniscidae,509
1,Sagittariidae,193
2,Pandionidae,129
3,Balaenicipitidae,90
4,Trogonidae,68
...,...,...
56,Strigidae,1
57,Sturnidae,1
58,Threskiornithidae,1
59,Trochilidae,1
