### Applying Filtering, Ordering and Grouping to Queries

In [1]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:///census.sqlite')
connection = engine.connect()

In [2]:
from sqlalchemy import Table, MetaData, select
metadata = MetaData()
census = Table('census', metadata, autoload=True, autoload_with=engine)
stmt = select([census])

In [3]:
stmt = stmt.where(census.columns.state == 'California')
results = connection.execute(stmt).fetchall()

In [4]:
for result in results[:5]:
    print(result.state, result.age)

California 0
California 1
California 2
California 3
California 4


### Where Clauses
- Restrict data returned by a query based on boolean conditions
- Compare a column against a value or another column
- Often used comparisions: '==', '<=', '>=', '!='

### Expressions
- Provide more complex conditions than simple operators
- Eg `in_()`, `like()`, `between()`
- Many more in documentation
- Available as method on a Column

In [5]:
stmt = select([census])
stmt = stmt.where(
            census.columns.state.startswith('New'))
for result in connection.execute(stmt):
    print(result.state, result.pop2000)

New Jersey 56983
New Jersey 56686
New Jersey 57011
New Jersey 57912
New Jersey 59359
New Jersey 60392
New Jersey 60739
New Jersey 61579
New Jersey 62089
New Jersey 63384
New Jersey 64593
New Jersey 62494
New Jersey 60570
New Jersey 58189
New Jersey 57931
New Jersey 56752
New Jersey 55887
New Jersey 56712
New Jersey 53265
New Jersey 48344
New Jersey 49564
New Jersey 48141
New Jersey 48353
New Jersey 49130
New Jersey 48723
New Jersey 51092
New Jersey 50357
New Jersey 52179
New Jersey 55938
New Jersey 60638
New Jersey 64220
New Jersey 61484
New Jersey 61927
New Jersey 63487
New Jersey 66737
New Jersey 71274
New Jersey 71449
New Jersey 71026
New Jersey 71811
New Jersey 72043
New Jersey 73665
New Jersey 70514
New Jersey 69835
New Jersey 68650
New Jersey 65564
New Jersey 65529
New Jersey 61817
New Jersey 59206
New Jersey 56977
New Jersey 55780
New Jersey 56235
New Jersey 53235
New Jersey 54900
New Jersey 58013
New Jersey 43584
New Jersey 42959
New Jersey 42179
New Jersey 45575
New Jersey 383

### Conjunctions
- Allow us to have multiple criteria in a where clause
- Eg `and_()`, `not_()`, `or_()`

In [6]:
from sqlalchemy import or_
stmt = select([census])
stmt = stmt.where(
            or_(census.columns.state == 'California',
               census.columns.state == 'New York'
    )
)

In [7]:
for result in connection.execute(stmt):
    print(result.state, result.sex)

New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
New York M
California M
California M
California M
California M
California M
California M
California M
Califor

In [8]:
# # Import create_engine function
# from sqlalchemy import create_engine

# # Create an engine to the census database
# engine = create_engine('postgresql+psycopg2://student:datacamp@postgresql.csrrinzqubik.us-east-1.rds.amazonaws.com:5432/census')

# # Use the .table_names() method on the engine to print the table names
# print(engine.table_names())

In [12]:
# Create a select query: stmt
stmt = select([census])

# Add a where clause to filter the results to only those for New York
stmt = stmt.where(census.columns.state == 'New York')

# Execute the query to retrieve all the data returned: results
results = connection.execute(stmt).fetchall()

# Loop over the results and print the age, sex, and pop2008
for result in results:
    print(result.age, result.sex, result.pop2008)


0 M 128088
1 M 125649
2 M 121615
3 M 120580
4 M 122482
5 M 121205
6 M 120089
7 M 122355
8 M 118653
9 M 117369
10 M 118810
11 M 121121
12 M 126338
13 M 128713
14 M 129812
15 M 134463
16 M 136569
17 M 140114
18 M 156892
19 M 147556
20 M 146611
21 M 141932
22 M 138557
23 M 136150
24 M 132383
25 M 141850
26 M 129603
27 M 131419
28 M 127224
29 M 122449
30 M 126404
31 M 126124
32 M 123362
33 M 126486
34 M 120030
35 M 123017
36 M 127076
37 M 136270
38 M 144715
39 M 135027
40 M 135355
41 M 132905
42 M 140025
43 M 151555
44 M 149030
45 M 148147
46 M 146692
47 M 147648
48 M 155155
49 M 144287
50 M 143466
51 M 139630
52 M 133939
53 M 136723
54 M 125953
55 M 122478
56 M 118070
57 M 115823
58 M 117177
59 M 108293
60 M 106825
61 M 113681
62 M 83763
63 M 81226
64 M 76961
65 M 82242
66 M 70423
67 M 64117
68 M 63657
69 M 58801
70 M 57609
71 M 53231
72 M 51132
73 M 50696
74 M 44822
75 M 43592
76 M 41900
77 M 40417
78 M 40241
79 M 35941
80 M 34659
81 M 32022
82 M 28890
83 M 27217
84 M 23879
85 M 124478
0

In [16]:
stmt = select([census])
stmt = stmt.where(in_(states == census.columns.state))
for result in connection.execute(stmt):
    print(result.state, result.pop2000)

NameError: name 'in_' is not defined

In [21]:
states = ['New York', 'California', 'Texas']
# Create a query for the census table: stmt
stmt = select([census])

# Append a where clause to match all the states in_ the list states
stmt = stmt.where(census.columns.state.in_(states))

# Loop over the ResultProxy and print the state and its population in 2000
for result in connection.execute(stmt):
    print(result.state, result.pop2000)

New York 126237
New York 124008
New York 124725
New York 126697
New York 131357
New York 133095
New York 134203
New York 137986
New York 139455
New York 142454
New York 145621
New York 138746
New York 135565
New York 132288
New York 132388
New York 131959
New York 130189
New York 132566
New York 132672
New York 133654
New York 132121
New York 126166
New York 123215
New York 121282
New York 118953
New York 123151
New York 118727
New York 122359
New York 128651
New York 140687
New York 149558
New York 139477
New York 138911
New York 139031
New York 145440
New York 156168
New York 153840
New York 152078
New York 150765
New York 152606
New York 159345
New York 148628
New York 147892
New York 144195
New York 139354
New York 141953
New York 131875
New York 128767
New York 125406
New York 124155
New York 125955
New York 118542
New York 118532
New York 124418
New York 95025
New York 92652
New York 90096
New York 95340
New York 83273
New York 77213
New York 77054
New York 72212
New York 70967
N

In [22]:
# Import and_
from sqlalchemy import and_

# Build a query for the census table: stmt
stmt = select([census])

# Append a where clause to select only non-male records from California using and_
stmt = stmt.where(
    # The state of California with a non-male sex
    and_(census.columns.state == 'California',
         census.columns.sex != 'M'
         )
)

# Loop over the ResultProxy printing the age and sex
for result in connection.execute(stmt):
    print(result.age , result.sex)

0 F
1 F
2 F
3 F
4 F
5 F
6 F
7 F
8 F
9 F
10 F
11 F
12 F
13 F
14 F
15 F
16 F
17 F
18 F
19 F
20 F
21 F
22 F
23 F
24 F
25 F
26 F
27 F
28 F
29 F
30 F
31 F
32 F
33 F
34 F
35 F
36 F
37 F
38 F
39 F
40 F
41 F
42 F
43 F
44 F
45 F
46 F
47 F
48 F
49 F
50 F
51 F
52 F
53 F
54 F
55 F
56 F
57 F
58 F
59 F
60 F
61 F
62 F
63 F
64 F
65 F
66 F
67 F
68 F
69 F
70 F
71 F
72 F
73 F
74 F
75 F
76 F
77 F
78 F
79 F
80 F
81 F
82 F
83 F
84 F
85 F


### Ordering Query Results

#### Order by Clauses
- Allows us to control the order in which records are returned in the query results
- Available as a method on statements `order_by()`

In [23]:
print(results[:10])

[('New York', 'M', 0, 126237, 128088), ('New York', 'M', 1, 124008, 125649), ('New York', 'M', 2, 124725, 121615), ('New York', 'M', 3, 126697, 120580), ('New York', 'M', 4, 131357, 122482), ('New York', 'M', 5, 133095, 121205), ('New York', 'M', 6, 134203, 120089), ('New York', 'M', 7, 137986, 122355), ('New York', 'M', 8, 139455, 118653), ('New York', 'M', 9, 142454, 117369)]


In [24]:
stmt = select([census.columns.state])
stmt = stmt.order_by(census.columns.state)
results = connection.execute(stmt).fetchall()
print(results[:10])

[('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',)]


#### Order by Descending
- Wrap the column with `desc()` in the `order_by()` clause

#### Order by Multiple
- Just separate multiple columns with a comma
- Orders completely by the first column
- Then if there are duplicates in the first column, orders by the second column
- Repeats until all columns are ordered

In [28]:
stmt = select([census.columns.state, census.columns.sex])
stmt = stmt.order_by(census.columns.state, census.columns.sex)
results = connection.execute(stmt).first()
print(results)

('Alabama', 'F')


In [29]:
# Build a query to select the state column: stmt
stmt = select([census.columns.state])

# Order stmt by the state column
stmt = stmt.order_by(census.columns.state)

# Execute the query and store the results: results
results = connection.execute(stmt).fetchall()

# Print the first 10 results
print(results[:10])


[('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',), ('Alabama',)]


In [30]:
# Import desc
from sqlalchemy import desc

# Build a query to select the state column: stmt
stmt = select([census.columns.state])

# Order stmt by state in descending order: rev_stmt
rev_stmt = stmt.order_by(desc(census.columns.state))

# Execute the query and store the results: rev_results
rev_results = connection.execute(rev_stmt).fetchall()

# Print the first 10 rev_results
print(rev_results[:10])

[('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',), ('Wyoming',)]


In [31]:
# Build a query to select state and age: stmt
stmt = select([census.columns.state, census.columns.age])

# Append order by to ascend by state and descend by age
stmt = stmt.order_by(census.columns.state, desc(census.columns.age) )

# Execute the statement and store all the records: results
results = connection.execute(stmt).fetchall()

# Print the first 20 results
print(results[:20])

[('Alabama', 85), ('Alabama', 85), ('Alabama', 84), ('Alabama', 84), ('Alabama', 83), ('Alabama', 83), ('Alabama', 82), ('Alabama', 82), ('Alabama', 81), ('Alabama', 81), ('Alabama', 80), ('Alabama', 80), ('Alabama', 79), ('Alabama', 79), ('Alabama', 78), ('Alabama', 78), ('Alabama', 77), ('Alabama', 77), ('Alabama', 76), ('Alabama', 76)]
