## Queries

In [1]:
# Standard library
import os
from warnings import filterwarnings

# Third-party
import pandas as pd
import pymysql
from sqlalchemy import create_engine

# Silence warnings emitted by the MySQL driver so query output stays readable.
filterwarnings('ignore', category=pymysql.Warning)

In [2]:
# Build the SQLAlchemy engine for the MySQL "birdsong" database.
#
# The connection URL embeds the user name and password, so it is read from the
# BIRDSONG_DB_URL environment variable instead of being hardcoded in the notebook.
# Expected form: mysql+pymysql://<user>:<password>@localhost/birdsong
db_url = os.environ.get('BIRDSONG_DB_URL')
if not db_url:
    raise RuntimeError(
        "Set the BIRDSONG_DB_URL environment variable to the database URL, e.g. "
        "mysql+pymysql://<user>:<password>@localhost/birdsong"
    )

# create_engine() is lazy: no connection is opened until the first query runs.
engine = create_engine(db_url)

In [3]:
# Load every row of the `birds` table into a DataFrame and preview the first five.
new_df = pd.read_sql_query(sql='select * from birds', con=engine)
new_df.head(n=5)

Unnamed: 0,file_id,genus,species,english_cname,who_provided_recording,country,latitude,longitute,type,license
0,27039,Caprimulgus,europaeus,European Nightjar,Patrik berg,Sweden,58.4392,14.3298,Song,http://creativecommons.org/licenses/by-nc-sa/3.0/
1,27145,Corvus,frugilegus,Rook,Patrik berg,Sweden,55.6289,13.7128,Call,http://creativecommons.org/licenses/by-nc-sa/3.0/
2,27999,Acrocephalus,scirpaceus,Eurasian Reed Warbler,Stuart Fisher,United Kingdom,51.2887,-0.5267,Song,http://creativecommons.org/licenses/by-nc-nd/2.5/
3,28961,Chloris,chloris,European Greenfinch,Stuart Fisher,United Kingdom,51.5278,-0.01,song,http://creativecommons.org/licenses/by-nc-nd/2.5/
4,30390,Erithacus,rubecula,European Robin,Stuart Fisher,United Kingdom,51.317,-0.5592,song,http://creativecommons.org/licenses/by-nc-nd/2.5/


* To select specific rows from a table based on the values stored in a column, add a `WHERE <column> = <value>` clause to the query.

* For example, to select only those rows within the `birds` table whose `genus` is "Acanthis", `SELECT * FROM birds WHERE genus = "Acanthis"` would be used.


In [4]:
# Keep only the rows whose genus is "Acanthis", then preview the result.
new_df = pd.read_sql_query(
    sql='select * from birds where genus = "Acanthis"',
    con=engine,
)
new_df.head(n=5)

Unnamed: 0,file_id,genus,species,english_cname,who_provided_recording,country,latitude,longitute,type,license
0,35068,Acanthis,flammea,Common Redpoll,Sander Bot,Netherlands,52.8176,6.4326,"call, song",http://creativecommons.org/licenses/by-nc-nd/2.5/
1,132608,Acanthis,flammea,Common Redpoll,Jarek Matusiak,Poland,50.7932,15.4995,"female, male, song",http://creativecommons.org/licenses/by-nc-sa/3.0/
2,132611,Acanthis,flammea,Common Redpoll,Jarek Matusiak,Poland,50.7932,15.4995,"flight call, male, song",http://creativecommons.org/licenses/by-nc-sa/3.0/


* Sometimes rows are wanted only when two or more conditions are met at once. MySQL supports this by combining the conditions with the `AND` operator.

* For example, to select rows within the `birds` table whose `genus` is "Acanthis" and whose `country` is "Netherlands", `SELECT * FROM birds WHERE genus = "Acanthis" AND country = "Netherlands"` would be used.

In [5]:
# Both conditions must hold: genus is "Acanthis" AND country is "Netherlands".
new_df = pd.read_sql_query(
    sql='select * from birds where genus = "Acanthis" and country = "Netherlands"',
    con=engine,
)
new_df.head(n=5)

Unnamed: 0,file_id,genus,species,english_cname,who_provided_recording,country,latitude,longitute,type,license
0,35068,Acanthis,flammea,Common Redpoll,Sander Bot,Netherlands,52.8176,6.4326,"call, song",http://creativecommons.org/licenses/by-nc-nd/2.5/


* The `OR` operator can also be used when creating queries to search for rows that match at least one of several conditions, thus widening the search.

* It is also possible to exclude rows that match certain criteria by negating a condition with `NOT`, for example `WHERE genus = "Acanthis" AND NOT country = "Netherlands"`.


In [6]:
# Acanthis recordings from anywhere except the Netherlands (NOT negates the country test).
new_df = pd.read_sql_query(
    sql='select * from birds where genus = "Acanthis" and not country = "Netherlands"',
    con=engine,
)
new_df.head(n=5)

Unnamed: 0,file_id,genus,species,english_cname,who_provided_recording,country,latitude,longitute,type,license
0,132608,Acanthis,flammea,Common Redpoll,Jarek Matusiak,Poland,50.7932,15.4995,"female, male, song",http://creativecommons.org/licenses/by-nc-sa/3.0/
1,132611,Acanthis,flammea,Common Redpoll,Jarek Matusiak,Poland,50.7932,15.4995,"flight call, male, song",http://creativecommons.org/licenses/by-nc-sa/3.0/


### Select Distinct

In [7]:
# DISTINCT collapses duplicate values, so each recordist is listed only once.
sql_query = """
select distinct who_provided_recording from birds 
"""
new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=5)

Unnamed: 0,who_provided_recording
0,Patrik berg
1,Stuart Fisher
2,Ruud van Beusekom
3,Rombout de Wijs
4,Mark Harper


### Aggregation - Count, Sum, Average

In [8]:
# count(*) returns a single row holding the total number of rows in the table
# (100 for this data set).
sql_query = """
select count(*)
from birds
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=5)

Unnamed: 0,count(*)
0,100


In [9]:
# Aggregates respect the WHERE filter: count only the rows recorded in Poland.
sql_query = """
select count(*)
from birds
where country = "Poland"
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=5)

Unnamed: 0,count(*)
0,7


In [10]:
# Mean latitude of the rows recorded in Poland.
sql_query = """
select avg(latitude)
from birds
where country = "Poland"
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=5)

Unnamed: 0,avg(latitude)
0,51.3878


## Group by

In [11]:
# List each country that appears in the table once; the full result is displayed.
sql_query = """
select distinct country
from birds
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df

Unnamed: 0,country
0,Sweden
1,United Kingdom
2,Netherlands
3,Poland
4,Germany
5,Spain
6,Iceland
7,Belgium
8,Italy
9,France


In [12]:
# count(country) counts the rows whose country is not NULL (duplicates included).
sql_query = """
select count(country) from birds;
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=10)

Unnamed: 0,count(country)
0,100


In [13]:
# count(distinct country) counts how many different countries appear in the table.
sql_query = """
select count(distinct country) from birds;
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=10)

Unnamed: 0,count(distinct country)
0,11


* The `COUNT()` function, used in conjunction with `GROUP BY`, counts the number of rows (recordings) from each country.



In [14]:
# One output row per country with its row count, largest counts first.
sql_query = """
select country, count(*) as number from birds
group by country
order by number desc
"""

new_df = pd.read_sql_query(sql=sql_query, con=engine)
new_df.head(n=10)

Unnamed: 0,country,number
0,United Kingdom,48
1,Sweden,11
2,Netherlands,10
3,France,8
4,Poland,7
5,Germany,7
6,Spain,3
7,Italy,2
8,Norway,2
9,Belgium,1


In [15]:
# Per-country row counts, keeping only countries with at least 8 rows.
# HAVING filters the groups after aggregation, so it can refer to the count alias.
# The alias is written as a plain identifier (as in the previous GROUP BY query)
# rather than a quoted string: string-literal aliases are not standard SQL and
# MySQL's documentation advises against them.
sql_query = """
select country, count(*) as number
from birds
group by country
having number >= 8
order by number desc
"""

new_df = pd.read_sql_query(sql_query, engine)
new_df.head(10)

Unnamed: 0,country,number
0,United Kingdom,48
1,Sweden,11
2,Netherlands,10
3,France,8


In [16]:
# pandas counterpart of the GROUP BY count: load the whole table, then tally the
# rows per country (value_counts sorts by count, largest first, by default).
new_df = pd.read_sql_query(sql='select * from birds', con=engine)
new_df['country'].value_counts()

United Kingdom    48
Sweden            11
Netherlands       10
France             8
Poland             7
Germany            7
Spain              3
Italy              2
Norway             2
Iceland            1
Belgium            1
Name: country, dtype: int64