### Introdution to joins

In [1]:
import pandas as pd
import sqlalchemy
import sqlite3
import os

In [2]:
conn = sqlite3.connect('leaders.sqlite')
cur = conn.cursor()

In [3]:
def sql_query(query):
    c = cur.execute(query)
    df = pd.DataFrame(c.fetchall(), 
                      columns=[description[0] for description in c.description])
    return df

In [4]:
sql_query('select * from presidents')

Unnamed: 0,country,continent,president
0,Egypt,Africa,Abdel Fattah el-Sisi
1,Portugal,Europe,Marcelo Rebelo de Sousa
2,Haiti,North America,Jovenel Moise
3,Uruguay,South America,Jose Mujica
4,Liberia,Africa,Ellen Johnson Sirleaf
5,Chile,South America,Michelle Bachelet
6,Vietnam,Asia,Tran Dai Quang


In [5]:
sql_query('''SELECT p1.country, p1.continent,
                    prime_minister, president
            FROM prime_ministers AS p1
            INNER JOIN presidents AS p2
            ON p1.country=p2.country''')

Unnamed: 0,country,continent,prime_minister,president
0,Egypt,Africa,Sherif Ismail,Abdel Fattah el-Sisi
1,Portugal,Europe,Antonio Costa,Marcelo Rebelo de Sousa
2,Vietnam,Asia,Nguyen Xuan Phuc,Tran Dai Quang
3,Haiti,North America,Jack Guy Lafontant,Jovenel Moise


In [6]:
conn.close()

In [7]:
conn = sqlite3.connect('countries.sqlite')
cur = conn.cursor()

In [8]:
sql_query('SELECT * FROM countries').head(3)

Unnamed: 0,code,country_name,continent,region,surface_area,indep_year,local_name,gov_form,capital,cap_long,cap_lat
0,AFG,Afghanistan,Asia,Southern and Central Asia,652090.0,1919,Afganistan/Afqanestan,Islamic Emirate,Kabul,69.1761,34.5228
1,NLD,Netherlands,Europe,Western Europe,41526.0,1581,Nederland,Constitutional Monarchy,Amsterdam,4.89095,52.3738
2,ALB,Albania,Europe,Southern Europe,28748.0,1912,Shqiperia,Republic,Tirane,19.8172,41.3317


In [9]:
sql_query('''SELECT cities.name AS city, countries.country_name AS country, region
            FROM cities
            INNER JOIN countries
            ON cities.country_code = countries.code;''').head(3)

Unnamed: 0,city,country,region
0,Abidjan,Cote d'Ivoire,Western Africa
1,Abu Dhabi,United Arab Emirates,Middle East
2,Abuja,Nigeria,Western Africa


In [10]:
sql_query('SELECT * FROM economies').head(3)

Unnamed: 0,econ_id,code,year,income_group,gdp_percapita,gross_savings,inflation_rate,total_investment,unemployment_rate,exports,imports
0,1,AFG,2010,Low income,539.667,37.133,2.179,30.402,,46.394,24.381
1,2,AFG,2015,Low income,615.091,21.466,-1.549,18.602,,-49.11,-7.294
2,3,AGO,2010,Upper middle income,3599.27,23.534,14.48,14.433,,-3.266,-21.076


In [11]:
sql_query('''SELECT c.code AS country_code, country_name AS name, year, inflation_rate
            FROM countries AS c
            INNER JOIN economies AS e
            ON c.code = e.code;''').head(3)

Unnamed: 0,country_code,name,year,inflation_rate
0,AFG,Afghanistan,2010,2.179
1,AFG,Afghanistan,2015,-1.549
2,NLD,Netherlands,2010,0.932


In [12]:
sql_query('''-- 4. Select fields
            SELECT c.code, e.code, country_name AS name, region, e.year, fertility_rate, unemployment_rate
              -- 1. From countries (alias as c)
              FROM countries as c
              -- 2. Join with populations (as p)
              INNER JOIN populations as p
                -- 3. Match on country code
                ON c.code = p.country_code
                  -- 4. Join to economies (as e)
                INNER JOIN economies as e
                  -- 5. Match on country code
                ON c.code = e.code;''').head(3)

Unnamed: 0,code,code.1,name,region,year,fertility_rate,unemployment_rate
0,AFG,AFG,Afghanistan,Southern and Central Asia,2010,4.653,
1,AFG,AFG,Afghanistan,Southern and Central Asia,2015,4.653,
2,AFG,AFG,Afghanistan,Southern and Central Asia,2010,5.746,


In [13]:
conn.close()
conn = sqlite3.connect('diagrams.sqlite')
cur = conn.cursor()

In [14]:
sql_query('''SELECT left_table.id as L_id,
                    left_table.val as L_val,
                    right_table.val as R_val
            FROM left_table
            INNER JOIN right_table
            ON left_table.id = right_table.id''')

Unnamed: 0,L_id,L_val,R_val
0,1,L1,R1
1,4,L4,R2


### The INNER JOIN with USING

In [15]:
sql_query('''SELECT left_table.id AS L_id,
                    left_table.val AS L_val, 
                    right_table.val AS R_val
            FROM left_table
            INNER JOIN right_table
            USING (id)''')

Unnamed: 0,L_id,L_val,R_val
0,1,L1,R1
1,4,L4,R2


In [16]:
conn.close()

In [17]:
conn = sqlite3.connect('leaders.sqlite')
cur = conn.cursor()

In [18]:
sql_query('''SELECT p1.country, p1.continent, prime_minister, president
FROM presidents AS p1
INNER JOIN prime_ministers as p2
USING (country)''')

Unnamed: 0,country,continent,prime_minister,president
0,Egypt,Africa,Sherif Ismail,Abdel Fattah el-Sisi
1,Portugal,Europe,Antonio Costa,Marcelo Rebelo de Sousa
2,Haiti,North America,Jack Guy Lafontant,Jovenel Moise
3,Vietnam,Asia,Nguyen Xuan Phuc,Tran Dai Quang


In [19]:
conn.close()

In [20]:
conn = sqlite3.connect('countries.sqlite')
cur = conn.cursor()

In [21]:
sql_query('''-- 4. Select fields
SELECT country_name AS country, continent, name AS language, official
  -- 1. From countries (alias as c)
  FROM countries AS c
  -- 2. Join to languages (as l)
  INNER JOIN languages AS l
    -- 3. Match using code
    USING (code);''').head(3)

Unnamed: 0,country,continent,language,official
0,Afghanistan,Asia,Dari,True
1,Afghanistan,Asia,Other,False
2,Afghanistan,Asia,Pashto,True


In [22]:
conn.close()

### Self-ish joins, just in CASE

#### Join prime_minsters to itself?

In [23]:
conn = sqlite3.connect('leaders.sqlite')
cur = conn.cursor()

In [24]:
sql_query('''SELECT p1.country AS country1, p2.country AS country2, p1.continent
FROM  prime_ministers AS p1
INNER JOIN prime_ministers as p2
ON p1.continent = p2.continent AND p1.country <> p2.country
LIMIT 14;''')

Unnamed: 0,country1,country2,continent
0,Portugal,Norway,Europe
1,Portugal,Spain,Europe
2,Vietnam,Brunei,Asia
3,Vietnam,India,Asia
4,Vietnam,Oman,Asia
5,India,Brunei,Asia
6,India,Oman,Asia
7,India,Vietnam,Asia
8,Norway,Portugal,Europe
9,Norway,Spain,Europe


In [25]:
conn.close()

### CASE WHEN and THEN

In [26]:
conn = sqlite3.connect('countries.sqlite')
cur = conn.cursor()

In [27]:
sql_query('''SELECT p1.country_code,
       p1.size AS size2010,
       p2.size AS size2015,
	   -- 1. calculate growth_perc
       ((p2.size - p1.size)/p1.size * 100.0) AS growth_perc
-- 2. From populations (alias as p1)
FROM populations as p1
  -- 3. Join to itself (alias as p2)
  INNER JOIN populations as p2
    -- 4. Match on country code
    ON p1.country_code = p2.country_code
        -- 5. and year (with calculation)
		AND p1.year = p2.year - 5''').head(3)

Unnamed: 0,country_code,size2010,size2015,growth_perc
0,ABW,101597,103889.0,0.0
1,AFG,27962207,32526562.0,0.0
2,AGO,21219954,25021974.0,0.0


In [30]:
# cur.execute('''
#     CREATE TABLE countries_plus AS
#     SELECT country_name, continent, code, surface_area,
#     -- 1. First case
#     CASE WHEN surface_area > 2000000 THEN 'large'
#         -- 2. Second case
#         WHEN surface_area > 350000 THEN 'medium'
#         -- 3. Else clause + end
#         ELSE 'small' END
#         -- 4. Alias name
#         AS geosize_group
# -- 5. From table
# FROM countries;''')

In [32]:
sql_query('''SELECT country_code, size,
    -- 1. First case
    CASE WHEN size > 50000000 THEN 'large'
        -- 2. Second case
        WHEN size > 1000000 THEN 'medium'
        -- 3. Else clause + end
        ELSE 'small' END
        -- 4. Alias name
        AS popsize_group
-- 5. From table
FROM populations
-- 6. Focus on 2015
WHERE year = 2015;''').head(3)

Unnamed: 0,country_code,size,popsize_group
0,ABW,103889.0,small
1,AFG,32526562.0,medium
2,AGO,25021974.0,medium


In [36]:
# cur.execute('''CREATE TABLE pop_plus AS
# SELECT country_code, size,
#     CASE WHEN size > 50000000 THEN 'large'
#         WHEN size > 1000000 THEN 'medium'
#         ELSE 'small' END
#         AS popsize_group
# FROM populations
# WHERE year = 2015;''')

#sql_query('''SELECT * FROM pop_plus''').head(3)

In [None]:
sql_query('''-- 5. Select fields
    SELECT country_name, continent, geosize_group, popsize_group
    -- 1. From countries_plus (alias as c)
    FROM countries_plus AS c
      -- 2. Join to pop_plus (alias as p)
      INNER JOIN pop_plus AS p
        -- 3. Match on country code
        ON c.code = p.country_code
    -- 4. Order the table    
ORDER BY geosize_group;''')