# Introduction to joins

## Introduction to INNER JOIN

In [None]:
-- Basic inner join: keep only the rows whose id is present in both tables.
SELECT *
FROM left_table
INNER JOIN right_table
    ON left_table.id = right_table.id;

-- Inner join with table aliases. Both tables expose a column called name,
-- so each one is qualified by its alias and renamed in the output.
SELECT c1.name AS city,
       c2.name AS country
FROM cities AS c1
INNER JOIN countries AS c2
    ON c1.country_code = c2.code;
    
-- Chained inner joins: every additional INNER JOIN ... ON brings in one more
-- table; here both joins are keyed on left_table.id.
SELECT *
FROM left_table
INNER JOIN right_table
    ON left_table.id = right_table.id
INNER JOIN table1
    ON left_table.id = table1.id;

## INNER JOIN via USING

In [None]:
-- Inner join with the join condition spelled out in an ON clause ...
SELECT left_table.id   AS L_id,
       left_table.val  AS L_val,
       right_table.val AS R_val
FROM left_table
INNER JOIN right_table
    ON left_table.id = right_table.id;

-- ... is the same as the USING form, which can be used because the join
-- column has the same name (id) in both tables.
SELECT left_table.id   AS L_id,
       left_table.val  AS L_val,
       right_table.val AS R_val
FROM left_table
INNER JOIN right_table
    USING (id);

## Self-ish joins, just in CASE

In [None]:
-- Self-join: pair up prime_ministers rows that share a continent.
-- Every row also matches itself, so country1 can equal country2.
-- There is no ORDER BY, so which 14 rows are returned is up to the database.
SELECT
    p1.country   AS country1,
    p2.country   AS country2,
    p1.continent
FROM prime_ministers AS p1
JOIN prime_ministers AS p2
    ON p2.continent = p1.continent
LIMIT 14;

-- Same self-join on continent, but without the pairs in which a country is
-- matched with itself.
SELECT p1.country AS country1,
       p2.country AS country2,
       p1.continent
FROM prime_ministers AS p1
INNER JOIN prime_ministers AS p2
    ON p1.continent = p2.continent
   AND p1.country <> p2.country   -- exclude rows where p1.country = p2.country
LIMIT 13;


-- Use a self-join to calculate a new column: each populations row (p1) is
-- paired with the row for the same country five years later (p2), and the
-- percentage growth between the two sizes is derived.
-- NOTE(review): the size2010/size2015 aliases assume the table only holds
-- rows for 2010 and 2015 -- confirm; the join pairs any two years 5 apart.
SELECT p1.country_code,
       p1.size AS size2010,
       p2.size AS size2015,
       -- Multiply by 100.0 before dividing so that an integer size column is
       -- not truncated by integer division; NULLIF returns NULL instead of
       -- raising a division-by-zero error when the earlier size is 0.
       (p2.size - p1.size) * 100.0 / NULLIF(p1.size, 0) AS growth_perc
FROM populations AS p1
INNER JOIN populations AS p2
    ON p1.country_code = p2.country_code
   AND p1.year = p2.year - 5;

Often it's useful to look at a numerical field not as raw data, but instead as being in different categories or groups.

You can use CASE with WHEN, THEN, ELSE, and END to define a new grouping field.



In [None]:
-- Bucket each state by its independence year with a searched CASE expression.
-- Branches are tested top to bottom, so the second WHEN only ever sees years
-- that are >= 1900. Rows whose indep_year is NULL match neither WHEN and are
-- therefore labelled by the ELSE branch.
SELECT
    name,
    continent,
    indep_year,
    CASE
        WHEN indep_year < 1900  THEN 'before 1900'
        WHEN indep_year <= 1930 THEN 'between 1900 and 1930'
        ELSE 'after 1930'
    END AS indep_year_group
FROM states
ORDER BY indep_year_group;


-- Use CASE to bin countries by surface_area into 'large', 'medium' or
-- 'small'. Branches are tested in order, so 'medium' only applies to areas
-- that are not already 'large'.
SELECT name,
       continent,
       code,
       surface_area,
       CASE
           WHEN surface_area > 2000000 THEN 'large'
           WHEN surface_area > 350000  THEN 'medium'
           ELSE 'small'
       END AS geosize_group
INTO countries_plus   -- optional: save the results in a new table
FROM countries;

# Outer joins and cross joins

## LEFT and RIGHT JOINs

In [None]:
-- Left join only: every row of cities is kept; the country columns are NULL
-- for a city whose country_code has no match in countries.
SELECT c1.name AS city,
       code,
       c2.name AS country,
       region,
       city_proper_pop
FROM cities AS c1
LEFT JOIN countries AS c2
    ON c1.country_code = c2.code
ORDER BY code DESC;

In [None]:
-- Left join combined with GROUP BY: average gdp_percapita per region for the
-- year 2010, highest average first.
-- Note: the WHERE filter on e.year discards countries without a matching
-- economies row (their e.year is NULL), so they contribute to no group.
SELECT region,
       AVG(gdp_percapita) AS avg_gdp
FROM countries AS c
LEFT JOIN economies AS e
    ON c.code = e.code
WHERE e.year = 2010
GROUP BY region
ORDER BY avg_gdp DESC;

## FULL JOINs

In [None]:
-- Full join of countries and currencies on code: unmatched rows from either
-- side are kept. The filter keeps North America plus every row whose region
-- is NULL.
SELECT
    name AS country,
    code,
    region,
    basic_unit
FROM countries
FULL OUTER JOIN currencies USING (code)
WHERE region IS NULL
   OR region = 'North America'
ORDER BY region;

In [None]:
-- Two chained full joins, both keyed on code: countries with languages, then
-- the result with currencies. The LIKE pattern keeps regions that start with
-- 'M' and end with 'esia'.
SELECT
    c1.name AS country,
    region,
    l.name  AS language,
    c2.basic_unit,
    c2.frac_unit
FROM countries AS c1
FULL OUTER JOIN languages  AS l  USING (code)
FULL OUTER JOIN currencies AS c2 USING (code)
WHERE region LIKE 'M%esia';

## CROSSing the rubicon

In [None]:
-- Cross join: pair every city whose name starts with 'Hyder' with every row
-- of languages (no join condition is involved).
SELECT
    c.name AS city,
    l.name AS language
FROM cities AS c
CROSS JOIN languages AS l
WHERE c.name LIKE 'Hyder%';

# Set theory clauses

* img 
![Screen Shot 2017-09-13 at 14.19.33.png](http://upload-images.jianshu.io/upload_images/1526845-2cd64c41b2088f87.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

## State of the UNION

In [None]:
-- UNION stacks the two result sets and removes duplicate rows; the ORDER BY
-- applies to the combined result.
SELECT * FROM economies2010
UNION
SELECT * FROM economies2015
ORDER BY code, year;

## INTERSECTional data science

In [None]:
-- INTERSECT keeps only the (code, year) pairs that occur in both economies
-- and populations. Columns are matched by position, so country_code lines up
-- with code.
SELECT code, year FROM economies
INTERSECT
SELECT country_code, year FROM populations;

## EXCEPTional

In [None]:
-- EXCEPT returns the capitals from countries that do not appear as a name in
-- cities, sorted alphabetically.
SELECT capital FROM countries
EXCEPT
SELECT name FROM cities
ORDER BY capital;

## Semi-joins and Anti-joins

In [None]:
-- Semi-join: SQL has no dedicated keyword for it, so it is written as an IN
-- subquery. Columns are selected from the left table (languages) only, based
-- on values found in the right table (countries).
SELECT DISTINCT name
FROM languages
WHERE code IN (
    SELECT code
    FROM countries
    WHERE region = 'Middle East'
)
ORDER BY name;

# Subqueries

## Subqueries inside WHERE and SELECT clauses

## Subquery inside FROM clause

## Course review