### Joining data enables us to draw information from separate tables together into a single, meaningful set of results

In [74]:
# Create the SQLite database and load each CSV under countries/ into a table of the same name.
import sqlite3
import pandas as pd


def _load_table(table, con):
    """Read countries/<table>.csv and write it to the SQLite table named <table>.

    Parameters
    ----------
    table : str
        Base name shared by the CSV file and the destination table.
    con : sqlite3.Connection
        Open connection to the target database.

    Returns
    -------
    pandas.DataFrame
        The frame that was read from the CSV and written to the database.
    """
    frame = pd.read_csv(f"countries/{table}.csv", header=0)
    # if_exists='replace' drops a table left over from an earlier run before
    # writing, so re-running the cell never appends duplicate rows.
    # index=False keeps the positional DataFrame index out of the table
    # (index_label is only consulted when the index is written, so it is omitted).
    frame.to_sql(table, con, if_exists='replace', index=False, chunksize=10000)
    return frame


con = sqlite3.connect("countries.db")
try:
    cities = _load_table("cities", con)
    countries = _load_table("countries", con)
    economies = _load_table("economies", con)
    # Bound to its own name so the economies frame above is not overwritten.
    languages = _load_table("languages", con)
finally:
    # Release the database file even if a CSV is missing or a write fails.
    con.close()

In [75]:
# connect to database
# Opens a new connection to countries.db (the setup cell above closes its own
# connection when it finishes). `con` is the handle passed to pd.read_sql by
# the query cells that follow.
con = sqlite3.connect("countries.db")
cur = con.cursor()

In [76]:
# Preview the cities table: return every column of every row
pd.read_sql(
    sql="""SELECT * 
               FROM cities;""",
    con=con,
)

Unnamed: 0,name,country_code,city_proper_pop,metroarea_pop,urbanarea_pop
0,Abidjan,CIV,4765000,,4765000
1,Abu Dhabi,ARE,1145000,,1145000
2,Abuja,NGA,1235880,6000000.0,1235880
3,Accra,GHA,2070463,4010054.0,2070463
4,Addis Ababa,ETH,3103673,4567857.0,3103673
...,...,...,...,...,...
231,Yerevan,ARM,1060138,,1060138
232,Yokohama,JPN,3726167,,3726167
233,Zhengzhou,CHN,4122087,,4122087
234,Zhongshan,CHN,3121275,,3121275


In [77]:
# Match each city to its country via cities.country_code = countries.code,
# returning the city name, country name and region
pd.read_sql(
    sql="""SELECT cities.name AS city, country_name AS country, region
               FROM cities
               INNER JOIN countries
               ON cities.country_code = countries.code;""",
    con=con,
)

Unnamed: 0,city,country,region
0,Abidjan,Cote d'Ivoire,Western Africa
1,Abu Dhabi,United Arab Emirates,Middle East
2,Abuja,Nigeria,Western Africa
3,Accra,Ghana,Western Africa
4,Addis Ababa,Ethiopia,Eastern Africa
...,...,...,...
225,Yerevan,Armenia,Middle East
226,Yokohama,Japan,Eastern Asia
227,Zhengzhou,China,Eastern Asia
228,Zhongshan,China,Eastern Asia


In [78]:
# Combine countries with economies on the shared country code to list each
# country's inflation rate per year (the rows shown below are for 2010 and 2015)
pd.read_sql(
    sql="""SELECT c.code AS country_code, country_name, year, inflation_rate
               FROM countries AS c
               JOIN economies AS e 
               ON c.code = e.code;""",
    con=con,
)

Unnamed: 0,country_code,country_name,year,inflation_rate
0,AFG,Afghanistan,2010,2.179
1,AFG,Afghanistan,2015,-1.549
2,NLD,Netherlands,2010,0.932
3,NLD,Netherlands,2015,0.220
4,ALB,Albania,2010,3.605
...,...,...,...,...
363,EST,Estonia,2015,0.068
364,USA,United States,2010,1.637
365,USA,United States,2015,0.120
366,ZWE,Zimbabwe,2010,3.045


### When the fields being joined on have the same name in both tables, you can take advantage of the USING clause

A parting word of caution when using USING: columns can sometimes have the same name but actually contain vastly different data. Always remember to check what you are joining on by displaying and viewing your data first!

In [92]:
# For every country, list its languages together with the `official` flag,
# joining countries to languages on their common `code` column

pd.read_sql(
    sql="""SELECT country_name AS country, l.name AS language, official
               FROM countries AS c
               INNER JOIN languages AS l
               USING(code);""",
    con=con,
)

Unnamed: 0,country,language,official
0,Afghanistan,Dari,1
1,Afghanistan,Other,0
2,Afghanistan,Pashto,1
3,Afghanistan,Turkic,0
4,Netherlands,Dutch,1
...,...,...,...
904,Zimbabwe,Tonga,1
905,Zimbabwe,Tswana,1
906,Zimbabwe,Venda,1
907,Zimbabwe,Xhosa,1
