In [348]:
#Just the basics to code sql
import psycopg2   as g2        #psyco must run with sqlalchemy
import sqlalchemy as sal       #needed for solid connection to postgres databases
import pandas     as pd        #needed for prettier sql result outputs
print("psycopg2:", g2.__version__)
print("sqlalchemy:", sal.__version__)
print("pandas:", pd.__version__)

psycopg2: 2.9.10 (dt dec pq3 ext lo64)
sqlalchemy: 2.0.38
pandas: 2.2.3


In [349]:
#Connect to psql w/ sal and g2 Note: Grok 3 says  ,text  is version related
from sqlalchemy import create_engine, text
user = "postgres"
password = "password"
host = "localhost"
port = "5433"
name = "rt"
pg = "postgresql"
#Create a string for the engine
c = f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{name}"
#Create engine
eg = create_engine(c)
#Test Connection
try:
    with eg.connect() as cn:
        result = cn.execute (text("SELECT version();"))
        print("BINGO!", result.fetchone()[0])
except Exception as e:
   print("Get to the Chawpaw!:", e)


BINGO! PostgreSQL 17.4 on x86_64-windows, compiled by msvc-19.42.34436, 64-bit


In [350]:
#Query SQL Tables
from sqlalchemy import text

query = "SELECT * FROM films LIMIT 5;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,id,title,release_year,country,duration,language,certification,gross,budget
0,2,Over the Hill to the Poorhouse,1920.0,USA,110.0,,,3000000.0,100000.0
1,3,The Big Parade,1925.0,USA,151.0,,Not Rated,,245000.0
2,4,Metropolis,1927.0,Germany,145.0,German,Not Rated,26435.0,6000000.0
3,5,Pandora's Box,1929.0,Germany,110.0,German,Not Rated,9950.0,
4,6,The Broadway Melody,1929.0,USA,100.0,English,Passed,2808000.0,379000.0


In [351]:
###############################################        Introducing Joins Section 1 & 2          ####################################################
# NOTES
## All sql code has to be in red or else something is wrong
## When using, USING () are needed
## INNER JOINS returns exact matches only in each table
## LEFT JOIN allows nulls in right table; right join allows nulls on the left, full join allows nulls in either table
## When using GROUP BY the group must be in SELECT, duh.
## IS NULL in WHERE must have a field; ie name ...  OR name IS NULL

In [352]:
from sqlalchemy import text

query = "\
SELECT prime_ministers.country, prime_ministers.continent, prime_minister, president \
FROM presidents \
INNER JOIN prime_ministers \
ON presidents.country = prime_ministers.country \
LIMIT 5"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,prime_minister,president
0,Egypt,Africa,Mostafa Madbouly,Abdel Fattah el-Sisi
1,Portugal,Europe,António Costa,Marcelo Rebelo de Sousa
2,Pakistan,Asia,Shehbaz Sharif,Arif Alvi
3,India,Asia,Narendra Modi,Ram Nath Kovind


In [353]:
from sqlalchemy import text

query = "\
SELECT p2.country, p2.continent, prime_minister, president \
FROM presidents AS p1 \
INNER JOIN prime_ministers AS p2 \
ON p1.country = p2.country \
LIMIT 5; "
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,prime_minister,president
0,Egypt,Africa,Mostafa Madbouly,Abdel Fattah el-Sisi
1,Portugal,Europe,António Costa,Marcelo Rebelo de Sousa
2,Pakistan,Asia,Shehbaz Sharif,Arif Alvi
3,India,Asia,Narendra Modi,Ram Nath Kovind


In [354]:
##########################################   SELF JOINING TABLES     #################################
from sqlalchemy import text

query = "\
SELECT p1.country AS Country1, p2.country AS Country2, p1.continent \
FROM prime_ministers AS p1 \
INNER JOIN prime_ministers AS p2 \
ON p1.continent = p2.continent \
  AND p1.country <> p2.country \
LIMIT 5;" 
# The AND statement here removes duplicates in the resulting table
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country1,country2,continent
0,Portugal,United Kingdom,Europe
1,Portugal,Norway,Europe
2,Pakistan,India,Asia
3,Pakistan,Brunei,Asia
4,Pakistan,Oman,Asia


In [355]:
from sqlalchemy import text

query = " \
SELECT p1.country_code, \
   p1.size AS size2010, \
   p2.size AS size2015 \
FROM populations AS p1 INNER JOIN populations AS p2 \
ON p1.country_code = p2.country_code \
WHERE p1.year = 2010 \
    AND p1.year = p2.year - 5;"

df = pd.read_sql(text(query),eg)
df
## Filter such that p1.year is always five years before p2.year WTF!!

Unnamed: 0,country_code,size2010,size2015
0,ABW,101597.0,103889.0
1,AFG,27962208.0,32526562.0
2,AGO,21219954.0,25021974.0
3,ALB,2913021.0,2889167.0
4,AND,84419.0,70473.0
...,...,...,...
212,XKX,1775680.0,1801800.0
213,YEM,23591972.0,26832216.0
214,ZAF,50979432.0,55011976.0
215,ZMB,13917439.0,16211767.0


In [356]:
###############################################        Introducing UNION Section 3         ####################################################
# NOTES
# JOINS merge tables on a particular criteria. ON code from our examples
# UNION stacks tables
# As I merge .csv file imported into postgres I will use UNION for similar datasets
# 




In [357]:
###################################        SECTION 4 Introducing Nested Queries: Sub Queries          #######################################

# Let's redo the last section on nested sub queirs
# These anti joins and semi joins are kind of unions and except functionality

#NOTES
 #Sub-queries can be used inside SELECT, FROM, or WHERE
 #Sub-queries can use a 3rd table as a filter criteria
 #Semi joins: chooses records in the left table where a condition is met in the right table
 #Anti joins: 

In [358]:
##################################################          NESTED IN WHERE       ############################################

# I'm using a nest query, sub-query in the WHERE clause to first filter which country achieved independence before 1800 from states table.
# Then from that list I'm selecting president, country, and continent from the presidents table. 

from sqlalchemy import text

query = "\
 SELECT president, country, continent \
 FROM presidents \
 WHERE country IN \
  (SELECT country \
  FROM states \
  WHERE indep_year < 1800);"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,president,country,continent
0,Marcelo Rebelo de Sousa,Portugal,Europe
1,Joe Biden,USA,North America


In [359]:
# ANTI JOIN
# Now I want to find out which countries did not achieve independence before 1800.
# Let's add an additional critera to limit our results to Americas

from sqlalchemy import text

query = "\
 SELECT president, country \
 FROM presidents \
 WHERE continent LIKE '%America' \
  AND country NOT IN \
   (SELECT country \
   FROM states \
   WHERE indep_year < 1800);"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,president,country
0,Luis Lacalle Pou,Uruguay
1,Gabriel Boric,Chile


In [360]:
from sqlalchemy import text

query = "\
SELECT code \
FROM countries \
WHERE region = 'Middle East';"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code
0,ARE
1,ARM
2,AZE
3,BHR
4,GEO
5,IRQ
6,ISR
7,YEM
8,JOR
9,KWT


In [361]:
from sqlalchemy import text

query = "\
SELECT DISTINCT name \
FROM languages \
ORDER BY name ASC;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,name
0,Afar
1,Afrikaans
2,Akyem
3,Albanian
4,Alsatian
...,...
391,Yapese
392,Yoruba
393,Yue
394,Zezuru


In [362]:
from sqlalchemy import text

query = "\
SELECT DISTINCT name \
FROM languages \
WHERE code IN \
 (SELECT code \
  FROM countries \
  WHERE region = 'Middle East') \
ORDER BY name;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,name
0,Arabic
1,Aramaic
2,Armenian
3,Azerbaijani
4,Azeri
5,Baluchi
6,Bulgarian
7,Circassian
8,English
9,Farsi


In [363]:
#Give me a list of all the countries on the Oceania continent

from sqlalchemy import text

query = "\
SELECT code, name \
FROM countries \
WHERE continent = 'Oceania';"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,name
0,ASM,American Samoa
1,AUS,Australia
2,FJI,Fiji Islands
3,GUM,Guam
4,KIR,Kiribati
5,MHL,Marshall Islands
6,FSM,"Micronesia, Federated States of"
7,NRU,Nauru
8,PLW,Palau
9,PNG,Papua New Guinea


In [364]:
# Give me a list of currencies used in countries on the Oceania continent.
# Continent is not a field in the currencies table, but country code is.
# Country code is listed with continents on the countries table.
# INNER JOIN to bring in each of the fields from two tables into 1 table.

# SUMMARY: Give me a list of all the country's currencies used on the Oceania continent

from sqlalchemy import text

query = "\
SELECT c1.code, c1.name, basic_unit AS currency \
FROM countries AS c1 \
INNER JOIN currencies AS c2 \
 ON c1.code = c2.code \
WHERE c1.continent = 'Oceania';"

df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,name,currency
0,AUS,Australia,Australian dollar
1,PYF,French Polynesia,CFP franc
2,KIR,Kiribati,Australian dollar
3,MHL,Marshall Islands,United States dollar
4,NRU,Nauru,Australian dollar
5,NCL,New Caledonia,CFP franc
6,NZL,New Zealand,New Zealand dollar
7,PLW,Palau,United States dollar
8,PNG,Papua New Guinea,Papua New Guinean kina
9,WSM,Samoa,Samoan tala


In [365]:
# Now I want a list of all countries on the Oceania continent that are NOT in the currencies table
# Ok, this is going to compare the list of countries on the Oceania continent from the countries table against the list of countries on the currencies table.
# Then is going to create a list of countries on the Oceania continent that are NOT in the currency tables. (Maybe the country does not have a registered currency)

from sqlalchemy import text

query = "\
SELECT code, name \
FROM countries \
WHERE continent = 'Oceania' \
  AND code NOT IN \
    (SELECT code \
    FROM currencies);"
df = pd.read_sql(text(query),eg)
df

##################################### WTF!  THE 2 PREVIOUS QUESTIONS ARE WORDED HORRIBLY! ##########################

Unnamed: 0,code,name
0,ASM,American Samoa
1,FJI,Fiji Islands
2,GUM,Guam
3,FSM,"Micronesia, Federated States of"
4,MNP,Northern Mariana Islands


In [366]:
#
#################################################     NEST IN SELECT       #########################################


from sqlalchemy import text

query = "\
SELECT DISTINCT continent, \
    (SELECT COUNT (*) \
    FROM monarchs \
    WHERE states.continent = monarchs.continent) AS monarch_count \
FROM states;"
df = pd.read_sql(text(query),eg)
df

#############################################             FOUND CODING ERROR IN THE TRAINING MATERIAL  NO WONDER...        ##########################

Unnamed: 0,continent,monarch_count
0,Africa,0
1,Asia,2
2,Europe,2
3,North America,0
4,Oceania,0
5,South America,0


In [367]:
#  Working from the same table

#from sqlalchemy import text

#query = "\
#SELECT * \
#FROM populations \
#WHERE life_expectancy > 1.15 * AVG(life_expectancy) \
#    AND year = 2015;"
#df = pd.read_sql(text(query),eg)
#df

####################################             this code doesn't even work in the training material          ###############################

In [368]:
from sqlalchemy import text

query = "\
SELECT * \
FROM populations \
WHERE year = 2015 \
 AND life_expectancy > 1.15 * \
   (SELECT AVG(life_expectancy) \
    FROM populations \
    WHERE year = 2015);"
df = pd.read_sql(text(query),eg)
df
###########################################      After all that, I forgot a )    :-( 

Unnamed: 0,pop_id,country_code,year,fertility_rate,life_expectancy,size
0,21,AUS,2015,1.833,82.45122,23789752.0
1,376,CHE,2015,1.54,83.19756,8281430.0
2,356,ESP,2015,1.32,83.380486,46443992.0
3,134,FRA,2015,2.01,82.67073,66538392.0
4,170,HKG,2015,1.195,84.278046,7305700.0
5,174,ISL,2015,1.93,82.86098,330815.0
6,190,ITA,2015,1.37,83.49024,60730584.0
7,194,JPN,2015,1.46,83.84366,126958470.0
8,340,SGP,2015,1.24,82.59512,5535002.0
9,374,SWE,2015,1.88,82.551216,9799186.0


In [369]:
from sqlalchemy import text

query = "\
SELECT cities.name, cities.country_code, cities.urbanarea_pop \
FROM cities \
INNER JOIN countries \
ON code = country_code \
WHERE cities.name = countries.capital \
ORDER BY urbanarea_pop DESC;"
df = pd.read_sql(text(query),eg)
df


Unnamed: 0,name,country_code,urbanarea_pop
0,Beijing,CHN,21516000.0
1,Dhaka,BGD,14543124.0
2,Tokyo,JPN,13513734.0
3,Moscow,RUS,12197596.0
4,Cairo,EGY,10230350.0
...,...,...,...
61,Dakar,SEN,1146053.0
62,Abu Dhabi,ARE,1145000.0
63,Tripoli,LBY,1126000.0
64,Yerevan,ARM,1060138.0


In [370]:
#INNER JOIN ONLY RETURNS PERFECT MATCHES FROM EACH TABLE
#LEFT JOIN KEEPS ALL RECORDS IN THE LEFT TABLE AND BRINGS IN MATCH DATA FROM THE RIGHT AND NULL FOR RECORDS THAT DO NOT MATCH

from sqlalchemy import text

query = "\
SELECT countries.name AS country, COUNT (*) AS cities_num \
FROM countries \
LEFT JOIN cities \
ON countries.code = cities.country_code \
GROUP BY countries.name \
ORDER BY cities_num DESC, country ASC \
LIMIT 9;"
df = pd.read_sql(text(query),eg)
df
############################        have to use GROUP BY with aggregate functions are in the SELECT statement like COUNT () SUM () AVG () MAX () MIN ()

Unnamed: 0,country,cities_num
0,China,36
1,India,18
2,Japan,11
3,Brazil,10
4,Pakistan,9
5,United States,9
6,Indonesia,7
7,Russian Federation,7
8,South Korea,7


In [371]:
from sqlalchemy import text

query = "\
SELECT countries.name AS country, \
   (SELECT COUNT (cities.name) \
    FROM cities \
    WHERE countries.code = cities.country_code) AS cities_num \
FROM countries \
ORDER BY cities_num DESC;"
df = pd.read_sql(text(query),eg)
df

#############################          In sub queries you do not need GROUP BY        ############################

Unnamed: 0,country,cities_num
0,China,36
1,India,18
2,Japan,11
3,Brazil,10
4,United States,9
...,...,...
200,Moldova,0
201,Monaco,0
202,Mongolia,0
203,Namibia,0


In [372]:
#
#################################################     NEST IN FROM       #########################################
#

In [373]:
from sqlalchemy import text

query = "\
SELECT continent, MAX(indep_year) AS most_recent \
FROM states \
GROUP BY continent;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,continent,most_recent
0,South America,1825
1,Oceania,1901
2,Africa,1922
3,Asia,1984
4,North America,1776
5,Europe,1905


In [374]:
from sqlalchemy import text

query = "\
SELECT DISTINCT monarchs.continent, sub.most_recent \
FROM monarchs, \
    (SELECT continent, MAX(indep_year) AS most_recent \
    FROM states \
    GROUP BY continent) AS sub \
WHERE monarchs.continent = sub.continent;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,continent,most_recent
0,Asia,1984
1,Europe,1905


In [375]:
from sqlalchemy import text

query = "\
SELECT code, COUNT (name) as lang_num \
FROM languages \
GROUP BY code \
ORDER BY lang_num DESC;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,lang_num
0,ZMB,19
1,ZWE,16
2,ETH,16
3,IND,14
4,NPL,14
...,...,...
207,COL,1
208,AIA,1
209,DOM,1
210,SAU,1


In [376]:
from sqlalchemy import text

query = "\
SELECT local_name, sub.lang_num \
FROM countries, \
    (SELECT code, COUNT(name) AS lang_num \
    FROM languages \
    GROUP BY code) AS sub \
WHERE countries.code = sub.code \
ORDER BY lang_num DESC;"
df = pd.read_sql(text(query),eg)
df

# COUNT(name) AS lang_num is a FIELD NAME in the table called sub   which is now  sub.lang_num

Unnamed: 0,local_name,lang_num
0,Zambia,19
1,YeItyop´iya,16
2,Zimbabwe,16
3,Nepal,14
4,Bharat/India,14
...,...,...
193,Cuba,1
194,Colombia,1
195,Nederland,1
196,Al-´Arabiya as-Sa´udiya,1


In [377]:
from sqlalchemy import text

query = "\
SELECT code, inflation_rate, unemployment_rate \
FROM economies \
WHERE year = 2015 \
 AND code IN \
     (SELECT code \
      FROM countries \
      WHERE gov_form LIKE '%Republic%' OR gov_form LIKE '%Monarchy%') \
ORDER BY inflation_rate;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,inflation_rate,unemployment_rate
0,LBN,-3.749,
1,ZWE,-2.410,
2,KNA,-2.302,
3,MHL,-2.169,
4,VCT,-1.726,
...,...,...,...
173,YEM,39.403,
174,UKR,48.684,9.143
175,VEN,121.738,7.400
176,ARG,,


In [378]:
#
#####################    COURSE 3: SECTION 4 AGAIN :-(    #########################################
#

In [379]:
from sqlalchemy import text

query = "\
SELECT country, continent, president \
FROM presidents;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,president
0,Egypt,Africa,Abdel Fattah el-Sisi
1,Portugal,Europe,Marcelo Rebelo de Sousa
2,USA,North America,Joe Biden
3,Uruguay,South America,Luis Lacalle Pou
4,Pakistan,Asia,Arif Alvi
5,Chile,South America,Gabriel Boric
6,India,Asia,Ram Nath Kovind


In [380]:
from sqlalchemy import text

query = "\
SELECT country \
FROM states \
WHERE indep_year < 1800;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country
0,Portugal
1,USA


In [381]:
##################            SEMI JOIN

from sqlalchemy import text

query = "\
SELECT country, continent, president \
FROM presidents \
WHERE country IN \
    (SELECT country \
    FROM states \
    WHERE indep_year < 1800);"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,president
0,Portugal,Europe,Marcelo Rebelo de Sousa
1,USA,North America,Joe Biden


In [382]:
##################            ANTI JOIN

###   What countries in the Americas were not form before 1800

from sqlalchemy import text

query = "\
SELECT country, continent, president \
FROM presidents \
WHERE continent LIKE '%America'\
    AND country NOT IN \
        (SELECT country \
        FROM states \
        WHERE indep_year < 1800);"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,president
0,Uruguay,South America,Luis Lacalle Pou
1,Chile,South America,Gabriel Boric


In [383]:

# I want to know all the languages spoken in the region = Middle East


from sqlalchemy import text

query = "\
SELECT code \
FROM countries \
WHERE region = 'Middle East' LIMIT 3;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code
0,ARE
1,ARM
2,AZE


In [384]:
from sqlalchemy import text

query = "\
SELECT name \
FROM languages \
WHERE code IN \
    (SELECT code \
    FROM countries \
    WHERE region = 'Middle East');"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,name
0,Armenian
1,Kurdish
2,Other
3,Azerbaijani
4,Russian
...,...
61,Persian
62,English
63,Hindi
64,Urdu


In [385]:
#
# I want to know all the currencies used on the Oceania continent
#

In [386]:
from sqlalchemy import text

query = "\
SELECT code, name \
FROM countries \
WHERE continent LIKE '%Oceania%' LIMIT 3;"
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,name
0,ASM,American Samoa
1,AUS,Australia
2,FJI,Fiji Islands


In [387]:
from sqlalchemy import text

query = """
SELECT code, name
FROM countries 
WHERE continent = 'Oceania' LIMIT 3
"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,name
0,ASM,American Samoa
1,AUS,Australia
2,FJI,Fiji Islands


In [388]:
#
# Now I want to know the currencies used in these countries on the Oceania continent
#

from sqlalchemy import text

query = """
SELECT code, basic_unit AS currency
FROM currencies
WHERE code IN
    (SELECT code
    FROM countries 
    WHERE continent = 'Oceania')
"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,currency
0,AUS,Australian dollar
1,PYF,CFP franc
2,KIR,Australian dollar
3,MHL,United States dollar
4,NRU,Australian dollar
5,NCL,CFP franc
6,NZL,New Zealand dollar
7,PLW,United States dollar
8,PNG,Papua New Guinean kina
9,WSM,Samoan tala


In [389]:
# ANTI JOIN
# Now I want to know if there are any countries on Oceania that are not listed. Aka, that dont have an assigned currency in the table
from sqlalchemy import text

query = """
SELECT code, name
FROM countries
WHERE continent = 'Oceania'
    AND code NOT IN
        (SELECT code
        FROM currencies)
"""
df = pd.read_sql(text(query),eg)
df


Unnamed: 0,code,name
0,ASM,American Samoa
1,FJI,Fiji Islands
2,GUM,Guam
3,FSM,"Micronesia, Federated States of"
4,MNP,Northern Mariana Islands


In [390]:
#
# Now let's use INNER JOIN and Aliases
# I want to know all the countries on the continent of Oceania that have currencies
# There is a difference in INNER JOIN and a sub-query inside the where statement: INNER JOIN joins tables, where the sub-query inside WHERE filters

from sqlalchemy import text

query = """
SELECT name
FROM countries AS c1
INNER JOIN currencies AS c2 
ON c1.code = c2.code
WHERE continent = 'Oceania'
"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,name
0,Australia
1,French Polynesia
2,Kiribati
3,Marshall Islands
4,Nauru
5,New Caledonia
6,New Zealand
7,Palau
8,Papua New Guinea
9,Samoa


In [391]:
#
# The most popular filter sub-query is used in WHERE
# SELECT can handle sub-queries as well

In [392]:
#
# I want to count the number of monarchs listed in the monarchs table for each continent in that states table using a sub-query in the SELECT statement
# Aka how many monarchs on each continent

from sqlalchemy import text

query = """
SELECT *
FROM monarchs
LIMIT 2
"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,monarch
0,Brunei,Asia,Hassanal Bolkiah
1,Norway,Europe,Harald V


In [393]:
from sqlalchemy import text

query = """
SELECT *
FROM states
LIMIT 2
"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,country,continent,indep_year
0,Australia,Oceania,1901
1,Brunei,Asia,1984


In [394]:
###############################  Sub-queries inside SELECT require alais      #########################

from sqlalchemy import text

query = """

SELECT DISTINCT continent,
    (SELECT COUNT (*)
    FROM monarchs
    WHERE states.continent = monarchs.continent) AS monarch_count
FROM states

"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,continent,monarch_count
0,Africa,0
1,Asia,2
2,Europe,2
3,North America,0
4,Oceania,0
5,South America,0


In [395]:
#
# I want to know which countries had higher than average AVG life_expectancy (1.15) in 2015 from the populations table
#

from sqlalchemy import text

query = """

SELECT *
FROM populations
LIMIT 3

"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,pop_id,country_code,year,fertility_rate,life_expectancy,size
0,20,ABW,2010,1.704,74.95354,101597.0
1,19,ABW,2015,1.647,75.573586,103889.0
2,2,AFG,2010,5.746,58.97083,27962208.0


In [396]:
from sqlalchemy import text

query = """

SELECT AVG(life_expectancy) AS avg_life_expectancy
FROM populations
WHERE year = 2015


"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,avg_life_expectancy
0,71.676342


In [397]:
from sqlalchemy import text

query = """

SELECT *
FROM populations
WHERE year = 2015 
    AND
        life_expectancy > 1.15 * 
        (SELECT AVG(life_expectancy) 
         FROM populations
         WHERE year = 2015)

"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,pop_id,country_code,year,fertility_rate,life_expectancy,size
0,21,AUS,2015,1.833,82.45122,23789752.0
1,376,CHE,2015,1.54,83.19756,8281430.0
2,356,ESP,2015,1.32,83.380486,46443992.0
3,134,FRA,2015,2.01,82.67073,66538392.0
4,170,HKG,2015,1.195,84.278046,7305700.0
5,174,ISL,2015,1.93,82.86098,330815.0
6,190,ITA,2015,1.37,83.49024,60730584.0
7,194,JPN,2015,1.46,83.84366,126958470.0
8,340,SGP,2015,1.24,82.59512,5535002.0
9,374,SWE,2015,1.88,82.551216,9799186.0


In [398]:
#
# I want to know the population of capital cities in order
#

from sqlalchemy import text

query = """

SELECT *
FROM countries
LIMIT 1

"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,code,name,continent,region,surface_area,indep_year,local_name,gov_form,capital,cap_long,cap_lat,geom
0,AFG,Afghanistan,Asia,Southern and Central Asia,652090.0,1919,Afganistan/Afqanestan,Islamic Emirate,Kabul,69.1761,34.5228,0101000020E610000000000040454B514000000020EB42...


In [399]:
from sqlalchemy import text

query = """

SELECT *
FROM cities
LIMIT 1

"""
df = pd.read_sql(text(query),eg)
df

Unnamed: 0,name,country_code,city_proper_pop,metroarea_pop,urbanarea_pop
0,Abidjan,CIV,4765000.0,,4765000.0


In [400]:
from sqlalchemy import text

query = """

SELECT c2.name, country_code, urbanarea_pop
FROM countries AS c1
INNER JOIN cities AS c2
ON c1.code = c2.country_code
WHERE c1.capital = c2.name
ORDER BY urbanarea_pop DESC

"""
df = pd.read_sql(text(query),eg)
df


Unnamed: 0,name,country_code,urbanarea_pop
0,Beijing,CHN,21516000.0
1,Dhaka,BGD,14543124.0
2,Tokyo,JPN,13513734.0
3,Moscow,RUS,12197596.0
4,Cairo,EGY,10230350.0
...,...,...,...
61,Dakar,SEN,1146053.0
62,Abu Dhabi,ARE,1145000.0
63,Tripoli,LBY,1126000.0
64,Yerevan,ARM,1060138.0


In [401]:
#I want a list of countries that have the most cities in them using left join
from sqlalchemy import text 
query = """ SELECT * FROM countries LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,code,name,continent,region,surface_area,indep_year,local_name,gov_form,capital,cap_long,cap_lat,geom
0,AFG,Afghanistan,Asia,Southern and Central Asia,652090.0,1919,Afganistan/Afqanestan,Islamic Emirate,Kabul,69.1761,34.5228,0101000020E610000000000040454B514000000020EB42...


In [402]:
from sqlalchemy import text 
query = """ SELECT * FROM cities LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,name,country_code,city_proper_pop,metroarea_pop,urbanarea_pop
0,Abidjan,CIV,4765000.0,,4765000.0


In [403]:
from sqlalchemy import text 
query = """ SELECT country_code, COUNT(*) FROM cities GROUP BY country_code LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,country_code,count
0,KHM,1


In [404]:
from sqlalchemy import text 
query = """ 

SELECT COUNT(c1.name), c2.code
FROM cities AS c1
LEFT JOIN countries AS c2 ON
c1.country_code = c2.code
GROUP BY c2.code
ORDER BY COUNT(c1.name) DESC
LIMIT 3
"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,count,code
0,36,CHN
1,18,IND
2,11,JPN


In [405]:
#I want a list of countries that have the most cities in them using sub-query in SELECT
# When using sub-queries in SELECT, must use WHERE clause to connect the tables, and use an alias because the alias is the FIELD NAME in the new table

from sqlalchemy import text 
query = """ 
SELECT countries.name,(
    SELECT COUNT(*)
    FROM cities
    WHERE cities.country_code = countries.code) AS number_of_cities
FROM countries 
ORDER BY number_of_cities DESC

"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,name,number_of_cities
0,China,36
1,India,18
2,Japan,11
3,Brazil,10
4,United States,9
...,...,...
200,Moldova,0
201,Monaco,0
202,Mongolia,0
203,Namibia,0


In [406]:

# I want to know which continent has the youngest country and has a monarch still in power using a sub query in the FROM statement

from sqlalchemy import text 
query = """ SELECT * FROM states LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,country,continent,indep_year
0,Australia,Oceania,1901


In [407]:
from sqlalchemy import text 
query = """ SELECT * FROM monarchs LIMIT 1  """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,country,continent,monarch
0,Brunei,Asia,Hassanal Bolkiah


In [408]:
from sqlalchemy import text 
query = """ 
SELECT s1.most_recent, monarchs.continent
FROM monarchs, (
    SELECT continent, MAX(indep_year) AS most_recent
    FROM states
    GROUP BY continent) AS s1
WHERE monarchs.continent = s1.continent

"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,most_recent,continent
0,1984,Asia
1,1905,Europe
2,1984,Asia
3,1905,Europe


In [409]:
# I want to know how many languages are spoken in each country using a sub-query inside FROM
from sqlalchemy import text 
query = """ SELECT * FROM languages LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,lang_id,code,name,percent,official
0,1,AFG,Dari,50.0,True


In [410]:
from sqlalchemy import text 
query = """ SELECT * FROM countries LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,code,name,continent,region,surface_area,indep_year,local_name,gov_form,capital,cap_long,cap_lat,geom
0,AFG,Afghanistan,Asia,Southern and Central Asia,652090.0,1919,Afganistan/Afqanestan,Islamic Emirate,Kabul,69.1761,34.5228,0101000020E610000000000040454B514000000020EB42...


In [411]:
from sqlalchemy import text 
query = """ 

SELECT countries.name, s1.lang_count
FROM countries, (
    SELECT languages.code, COUNT (languages.name) AS lang_count
    FROM languages 
    GROUP BY languages.code) AS s1
WHERE s1.code = countries.code
ORDER BY lang_count DESC
"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,name,lang_count
0,Zambia,19
1,Ethiopia,16
2,Zimbabwe,16
3,Nepal,14
4,India,14
...,...,...
193,Cuba,1
194,Colombia,1
195,Netherlands,1
196,Saudi Arabia,1


In [412]:
# I want to analyze code, inflation and unemployment rate in countries for 2015 from the economies table 
# that have Republic or Monarchy in countries table in their gov_form using sub-query in WHERE
from sqlalchemy import text 
query = """ SELECT * FROM economies LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,econ_id,code,year,income_group,gdp_percapita,gross_savings,inflation_rate,total_investment,unemployment_rate,exports,imports
0,1,AFG,2010,Low income,539.667,37.133,2.179,30.402,,46.394,24.381


In [413]:
from sqlalchemy import text 
query = """ SELECT * FROM countries LIMIT 1 """
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,code,name,continent,region,surface_area,indep_year,local_name,gov_form,capital,cap_long,cap_lat,geom
0,AFG,Afghanistan,Asia,Southern and Central Asia,652090.0,1919,Afganistan/Afqanestan,Islamic Emirate,Kabul,69.1761,34.5228,0101000020E610000000000040454B514000000020EB42...


In [440]:
from sqlalchemy import text 
query = """ 
SELECT economies.code, economies.inflation_rate, economies.unemployment_rate 
FROM economies
WHERE year = 2015
LIMIT 1 
"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,code,inflation_rate,unemployment_rate
0,AFG,-1.549,


In [439]:
from sqlalchemy import text 
query = """ 

SELECT countries.code, countries.gov_form
FROM countries
WHERE gov_form LIKE 'Republic' OR gov_form LIKE 'Monarchy'

LIMIT 1
"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,code,gov_form
0,ALB,Republic


In [446]:
from sqlalchemy import text 
query = """ 

SELECT economies.code, economies.inflation_rate, economies.unemployment_rate
FROM economies
WHERE year = 2015
    AND code IN
    (SELECT countries.code
    FROM countries
    WHERE gov_form LIKE 'Republic' OR gov_form LIKE 'Monarchy')
    
    
"""
df = pd.read_sql(text(query),eg) 
df

# Here's the different between filtering with WHERE and JOIN because I can not present Republic/Monarchy unless I join

Unnamed: 0,code,inflation_rate,unemployment_rate
0,AGO,10.287,
1,ALB,1.896,17.10
2,ARM,3.731,18.50
3,BDI,5.553,
4,BEN,0.271,
...,...,...,...
119,VUT,2.483,
120,YEM,39.403,
121,ZAF,4.575,25.35
122,ZMB,10.107,


In [450]:
from sqlalchemy import text 
query = """ 

SELECT economies.code, economies.inflation_rate, economies.unemployment_rate
FROM economies
WHERE year = 2015
    AND code IN
    (SELECT countries.code
    FROM countries
    WHERE gov_form LIKE 'Republic' OR gov_form LIKE 'Monarchy')
     
"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,code,inflation_rate,unemployment_rate
0,AGO,10.287,
1,ALB,1.896,17.10
2,ARM,3.731,18.50
3,BDI,5.553,
4,BEN,0.271,
...,...,...,...
119,VUT,2.483,
120,YEM,39.403,
121,ZAF,4.575,25.35
122,ZMB,10.107,


In [498]:
# I want to find the Top 10 capital cities in Europe and Americans by city_perc
from sqlalchemy import text 

query = """ 

SELECT cities.name AS city_name, country_code, city_proper_pop, metroarea_pop, city_proper_pop / metroarea_pop * 100 AS city_per
FROM cities
WHERE metroarea_pop IS NOT NULL
    AND name IN 
    (SELECT countries.capital
     FROM countries
     WHERE countries.capital = cities.name
     AND continent = 'Europe'
     OR continent LIKE '%America%')

"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,city_name,country_code,city_proper_pop,metroarea_pop,city_per
0,Berlin,DEU,3517424.0,5871022.0,59.911615
1,Bogota,COL,7878783.0,9800000.0,80.395746
2,Brasilia,BRA,2556149.0,3919864.0,65.210146
3,Budapest,HUN,1759407.0,2927944.0,60.090184
4,Buenos Aires,ARG,3054300.0,14122000.0,21.627957
5,Caracas,VEN,1943901.0,2923959.0,66.481817
6,Guatemala City,GTM,2110100.0,4500000.0,46.891111
7,Lima,PER,8852000.0,10750000.0,82.344186
8,London,GBR,8673713.0,13879757.0,62.491822
9,Mexico City,MEX,8974724.0,20063000.0,44.732711


In [511]:
from sqlalchemy import text 

query = """ 

SELECT cities.name AS city_name, country_code, city_proper_pop, metroarea_pop, city_proper_pop / metroarea_pop * 100 AS city_per
FROM cities
WHERE name IN 
    (SELECT countries.capital
     FROM countries
     WHERE countries.capital = cities.name
     AND continent = 'Europe'
     OR continent LIKE '%America%') 
AND metroarea_pop IS NOT NULL


"""
df = pd.read_sql(text(query),eg) 
df

Unnamed: 0,city_name,country_code,city_proper_pop,metroarea_pop,city_per
0,Berlin,DEU,3517424.0,5871022.0,59.911615
1,Bogota,COL,7878783.0,9800000.0,80.395746
2,Brasilia,BRA,2556149.0,3919864.0,65.210146
3,Budapest,HUN,1759407.0,2927944.0,60.090184
4,Buenos Aires,ARG,3054300.0,14122000.0,21.627957
5,Caracas,VEN,1943901.0,2923959.0,66.481817
6,Guatemala City,GTM,2110100.0,4500000.0,46.891111
7,Lima,PER,8852000.0,10750000.0,82.344186
8,London,GBR,8673713.0,13879757.0,62.491822
9,Mexico City,MEX,8974724.0,20063000.0,44.732711


In [None]:
#######################################           This finishes the third course of nine in the SQL Associate sequence on Data Camp