In [1]:
# Core toolkit for running SQL from Python
import psycopg2   as g2        # PostgreSQL driver named in the engine URL
import sqlalchemy as sal       # engine / connection layer for the Postgres database
import pandas     as pd        # turns SQL results into nicely formatted tables

# Report the installed version of each library
print(f"psycopg2: {g2.__version__}")
print(f"sqlalchemy: {sal.__version__}")
print(f"pandas: {pd.__version__}")

psycopg2: 2.9.10 (dt dec pq3 ext lo64)
sqlalchemy: 2.0.38
pandas: 2.2.3


In [2]:
# Connect to PostgreSQL through SQLAlchemy using the psycopg2 driver.
# `text` wraps a raw SQL string so it can be handed to Connection.execute().
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

# Connection settings. Each one can be overridden with an environment
# variable so real credentials never have to be saved in the notebook;
# the fallbacks are the original local-development values.
user = os.environ.get("RT_DB_USER", "postgres")
password = os.environ.get("RT_DB_PASSWORD", "password")
host = os.environ.get("RT_DB_HOST", "localhost")
port = os.environ.get("RT_DB_PORT", "5433")
name = os.environ.get("RT_DB_NAME", "rt")
pg = "postgresql"

# Build the engine URL. User and password are percent-encoded so that
# characters such as "@", ":" or "/" inside a credential cannot break the URL.
c = f"{pg}+psycopg2://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{name}"

# Create engine
eg = create_engine(c)

# Test connection: run a trivial query and print the server version string.
try:
    with eg.connect() as cn:
        result = cn.execute(text("SELECT version();"))
        print("BINGO!", result.fetchone()[0])
except Exception as e:
    # Deliberately broad: report any connection problem without stopping the notebook.
    print("Get to the Chawpaw!:", e)


BINGO! PostgreSQL 17.4 on x86_64-windows, compiled by msvc-19.42.34436, 64-bit


In [6]:
# Preview the films table: first six rows only
from sqlalchemy import text

query = "SELECT * FROM films LIMIT 6;"
df = pd.read_sql(sql=text(query), con=eg)
df

Unnamed: 0,id,title,release_year,country,duration,language,certification,gross,budget
0,2,Over the Hill to the Poorhouse,1920.0,USA,110.0,,,3000000.0,100000.0
1,3,The Big Parade,1925.0,USA,151.0,,Not Rated,,245000.0
2,4,Metropolis,1927.0,Germany,145.0,German,Not Rated,26435.0,6000000.0
3,5,Pandora's Box,1929.0,Germany,110.0,German,Not Rated,9950.0,
4,6,The Broadway Melody,1929.0,USA,100.0,English,Passed,2808000.0,379000.0
5,7,Hell's Angels,1930.0,USA,96.0,English,Passed,,3950000.0


In [None]:
###############################################        Introducing Joins Section 1 & 2          ####################################################
# NOTES
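## All SQL must sit inside the quoted query string (the editor colours strings red); if part of the SQL is not red, a quote or line continuation is broken
## The USING keyword requires parentheses around the column name, e.g. USING (country)
## INNER JOIN returns only the rows that have a match in both tables
## LEFT JOIN keeps every left-table row (NULLs fill the unmatched right-table columns); RIGHT JOIN keeps every right-table row (NULLs on the left); FULL JOIN keeps all rows from both tables (NULLs on either side)
## With GROUP BY, every non-aggregated column in SELECT must also appear in GROUP BY
## IS NULL in WHERE must be attached to a field, e.g. name ...  OR name IS NULL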

In [15]:
# INNER JOIN with fully qualified table names:
# match each country's president with its prime minister.
from sqlalchemy import text

# Adjacent string literals are concatenated into one single-line SQL statement.
query = (
    "SELECT prime_ministers.country, prime_ministers.continent, prime_minister, president "
    "FROM presidents "
    "INNER JOIN prime_ministers "
    "ON presidents.country = prime_ministers.country "
    "LIMIT 5"
)
df = pd.read_sql(sql=text(query), con=eg)
df

Unnamed: 0,country,continent,prime_minister,president
0,Egypt,Africa,Mostafa Madbouly,Abdel Fattah el-Sisi
1,Portugal,Europe,António Costa,Marcelo Rebelo de Sousa
2,Pakistan,Asia,Shehbaz Sharif,Arif Alvi
3,India,Asia,Narendra Modi,Ram Nath Kovind


In [22]:
# Same INNER JOIN as before, written with table aliases (p1 = presidents, p2 = prime_ministers).
from sqlalchemy import text

# Adjacent string literals are concatenated into one single-line SQL statement.
query = (
    "SELECT p2.country, p2.continent, prime_minister, president "
    "FROM presidents AS p1 "
    "INNER JOIN prime_ministers AS p2 "
    "ON p1.country = p2.country "
    "LIMIT 5; "
)
df = pd.read_sql(sql=text(query), con=eg)
df

Unnamed: 0,country,continent,prime_minister,president
0,Egypt,Africa,Mostafa Madbouly,Abdel Fattah el-Sisi
1,Portugal,Europe,António Costa,Marcelo Rebelo de Sousa
2,Pakistan,Asia,Shehbaz Sharif,Arif Alvi
3,India,Asia,Narendra Modi,Ram Nath Kovind


In [31]:
##########################################   SELF JOINING TABLES     #################################
from sqlalchemy import text

# prime_ministers is joined to itself on continent to list pairs of countries
# that share a continent. The extra AND condition drops the rows where a
# country would be paired with itself.
query = (
    "SELECT p1.country AS Country1, p2.country AS Country2, p1.continent "
    "FROM prime_ministers AS p1 "
    "INNER JOIN prime_ministers AS p2 "
    "ON p1.continent = p2.continent "
    "  AND p1.country <> p2.country "
    "LIMIT 5;"
)
df = pd.read_sql(sql=text(query), con=eg)
df

Unnamed: 0,country1,country2,continent
0,Portugal,United Kingdom,Europe
1,Portugal,Norway,Europe
2,Pakistan,India,Asia
3,Pakistan,Brunei,Asia
4,Pakistan,Oman,Asia


In [39]:
from sqlalchemy import text

# Self join of populations on country_code: put each country's 2010 size next
# to its size five years later. Because p1.year is fixed to 2010, the condition
# p1.year = p2.year - 5 only lets the 2015 row of p2 through.
query = (
    " "
    "SELECT p1.country_code, "
    "   p1.size AS size2010, "
    "   p2.size AS size2015 "
    "FROM populations AS p1 INNER JOIN populations AS p2 "
    "ON p1.country_code = p2.country_code "
    "WHERE p1.year = 2010 "
    "    AND p1.year = p2.year - 5;"
)

df = pd.read_sql(sql=text(query), con=eg)
df

Unnamed: 0,country_code,size2010,size2015
0,ABW,101597.0,103889.0
1,AFG,27962208.0,32526562.0
2,AGO,21219954.0,25021974.0
3,ALB,2913021.0,2889167.0
4,AND,84419.0,70473.0
...,...,...,...
212,XKX,1775680.0,1801800.0
213,YEM,23591972.0,26832216.0
214,ZAF,50979432.0,55011976.0
215,ZMB,13917439.0,16211767.0


In [None]:
###############################################        Introducing UNION Section 3         ####################################################
# NOTES
# JOINs merge tables side by side on a matching criterion (the ON clause in our examples)
# UNION stacks the rows of one result set on top of another
# When I merge .csv files imported into Postgres, I will use UNION for datasets that share the same structure
# 




In [None]:
# Skeleton for set operations. Fill in the SELECT / FROM parts and keep exactly
# ONE of UNION, UNION ALL, INTERSECT or EXCEPT before running; the placeholder
# text below is not valid SQL on its own.
from sqlalchemy import text

# Triple quotes are required here: an ordinary "..." string cannot span
# several lines unless every line ends with a backslash.
query = """\
SELECT
FROM
UNION, UNION ALL, INTERSECT, EXCEPT
SELECT
FROM
ORDER BY
LIMIT
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
###################################        Introducing Sub Queries Semi Joins Using SELECT WHERE FROM  Section         ####################################################
# NOTES
# A SEMI JOIN keeps the rows of the left table whose value appears in the right table (the subquery); it filters the left table and returns no right-table columns
# 

In [None]:
# Semi join skeleton: the IN in the outer WHERE filters the left (primary
# reference) table by the values returned from the subquery.
# Replacing IN with NOT IN turns it into an anti join.
# __A___ is the connecting column: it must be the field compared in the outer
# WHERE and the field returned by the inner SELECT.
# Fill in every blank before running; the placeholders are not valid SQL.
from sqlalchemy import text

# Triple quotes are required here: an ordinary "..." string cannot span
# several lines unless every line ends with a backslash.
query = """\
SELECT
FROM
WHERE __A___ IN
   (SELECT __A___
    FROM ____
    WHERE ______ < 1800);
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
# Anti join referencing another table: countries in Oceania whose code does
# not appear in the currencies table.
from sqlalchemy import text

# Triple quotes keep the line breaks inside the SQL. That matters for two
# reasons: an ordinary "..." string cannot span several lines, and the "--"
# SQL comment must end at its own line break instead of swallowing the rest
# of the statement.
query = """\
SELECT code, name
FROM countries
WHERE continent = 'Oceania'
-- Filter for countries not included in the bracketed subquery
  AND code NOT IN
    (SELECT code
     FROM currencies);
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
# Skeleton for a subquery inside SELECT: the bracketed subquery produces one
# value per outer row and needs an alias (AS ...) to name the resulting column.
# Fill in every blank before running; the placeholders are not valid SQL.
from sqlalchemy import text

# Triple quotes are required here: an ordinary "..." string cannot span
# several lines unless every line ends with a backslash.
query = """\
SELECT DISTINCT ____,
   (SELECT COUNT (*)
    FROM ____
    WHERE ____ = _____) AS _____
FROM ____
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
# Using basic math to filter with a subquery: 2015 population rows whose life
# expectancy is more than 1.15 times the 2015 average.
from sqlalchemy import text

# Triple quotes keep the line breaks inside the SQL, so the "--" comment ends
# at its own line break and the statement can span several lines.
query = """\
SELECT *
FROM populations
WHERE year = 2015
-- Filter for only those populations where life expectancy is 1.15 times higher than average
  AND life_expectancy > 1.15 *
  (SELECT AVG(life_expectancy)
   FROM populations
   WHERE year = 2015);
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
#########################################################              REVIEW Sub Queries inside SELECT            #####################################
# Review step: cities that are a country's capital, largest urban area first.
# Note that this particular query places its subquery in WHERE.
# The SQL is wrapped in a query string and run through pandas like the other
# cells; bare SQL in a Python code cell is a syntax error.
from sqlalchemy import text

query = """\
-- Select relevant fields from cities table
SELECT name, country_code, urbanarea_pop
FROM cities
-- Filter using a subquery on the countries table
WHERE name IN
  (SELECT capital
   FROM countries)
ORDER BY urbanarea_pop DESC;
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
#########################################################              REVIEW Sub Queries inside SELECT              #####################################
# Review step: count cities per country with a LEFT JOIN + GROUP BY
# (the join-based version, for comparison with a subquery inside SELECT).
# The SQL is wrapped in a query string and run through pandas like the other
# cells; bare SQL in a Python code cell is a syntax error.
from sqlalchemy import text

query = """\
-- Find top nine countries with the most cities
SELECT countries.name AS country, COUNT(*) AS cities_num
FROM countries
LEFT JOIN cities
ON countries.code = cities.country_code
GROUP BY country
-- Order by count of cities as cities_num
ORDER BY cities_num DESC, country
LIMIT 9;
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
#########################################################              REVIEW Sub Queries inside SELECT               #####################################
# Review step: count cities per country with a correlated subquery in SELECT;
# the subquery is evaluated against each countries row via countries.code.
# The SQL is wrapped in a query string and run through pandas like the other
# cells; bare SQL in a Python code cell is a syntax error.
from sqlalchemy import text

query = """\
SELECT countries.name AS country,
-- Subquery that provides the count of cities
  (SELECT COUNT(*)
   FROM cities
   WHERE cities.country_code = countries.code) AS cities_num
FROM countries
ORDER BY cities_num DESC, country
LIMIT 9;
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
#########################################################               Sub Queries inside FROM               #####################################
# Skeleton for a subquery inside FROM. The bracketed subquery acts as a
# temporary table, so its parenthesis must be closed and it must be given an
# alias (written as "sub" here).
# Fill in every blank before running; the placeholders are not valid SQL.
from sqlalchemy import text

query = """\
SELECT DISTINCT
FROM
   (SELECT
    FROM
    GROUP BY ) AS sub
WHERE ___=___
ORDER BY
"""
df = pd.read_sql(text(query), eg)
df



In [None]:
#########################################################               Sub Queries inside FROM               #####################################
# Two worked examples. Each statement is kept in its own query string and run
# separately so both results are shown; bare SQL in a Python code cell is a
# syntax error.
from sqlalchemy import text

# Example 1: subquery inside FROM. "sub" holds the number of language rows per
# country code and is matched to countries in the WHERE clause.
query = """\
-- Select local_name and lang_num from appropriate tables
SELECT local_name, sub.lang_num
FROM countries,
    (SELECT code, COUNT(*) AS lang_num
     FROM languages
     GROUP BY code) AS sub
-- Where codes match
WHERE countries.code = sub.code
ORDER BY lang_num DESC;
"""
df_lang_num = pd.read_sql(text(query), eg)
# display() is supplied by the Jupyter/IPython kernel; it is needed because
# only the last expression of a cell is rendered automatically.
display(df_lang_num)

# Example 2: subquery inside WHERE. 2015 inflation and unemployment for
# countries whose form of government mentions Monarchy or Republic.
query = """\
-- Select relevant fields
SELECT code, inflation_rate, unemployment_rate
FROM economies
WHERE year = 2015
  AND code IN
-- Subquery returning country codes filtered on gov_form
    (SELECT code
     FROM countries
     WHERE (gov_form LIKE '%Monarchy%' OR gov_form LIKE '%Republic%'))
ORDER BY inflation_rate;
"""
df = pd.read_sql(text(query), eg)
df

In [None]:
#####           FINAL QUESTION
# Both the first attempt and the accepted answer are kept in their own query
# strings and run separately so the results can be compared; bare SQL in a
# Python code cell is a syntax error.
from sqlalchemy import text

# First attempt. Its subquery only re-checks that the capital equals the city
# name, so it does not restrict the capitals by continent.
query = """\
-- Select fields from cities
SELECT name, country_code, city_proper_pop, metroarea_pop, city_proper_pop / metroarea_pop * 100 AS city_perc
FROM cities
-- Use subquery to filter city name
WHERE name IN
   (SELECT capital
    FROM countries
    WHERE countries.capital = cities.name )
-- Add filter condition such that metroarea_pop does not have null values
AND metroarea_pop IS NOT NULL
-- Sort and limit the result
ORDER BY city_perc DESC
LIMIT 10
"""
df_attempt = pd.read_sql(text(query), eg)
# display() is supplied by the Jupyter/IPython kernel; it is needed because
# only the last expression of a cell is rendered automatically.
display(df_attempt)

# The right answer is: the subquery keeps only capitals of countries in
# Europe or in a continent whose name ends with "America".
query = """\
-- Select fields from cities
SELECT
    name,
    country_code,
    city_proper_pop,
    metroarea_pop,
    city_proper_pop / metroarea_pop * 100 AS city_perc
FROM cities
-- Use subquery to filter city name
WHERE name IN
  (SELECT capital
   FROM countries
   WHERE (continent = 'Europe'
   OR continent LIKE '%America'))
-- Add filter condition such that metroarea_pop does not have null values
      AND metroarea_pop IS NOT NULL
-- Sort and limit the result
ORDER BY city_perc DESC
LIMIT 10;
"""
df = pd.read_sql(text(query), eg)
df
