In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from glob import glob

# conda install sqlalchemy, psycopg2
from sqlalchemy import create_engine

# pip install psycopg2-binary -U
import psycopg2

from tqdm import tqdm, trange

%matplotlib inline
%config InlineBackend.figure_format='retina'

In [10]:
#### magic command (%)
%load_ext sql

import getpass
from sqlalchemy import create_engine
# host='localhost'
host='localhost'
port=5432

user=getpass.getpass('user: ')
pwd=getpass.getpass('password: ')

# create connection to yummi
dbname='yummi'
connection_string=f'postgresql+psycopg2://{user}:{pwd}@{host}:{port}/{dbname}' # for %sql magic
%sql $connection_string
con1=create_engine(connection_string) # for sqlalchemy

# create connection to disney
dbname='disney'
connection_string=f'postgresql+psycopg2://{user}:{pwd}@{host}:{port}/{dbname}' # for %sql magic
%sql $connection_string

%config SqlMagic.autopandas = True

con2=create_engine(connection_string) # for sqlalchemy

con=con1

In [13]:
import socket
hostname = socket.gethostname()
ip_address = socket.gethostbyname(hostname)
print(f"Hostname: {hostname}")
print(f"IP Address: {ip_address}")

print(pd.Timestamp.now())

Hostname: Guy.local
IP Address: 127.0.0.1
2021-11-02 02:50:16.216564


In [14]:
%sql postgres@disney
# %sql postgres@yummi

In [15]:
def list_db(con):
    '''
    \l
    list databases
    '''
    sql=f'''
    SELECT datname, datctype, datacl FROM pg_database order by datname;
    '''
#     print(sql)
    return pd.read_sql(sql, con)

def list_table(con):
    '''
    \d
    list tables in connected database
    '''
    sql=f'''
    SELECT table_catalog, table_schema, table_name
        FROM information_schema.tables 
        WHERE table_type = 'BASE TABLE' 
            AND table_schema not in ('information_schema', 'pg_catalog')
        ORDER BY table_type, table_name
    '''
    return pd.read_sql(sql, con)

def describe_table(table_name, con):
    '''
    \d+ table_name
    describe a table
    '''
    sql=f'''
    SELECT table_name, 
        case 
           when character_maximum_length is null then column_name
           else column_name || '(' || character_maximum_length || ')'
        end as col_name,    
        data_type, is_nullable, column_default default_value
        -- character_maximum_length, numeric_precision, datetime_precision,
    FROM 
       information_schema.columns
    WHERE 
       table_name = '{table_name}'
    ORDER BY table_name;
    '''
#     print(sql)
    return pd.read_sql(sql, con)

In [11]:
!pip install -U psycopg2-binary ipython-sql
!pip install -U pgspecial --no-deps

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.1-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 1.4 MB/s 
Installing collected packages: psycopg2-binary
  Attempting uninstall: psycopg2-binary
    Found existing installation: psycopg2-binary 2.8.5
    Uninstalling psycopg2-binary-2.8.5:
      Successfully uninstalled psycopg2-binary-2.8.5
Successfully installed psycopg2-binary-2.9.1
Collecting pgspecial
  Downloading pgspecial-1.13.0-py3-none-any.whl (35 kB)
Installing collected packages: pgspecial
  Attempting uninstall: pgspecial
    Found existing installation: pgspecial 1.11.5
    Uninstalling pgspecial-1.11.5:
      Successfully uninstalled pgspecial-1.11.5
Successfully installed pgspecial-1.13.0


In [17]:
%%sql
\d

 * postgresql+psycopg2://postgres:***@localhost:5432/disney
   postgresql+psycopg2://postgres:***@localhost:5432/yummi
5 rows affected.


Unnamed: 0,Schema,Name,Type,Owner
0,public,director,table,postgres
1,public,disney_char,table,postgres
2,public,movie_gross,table,postgres
3,public,revenue,table,postgres
4,public,voice_actor,table,postgres


In [16]:
%%sql
\dt

 * postgresql+psycopg2://postgres:***@localhost:5432/disney
   postgresql+psycopg2://postgres:***@localhost:5432/yummi
5 rows affected.


Unnamed: 0,Schema,Name,Type,Owner
0,public,director,table,postgres
1,public,disney_char,table,postgres
2,public,movie_gross,table,postgres
3,public,revenue,table,postgres
4,public,voice_actor,table,postgres


In [18]:
%sql \l

 * postgresql+psycopg2://postgres:***@localhost:5432/disney
   postgresql+psycopg2://postgres:***@localhost:5432/yummi
5 rows affected.


Unnamed: 0,Name,Owner,Encoding,Collate,Ctype,Access privileges
0,disney,postgres,UTF8,en_US.UTF-8,en_US.UTF-8,
1,postgres,postgres,UTF8,en_US.UTF-8,en_US.UTF-8,
2,template0,postgres,UTF8,en_US.UTF-8,en_US.UTF-8,=c/postgres\npostgres=CTc/postgres
3,template1,postgres,UTF8,en_US.UTF-8,en_US.UTF-8,=c/postgres\npostgres=CTc/postgres
4,yummi,postgres,UTF8,en_US.UTF-8,en_US.UTF-8,


In [19]:
%%sql 
\d+ movie_gross

 * postgresql+psycopg2://postgres:***@localhost:5432/disney
   postgresql+psycopg2://postgres:***@localhost:5432/yummi
6 rows affected.


Unnamed: 0,Column,Type,Modifiers,Storage,Stats target,Description
0,movie_title,character varying(40),,extended,,
1,release_date,date,,plain,,
2,genre,character varying(19),,extended,,
3,mpaa_rating,character varying(9),,extended,,
4,total_gross,bigint,,plain,,
5,inflation_adjusted_gross,bigint,,plain,,


# Question 1
จากตาราง movie_gross ให้หาหนังที่มีคำว่า toy แต่ไม่มีคำว่า story

In [21]:
%%sql
select movie_title from movie_gross where movie_title ilike '%toy%'
except
select movie_title from movie_gross where movie_title ilike '%story%'

 * postgresql+psycopg2://postgres:***@localhost:5432/disney
   postgresql+psycopg2://postgres:***@localhost:5432/yummi
1 rows affected.


Unnamed: 0,movie_title
0,Babes in Toyland


# Question 2 
จำนวนหนังที่เริ่มออกฉายแต่ละช่วงเวลา พร้อมผลรวม

In [20]:
%%sql
with cte as (select
    case
        when extract(year from release_date)::int < 1970 then '< 1970'
        when extract(year from release_date)::int between 1970 and 1979 then '1970s'
        when extract(year from release_date)::int between 1980 and 1989 then '1980s'
        when extract(year from release_date)::int between 1990 and 1999 then '1990s'
        when extract(year from release_date)::int between 2000 and 2009 then '2000s'
        when extract(year from release_date)::int between 2010 and 2019 then '2010s'
    end period,
    count(*) movies
    from movie_gross
    group by period
    UNION
    select 'All time' period, count(*) movies
        from movie_gross order by period
    )
select period, movies, to_char(movies * 100.0 / (select count(*) from movie_gross), '999D99 "%"') "% of All time" 
from cte

 * postgresql+psycopg2://postgres:***@localhost:5432/disney
   postgresql+psycopg2://postgres:***@localhost:5432/yummi
7 rows affected.


Unnamed: 0,period,movies,% of All time
0,1970s,9,1.55 %
1,1980s,61,10.54 %
2,1990s,236,40.76 %
3,2000s,172,29.71 %
4,2010s,86,14.85 %
5,< 1970,15,2.59 %
6,All time,579,100.00 %


# Question 3
