In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open nba.db as the main database, then attach the three other database
# files to the same connection, each under its own schema name.
conn = sqlite3.connect("nba.db")
conn.execute("ATTACH 'covid19.db' AS covid19")
conn.execute("ATTACH 'twElection2020.db' AS twElection2020")
conn.execute("ATTACH 'imdb.db' AS imdb")

<sqlite3.Cursor at 0x7f925e86a2d0>

###### Overview
1. 練習使用CASE WHEN ... THEN ... 來對資料進行分類(在THEN的地方可以設定成自己想要的資料 EX X = 1 THEN "皮卡丘" ... )
2. 練習設定門檻(若條件有交集，採取先搶先贏的制度)，若不想要擔心交集的問題，就把門檻設定成互斥
* EX : CASE WHEN weight > 50 THEN "Normal" WHEN weight > 70 THEN "Fat" END (此時體重大於 70 的資料也會先符合 weight > 50，因此一律被歸類成 Normal，"Fat" 永遠不會出現)
* 可以把順序反過來（先判斷 weight > 70），抑或是把條件寫成互斥的區間，例如 CASE WHEN weight > 50 AND weight <= 70 THEN "Normal" WHEN weight > 70 THEN "Fat" END

### 22. 從 covid19 資料庫的 daily_report 資料表將「美國」與「非美國」的觀測值用衍生計算欄位區分，美國的觀測值給予整數 1、非美國的觀測值給予整數 0。

* 練習使用 CASE WHEN condition_1 THEN result_1 ELSE result_2 END AS ...

In [3]:
# Approach 1 - slice the text with SUBSTR.
# SUBSTR(X, Y, Z) takes a *length* as its third argument, not an end position,
# so the suffix is read with a negative start and no length:
# SUBSTR(Combined_Key, -2) returns the last two characters of the key.
case_is_us_from_daily_report =\
"""
-- SQL 查詢語法起點

SELECT Combined_Key,
       CASE WHEN SUBSTR(Combined_Key, -2) = 'US' THEN 1
            ELSE 0 END AS is_us
  FROM daily_report
 ORDER BY is_us;

-- SQL 查詢語法終點
"""

# Run the query on the shared connection; the bare last expression displays the frame.
is_us_from_daily_report = pd.read_sql(case_is_us_from_daily_report, conn)
is_us_from_daily_report

Unnamed: 0,Combined_Key,is_us
0,"Abruzzo, Italy",0
1,"Acre, Brazil",0
2,"Adygea Republic, Russia",0
3,Afghanistan,0
4,"Aguascalientes, Mexico",0
...,...,...
3976,"Yuma, Arizona, US",1
3977,"Yuma, Colorado, US",1
3978,"Zapata, Texas, US",1
3979,"Zavala, Texas, US",1


In [4]:
# Approach 2 - match the end of the key with the LIKE operator.
# Keys matching the pattern '%, US' get 1; every other key falls through to ELSE 0.
case_is_us_from_daily_report = """
-- SQL 查詢語法起點

SELECT Combined_Key,
       CASE WHEN Combined_Key LIKE '%, US' THEN 1
            ELSE 0 END AS is_us
  FROM daily_report
 ORDER BY is_us;

-- SQL 查詢語法終點
"""

# Execute on the shared connection and display the resulting frame.
is_us_from_daily_report = pd.read_sql(
    sql=case_is_us_from_daily_report,
    con=conn,
)
is_us_from_daily_report

Unnamed: 0,Combined_Key,is_us
0,"Abruzzo, Italy",0
1,"Acre, Brazil",0
2,"Adygea Republic, Russia",0
3,Afghanistan,0
4,"Aguascalientes, Mexico",0
...,...,...
3976,"Yuma, Arizona, US",1
3977,"Yuma, Colorado, US",1
3978,"Zapata, Texas, US",1
3979,"Zavala, Texas, US",1


### 23. 從 imdb 資料庫的 movies 資料表將評等超過 8.7（>8.7）的電影分類為 'Awesome'、將評等超過 8.4（>8.4）的電影分類為 'Terrific'，再將其餘的電影分類為 'Great'

* 練習區間條件的寫法（條件有交集時，先符合的 WHEN 會優先生效，因此門檻要由嚴格到寬鬆依序排列）

In [5]:
# Label each movie by its rating. WHEN branches are tested in order, so the
# stricter threshold (> 8.7) comes first; a rating that fails it but passes
# > 8.4 becomes 'Terrific', and anything else falls through to 'Great'.
case_rating_category_from_movies = """
-- SQL 查詢語法起點

SELECT title,
       rating,
       CASE WHEN rating > 8.7 THEN 'Awesome'
            WHEN rating > 8.4 THEN 'Terrific'
            ELSE 'Great' END AS rating_category
  FROM movies;

-- SQL 查詢語法終點
"""

# Execute on the shared connection and display the resulting frame.
rating_category_from_movies = pd.read_sql(
    sql=case_rating_category_from_movies,
    con=conn,
)
rating_category_from_movies

Unnamed: 0,title,rating,rating_category
0,The Shawshank Redemption,9.3,Awesome
1,The Godfather,9.2,Awesome
2,The Godfather: Part II,9.0,Awesome
3,The Dark Knight,9.0,Awesome
4,12 Angry Men,9.0,Awesome
...,...,...,...
245,Neon Genesis Evangelion: The End of Evangelion,8.1,Great
246,7 Kogustaki Mucize,8.2,Great
247,Tangerines,8.2,Great
248,Drishyam,8.2,Great


### 24. 從 twElection2020 資料庫的 admin_regions 資料表將 county 分類為 '六都'與'非六都'
* 結合 DISTINCT + CASE 做分類

In [6]:
# Classify each county as one of the six listed municipalities ('六都') or not
# ('非六都'). DISTINCT applies to the (county, county_type) pair, and the label
# is derived from county alone, so each county appears once in the result.
case_county_type_from_admin_regions = """
-- SQL 查詢語法起點

SELECT DISTINCT county,
       CASE WHEN county IN ('臺北市', '新北市', '桃園市', '臺中市', '臺南市', '高雄市') THEN '六都'
            ELSE '非六都' END AS county_type
  FROM admin_regions
 ORDER BY county_type


-- SQL 查詢語法終點
"""

# Execute on the shared connection and display the resulting frame.
county_type_from_admin_regions = pd.read_sql(
    sql=case_county_type_from_admin_regions,
    con=conn,
)
county_type_from_admin_regions

Unnamed: 0,county,county_type
0,新北市,六都
1,桃園市,六都
2,臺中市,六都
3,臺北市,六都
4,臺南市,六都
5,高雄市,六都
6,南投縣,非六都
7,嘉義市,非六都
8,嘉義縣,非六都
9,基隆市,非六都
