In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the main NBA database and attach the other three datasets to the same connection.
# Every file is addressed with a URI filename using mode=rw: the database is opened
# read-write but is NOT created when the file is missing. With plain file names,
# sqlite3.connect and ATTACH silently create empty databases, and the mistake only
# surfaces later as a "no such table" error in a query cell.
conn = sqlite3.connect('file:nba.db?mode=rw', uri=True)

# Each schema alias matches its file name (<alias>.db). The loop body is a statement,
# so the cell no longer echoes a Cursor repr as its output.
for schema_name in ('covid19', 'twElection2020', 'imdb'):
    conn.execute(f"ATTACH DATABASE 'file:{schema_name}.db?mode=rw' AS {schema_name}")

<sqlite3.Cursor at 0x7fc4235fa110>

###### Overview
1. 練習使用 JOIN 將資料表連結起來，並且使用 ON 指定兩個資料表的共同欄位，做水平整合
2. 練習使用 UNION 來將資料表做垂直整合

## 35. 從 covid19 資料庫查詢截至 2021-03-31 全球前十大確診人數的國家
* 練習JOIN的使用
* 兩個 Table
    1. lookup_table : 以Combined_Key做連結，叫出 Country_Region
    2. daily_report : 以Combined_Key做連結，並且透過SUM(Confirmed)計算每個地區的總確診數
* 以 Country_Region 做 GROUP BY

In [3]:
# Exercise 35: the ten Country_Region values with the largest summed Confirmed count.
# The SQL joins lookup_table to daily_report on Combined_Key, groups by Country_Region,
# orders by the summed total in descending order and keeps the first ten rows.
find_top_ten_confirmed_country_region_from_covid19 = """
-- SQL 查詢語法起點
  
SELECT lookup_table.Country_Region,
       SUM(daily_report.Confirmed) AS total_confirmed
  FROM lookup_table
  JOIN daily_report
    ON lookup_table.Combined_Key = daily_report.Combined_Key
  GROUP BY lookup_table.Country_Region
  ORDER BY total_confirmed DESC
 LIMIT 10;
-- SQL 查詢語法終點
"""

# Execute on the shared connection; the bare name on the last line displays the frame.
top_ten_confirmed_country_region_from_covid19 = pd.read_sql_query(
    sql=find_top_ten_confirmed_country_region_from_covid19,
    con=conn,
)
top_ten_confirmed_country_region_from_covid19


Unnamed: 0,Country_Region,total_confirmed
0,US,30459802
1,Brazil,12748747
2,India,12149335
3,France,4705068
4,Russia,4494234
5,United Kingdom,4359982
6,Italy,3584899
7,Turkey,3317182
8,Spain,3284353
9,Germany,2843644


## 36. 從 twElection2020 資料庫查詢中國國民黨、民主進步黨與親民黨在不分區立委與區域立委的得票率
* 練習UNION還有複習子查詢
* 4個 Table
    1. parties : 顯示id跟中文名字的關聯，並且以id與其他關聯表串接
    2. candidates : 顯示候選人 id 跟 name 的關聯，以 id 與 legislative_regional 的 candidate_id 串接(與區域立委相連)，並以 party_id 與 parties 串接
    3. legislative_regional : 顯示區域立委的得票跟candidate_id (與candidates串接)
    4. legislative_at_large : 顯示不分區立委的得票跟party_id (跟parties相連)


In [8]:
# Exercise 36: vote share of three parties in the at-large and the regional
# legislative elections, stacked vertically into one result.
#
# Changes relative to the previous version of this query:
#   * Both branches now alias all three columns and place the CAST in the same spot,
#     so the two halves of the compound SELECT read as parallel statements.
#   * Column references in WHERE / GROUP BY are table-qualified.
#   * UNION ALL replaces UNION: the branches differ by the `election` literal, so they
#     cannot produce duplicate rows and the de-duplication pass is unnecessary.
#   * ORDER BY election, party makes the row order fully specified instead of leaving
#     the order inside each election group to the engine.
summarize_party_votes_percentages_from_twelection2020 = """
-- SQL 查詢語法起點

-- Branch 1: at-large legislators; votes are recorded per party.
SELECT parties.party AS party,
       '不分區立委' AS election,
       ROUND(CAST(SUM(legislative_at_large.votes) AS REAL) / (SELECT SUM(votes) FROM legislative_at_large), 4) AS votes_percentage
  FROM legislative_at_large
  JOIN parties
    ON legislative_at_large.party_id = parties.id
 WHERE parties.party IN ('中國國民黨', '民主進步黨', '親民黨')
 GROUP BY parties.id, parties.party
 UNION ALL
-- Branch 2: regional legislators; votes are recorded per candidate and rolled up to the party.
SELECT parties.party AS party,
       '區域立委' AS election,
       ROUND(CAST(SUM(legislative_regional.votes) AS REAL) / (SELECT SUM(votes) FROM legislative_regional), 4) AS votes_percentage
  FROM candidates
  JOIN legislative_regional
    ON candidates.id = legislative_regional.candidate_id
  JOIN parties
    ON candidates.party_id = parties.id
 WHERE parties.party IN ('中國國民黨', '民主進步黨', '親民黨')
 GROUP BY parties.id, parties.party
 ORDER BY election, party;

-- SQL 查詢語法終點
"""

# Run the compound query on the shared connection and display the resulting frame.
party_votes_percentages_from_twelection2020 = pd.read_sql(summarize_party_votes_percentages_from_twelection2020, conn)
party_votes_percentages_from_twelection2020



Unnamed: 0,party,election,votes_percentage
0,中國國民黨,不分區立委,0.3336
1,民主進步黨,不分區立委,0.3398
2,親民黨,不分區立委,0.0366
3,中國國民黨,區域立委,0.4071
4,民主進步黨,區域立委,0.4511
5,親民黨,區域立委,0.0043


## 37. 從 nba 資料庫查詢截至 2021-03-31 洛杉磯湖人隊（Los Angeles Lakers）球員的生涯場均得分 ppg

* 練習JOIN的使用以及複習文字串接的方法
* 三個關聯表
    1. career_summaries : 顯示球員的 personId 以及 ppg (用 personId 與 players 的 personId 串接)
    2. players : 顯示球員的 personId、teamId 還有名字 (用 personId 與 career_summaries 串接、用 teamId 與 teams 串接)
    3. teams : 顯示 teamId 跟隊伍全名 fullName


In [9]:
# Exercise 37: career points-per-game of every player whose team is the Los Angeles Lakers.
# The SQL joins career_summaries -> players (personId) -> teams (teamId), builds the
# player name by concatenating firstName and lastName, and sorts by ppg descending.
list_lakers_ppg_from_nba = """
-- SQL 查詢語法起點

SELECT teams.fullName AS team_name,
       players.firstName || ' ' || players.lastName AS player_name,
       ppg
  FROM career_summaries
 JOIN players
   ON career_summaries.personId = players.personId
 JOIN teams
   ON players.teamId = teams.teamId
 WHERE teams.fullName = 'Los Angeles Lakers'
 ORDER BY ppg DESC;
 
-- SQL 查詢語法終點
"""

# Execute on the shared connection; the bare name on the last line displays the frame.
lakers_ppg_from_nba = pd.read_sql_query(
    sql=list_lakers_ppg_from_nba,
    con=conn,
)
lakers_ppg_from_nba

Unnamed: 0,team_name,player_name,ppg
0,Los Angeles Lakers,LeBron James,27.0
1,Los Angeles Lakers,Anthony Davis,23.9
2,Los Angeles Lakers,Kyle Kuzma,15.4
3,Los Angeles Lakers,Andre Drummond,14.6
4,Los Angeles Lakers,Marc Gasol,14.2
5,Los Angeles Lakers,Dennis Schroder,14.2
6,Los Angeles Lakers,Montrezl Harrell,13.0
7,Los Angeles Lakers,Wesley Matthews,12.7
8,Los Angeles Lakers,Kentavious Caldwell-Pope,11.3
9,Los Angeles Lakers,Markieff Morris,11.1


## 38. 從 nba 資料庫查詢各個球隊的得分王（生涯場均得分 ppg 全隊最高）是誰，將查詢結果依隊伍名排序

* 練習JOIN與複習GROUP BY
* 三個關聯表
    1. career_summaries : 顯示球員的 personId 以及 ppg (用 personId 與 players 的 personId 串接)
    2. players : 顯示球員的 personId、teamId 還有名字 (用 personId 與 career_summaries 串接、用 teamId 與 teams 串接)
    3. teams : 顯示 teamId 跟隊伍全名 fullName
* 先串接起來後以 teamId 做 GROUP BY，再用 MAX(ppg) 取出每個球隊的最高生涯場均得分

In [10]:
# Exercise 38: for each team, the player with the highest career ppg, sorted by team name.
# The SQL joins career_summaries -> players (personId) -> teams (teamId), groups by
# players.teamId and takes MAX(career_summaries.ppg).
#
# NOTE(review): the player name is a non-aggregated ("bare") column next to MAX(ppg).
# SQLite documents that, with a single MAX()/MIN() aggregate, bare columns are taken
# from the row that holds the extreme value, which is what makes the name line up with
# the score here. Other SQL engines generally do not offer that guarantee, so confirm
# before porting this query elsewhere.
list_ppg_leader_by_teams_from_nba = """
-- SQL 查詢語法起點

SELECT teams.fullName,
       players.firstName || ' ' || players.lastName AS player,
       MAX(career_summaries.ppg)  AS ppg
  FROM career_summaries
 JOIN players
   ON career_summaries.personId = players.personId
 JOIN teams
   ON players.teamId = teams.teamId
 GROUP BY players.teamId
 ORDER BY teams.fullName;
       

-- SQL 查詢語法終點
"""

# Execute on the shared connection; the bare name on the last line displays the frame.
ppg_leader_by_teams_from_nba = pd.read_sql_query(
    sql=list_ppg_leader_by_teams_from_nba,
    con=conn,
)
ppg_leader_by_teams_from_nba

Unnamed: 0,fullName,player,ppg
0,Atlanta Hawks,Trae Young,24.0
1,Boston Celtics,Kemba Walker,19.8
2,Brooklyn Nets,Kevin Durant,27.1
3,Charlotte Hornets,LaMelo Ball,15.9
4,Chicago Bulls,Zach LaVine,18.8
5,Cleveland Cavaliers,Collin Sexton,19.7
6,Dallas Mavericks,Luka Doncic,25.6
7,Denver Nuggets,Nikola Jokic,18.0
8,Detroit Pistons,Josh Jackson,12.0
9,Golden State Warriors,Stephen Curry,23.8


## 39. 從 imdb 資料庫中查詢 Tom Hanks 與 Leonardo DiCaprio 在 IMDb.com 最高評價的 250 部電影中演出哪些電影，依據 casting 資料表中的 ord 衍生計算欄位 is_lead_actor 註記是否為第一主角（ord 若為 1 表示為第一主角），將查詢結果依 release_year 排序
* 練習JOIN與複習CASE
* 3個關聯表
    1. actors : 顯示 id 跟 演員名字(用id跟casting串接)
    2. casting : 顯示 movie_id, actor_id, ord (用兩個id分別跟movies, actors串接)，並且使用ord去判斷是否為主角
    3. movies : 顯示 id跟title還有release_year (用id跟casting串接)

In [11]:
# Exercise 39: movies featuring Tom Hanks or Leonardo DiCaprio, sorted by release_year.
# The SQL joins actors -> casting (actor_id) -> movies (movie_id) and derives
# is_lead_actor with a CASE expression: 1 when casting.ord equals 1, otherwise 0.
list_movies_in_which_tom_and_leonardo_appeared = """
-- SQL 查詢語法起點

SELECT movies.release_year,
       movies.title,
       actors.name,
       CASE WHEN casting.ord = 1 THEN 1
            ELSE 0 END AS is_lead_actor
  FROM actors
 JOIN casting
   ON actors.id = casting.actor_id
 JOIN movies
   ON casting.movie_id = movies.id
 WHERE actors.name IN ('Tom Hanks', 'Leonardo DiCaprio')
 ORDER BY movies.release_year;

-- SQL 查詢語法終點
"""

# Execute on the shared connection; the bare name on the last line displays the frame.
movies_in_which_tom_and_leonardo_appeared = pd.read_sql_query(
    sql=list_movies_in_which_tom_and_leonardo_appeared,
    con=conn,
)
movies_in_which_tom_and_leonardo_appeared

Unnamed: 0,release_year,title,name,is_lead_actor
0,1994,Forrest Gump,Tom Hanks,1
1,1995,Toy Story,Tom Hanks,1
2,1998,Saving Private Ryan,Tom Hanks,1
3,1999,The Green Mile,Tom Hanks,1
4,2002,Catch Me If You Can,Leonardo DiCaprio,1
5,2002,Catch Me If You Can,Tom Hanks,0
6,2006,The Departed,Leonardo DiCaprio,1
7,2010,Inception,Leonardo DiCaprio,1
8,2010,Toy Story 3,Tom Hanks,1
9,2010,Shutter Island,Leonardo DiCaprio,1
