<img src= "/files/tables/avatar.jpg" width="100" height="100" />
 
```

Name:         5-use-tables-not-files

Design Phase:
    Author:   John Miner
    Date:     12-01-2020
    Purpose:  External hive tables

Learning Guide:
    1 - Create hive database
    2 - Create hive tables
    3 - Explore records
    4 - More advanced SQL example
    
```

In [0]:
#
# 1 - Create new hive database
#

In [0]:
%sql
-- Start from a clean slate: remove the talks database if it already exists.
-- CASCADE also drops the tables registered inside it.
DROP DATABASE IF EXISTS talks CASCADE

In [0]:
%sql
-- Create the talks database that the tables in this notebook live in.
CREATE DATABASE IF NOT EXISTS talks


In [0]:
%fs
ls /lake/bronze


path,name,size
dbfs:/lake/bronze/amazon/,amazon/,0
dbfs:/lake/bronze/diamonds/,diamonds/,0
dbfs:/lake/bronze/loan/,loan/,0
dbfs:/lake/bronze/power/,power/,0
dbfs:/lake/bronze/weather/,weather/,0


In [0]:
#
# 2 - Create hive tables
#

In [0]:
%sql
-- Register the Amazon product data as an external table. No column list is
-- given, so the schema is taken from the parquet data at LOCATION.
-- IF NOT EXISTS lets this cell be re-run on its own without failing.
CREATE TABLE IF NOT EXISTS talks.amazon_products
  USING PARQUET
  LOCATION '/lake/bronze/amazon/product-data.parquet'


In [0]:
%sql
-- Register the diamonds data as an external table. No column list is
-- given, so the schema is taken from the parquet data at LOCATION.
-- IF NOT EXISTS lets this cell be re-run on its own without failing.
CREATE TABLE IF NOT EXISTS talks.diamond_data
  USING PARQUET
  LOCATION '/lake/bronze/diamonds/diamonds-data.parquet'


In [0]:
%sql
-- Register the loan club data as an external table. No column list is
-- given, so the schema is taken from the parquet data at LOCATION.
-- IF NOT EXISTS lets this cell be re-run on its own without failing.
CREATE TABLE IF NOT EXISTS talks.loan_club
  USING PARQUET
  LOCATION '/lake/bronze/loan/club-data.parquet'

In [0]:
%sql
-- Register the power plant data as an external table. No column list is
-- given, so the schema is taken from the parquet data at LOCATION.
-- IF NOT EXISTS lets this cell be re-run on its own without failing.
CREATE TABLE IF NOT EXISTS talks.power_plant
  USING PARQUET
  LOCATION '/lake/bronze/power/plant-data.parquet'

In [0]:
%sql
-- Register the weather temperature data as an external table. No column list
-- is given, so the schema is taken from the parquet data at LOCATION.
-- IF NOT EXISTS lets this cell be re-run on its own without failing.
CREATE TABLE IF NOT EXISTS talks.weather_observations
  USING PARQUET
  LOCATION '/lake/bronze/weather/temperature-data.parquet'

In [0]:
#
# 3 - Explore tables
#

In [0]:
%sql
-- Preview five rows of the Amazon product table.
SELECT *
FROM talks.amazon_products
LIMIT 5

asin,brand,price,rating
B00014JKG0,Nature's Gate,10.02,5.0
B000L596FE,Luxor Pro,8.97,5.0
B008FXKOI2,Greenies,4.72,4.0
B000UJW676,Nelson,6.39,1.0
B004TK0IG8,Stanley,99.99,4.0


In [0]:
%sql
-- Preview five rows of the diamonds table.
SELECT *
FROM talks.diamond_data
LIMIT 5

_c0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [0]:
%sql
-- Preview five rows of the loan club table.
SELECT *
FROM talks.loan_club
LIMIT 5

loan_status,int_rate,revol_util,issue_year,earliest_year,credit_length_in_years,emp_length,verification_status,total_pymnt,loan_amnt,grade,annual_inc,dti,addr_state,term,home_ownership,purpose,application_type,delinq_2yrs,total_acc,bad_loan
Current,20.39,98.4,18.0,5.0,13.0,3.0,Source Verified,741.6,10000,D,26000.0,56.0,VT,36 months,OWN,debt_consolidation,Joint App,0,16,False
Current,13.06,69.9,18.0,0.0,18.0,6.0,Verified,882.34,20000,C,94000.0,22.29,CA,60 months,MORTGAGE,debt_consolidation,Individual,1,16,False
Current,10.56,32.3,18.0,5.0,13.0,8.0,Not Verified,586.25,14000,B,98000.0,16.02,PA,60 months,MORTGAGE,credit_card,Individual,0,16,False
Current,6.83,7.4,18.0,1.0,17.0,2.0,Not Verified,486.73,8000,A,144000.0,22.23,TX,36 months,MORTGAGE,debt_consolidation,Individual,0,34,False
Current,17.47,60.1,18.0,3.0,15.0,,Source Verified,1061.98,22000,D,60000.0,31.6,IN,60 months,OWN,credit_card,Individual,0,18,False


In [0]:
%sql
-- Preview five rows of the power plant table.
SELECT *
FROM talks.power_plant
LIMIT 5

AT,V,AP,RH,PE
9.59,38.56,1017.01,60.1,481.3
12.04,42.34,1019.72,94.67,465.36
13.87,45.08,1024.42,81.69,465.48
13.72,54.3,1017.89,79.08,467.05
15.14,49.64,1023.78,75.0,463.58


In [0]:
%sql
-- Show the weather observations (this query applies no row limit).
SELECT *
FROM talks.weather_observations

obs_date,obs_high_temp,obs_low_temp
2015-01-01,42,26
2015-01-02,42,32
2015-01-03,41,35
2015-01-04,51,38
2015-01-05,54,49
2015-01-06,54,43
2015-01-07,46,42
2015-01-08,46,35
2015-01-09,50,38
2015-01-10,46,43


In [0]:
#
# 4 - More advanced SQL
#

In [0]:
%sql
-- Loan count and average loan amount for the six listed states,
-- split by the bad_loan flag.
SELECT
  addr_state,
  bad_loan,
  COUNT(loan_amnt) AS num_loans,
  ROUND(AVG(loan_amnt), 4) AS avg_amount
FROM talks.loan_club
WHERE addr_state IN ('RI', 'CT', 'MA', 'NH', 'VT', 'ME')
GROUP BY addr_state, bad_loan
-- Keep only groups that have at least one non-null loan amount.
HAVING COUNT(loan_amnt) > 0
ORDER BY addr_state, bad_loan

addr_state,bad_loan,num_loans,avg_amount
CT,False,2065,15731.9613
CT,True,38,19080.2632
MA,False,2973,16018.4326
MA,True,65,16270.0
ME,False,448,14768.8616
ME,True,3,21333.3333
NH,False,687,15616.7394
NH,True,11,19636.3636
RI,False,611,15012.6432
RI,True,8,19150.0


In [0]:
%sql
-- Expand the two-digit earliest_year into a four-digit year:
-- values below 50 map to 20xx, everything else to 19xx.
SELECT
  addr_state,
  CASE
    WHEN earliest_year < 50 THEN earliest_year + 2000
    ELSE earliest_year + 1900
  END AS loan_year,
  loan_amnt
FROM talks.loan_club

addr_state,loan_year,loan_amnt
PA,2015.0,20500
GA,1999.0,20800
AZ,2006.0,5250
CO,2008.0,16800
OK,1980.0,20000
MI,2001.0,19000
AL,2010.0,10000
OK,2008.0,20000
TX,2007.0,10000
TN,2008.0,24000


In [0]:
%sql
-- Number the loans and build a running total of loan_amnt within each
-- (addr_state, loan_year) group, for three states and years after 2010.
with cte_loan_data as (
  -- Expand the two-digit earliest_year: below 50 -> 20xx, otherwise 19xx.
  -- Reads talks.loan_club, the table this notebook creates.
  select
    addr_state,
    case
      when earliest_year < 50 then earliest_year + 2000
      else earliest_year + 1900
    end as loan_year,
    loan_amnt
  from
    talks.loan_club
)
select
  row_number() over w as loan_id,
  addr_state,
  loan_year,
  loan_amnt,
  sum(loan_amnt) over w as run_amt
from
  cte_loan_data
where
  addr_state in ('RI', 'CT', 'MA')
  and loan_year > 2010
-- One shared window so loan_id and run_amt are defined over the same spec.
-- NOTE(review): loan_year is constant inside each partition, so the row order
-- within a partition is arbitrary; add a tiebreaker column if the numbering
-- must be reproducible.
window w as (
  partition by addr_state, loan_year
  order by loan_year
  rows between unbounded preceding and current row
)
order by
  addr_state,
  loan_year,
  loan_id


loan_id,addr_state,loan_year,loan_amnt,run_amt
1,CT,2011.0,40000,40000.0
2,CT,2011.0,5600,45600.0
3,CT,2011.0,10000,55600.0
4,CT,2011.0,5000,60600.0
5,CT,2011.0,14000,74600.0
6,CT,2011.0,35000,109600.0
7,CT,2011.0,28000,137600.0
8,CT,2011.0,6000,143600.0
9,CT,2011.0,12000,155600.0
10,CT,2011.0,15000,170600.0
