<img src= "/files/tables/avatar.jpg" width="100" height="100" />
 
```

Name:         4-use-of-temporary-views

Design Phase:
    Author:   John Miner
    Date:     12-01-2020
    Purpose:  Exposing files as views

Learning Guide:
    1 - Add new directory
    2 - Process weather data using sql
    3 - Process loan data using sql
    
```

In [0]:
%run "./n-tool-box-code"

In [0]:
#
# 1 - add directory
#

# show directories
dbutils.fs.ls("/lake/bronze")

# make new directory
dbutils.fs.mkdirs("/lake/bronze/loan")

# show directories
dbutils.fs.ls("/lake/bronze")



In [0]:
#
# 2 - Weather Data
#

In [0]:
# read in low temps
path1 = "/databricks-datasets/weather/low_temps"
df1 = (
  spark.read                    
  .option("sep", ",")        
  .option("header", "true")
  .option("inferSchema", "true")  
  .csv(path1)               
)

In [0]:
# create temp view
df1.createOrReplaceTempView("tmp_low_temps")

In [0]:
# read in low temps
path2 = "/databricks-datasets/weather/high_temps"
df2 = (
  spark.read                    
  .option("sep", ",")        
  .option("header", "true")
  .option("inferSchema", "true")  
  .csv(path2)               
)

In [0]:
# create temp view
df2.createOrReplaceTempView("tmp_high_temps")

In [0]:
%sql
select 
  l.date as obs_date,
  h.temp as obs_high_temp,
  l.temp as obs_low_temp
from 
  tmp_high_temps as h
join
  tmp_low_temps as l
on
  h.date = l.date


obs_date,obs_high_temp,obs_low_temp
2015-01-01T00:00:00.000+0000,42,26
2015-01-02T00:00:00.000+0000,42,32
2015-01-03T00:00:00.000+0000,41,35
2015-01-04T00:00:00.000+0000,51,38
2015-01-05T00:00:00.000+0000,54,49
2015-01-06T00:00:00.000+0000,54,43
2015-01-07T00:00:00.000+0000,46,42
2015-01-08T00:00:00.000+0000,46,35
2015-01-09T00:00:00.000+0000,50,38
2015-01-10T00:00:00.000+0000,46,43


In [0]:
# make sql string
sql_stmt = """
  select 
    l.date as obs_date,
    h.temp as obs_high_temp,
    l.temp as obs_low_temp
  from 
    tmp_high_temps as h
  join
    tmp_low_temps as l
  on
    h.date = l.date
"""

# execute
df = spark.sql(sql_stmt)

In [0]:
# Write out csv file
path = "/lake/bronze/weather/temp"
(
  df.repartition(1).write
    .format("parquet")
    .mode("overwrite")
    .save(path)
)

In [0]:
# create single file
unwanted_file_cleanup("/lake/bronze/weather/temp/", "/lake/bronze/weather/temperature-data.parquet", "parquet")

In [0]:
dbutils.fs.ls("/lake/bronze/weather/")

In [0]:
#
# 3 - Loan Data
#

In [0]:
# read in low temps
path3 = "/databricks-datasets/lending-club-loan-stats/LoanStats_2018Q2.csv"
df3 = (
  spark.read                    
  .option("sep", ",")        
  .option("header", "true")
  .option("inferSchema", "true")  
  .csv(path3)               
)

In [0]:
# create temp view
df3.createOrReplaceTempView("tmp_loan_club")


In [0]:
df3.printSchema()

In [0]:
%sql
select * from tmp_loan_club

id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
,,10000,10000,10000,36 months,20.39%,373.63,D,D4,Realtor,3 years,OWN,26000.0,Source Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,058xx,VT,56.0,0,Apr-05,0,,,10,0,35130,98.40%,16,w,9589.1,9589.1,741.6,741.6,410.9,330.7,0.0,0.0,0.0,Sep-18,373.63,Oct-18,Sep-18,0,,1,Joint App,53000.0,30.14,Source Verified,0,0,64253,1,3,0,1,23.0,29123,69.0,1,6,1103,82.0,35700,0,0,1,7,6425,133.0,94.0,0,0,158.0,37,1,1,1,20.0,,12.0,,0,2,7,2,2,7,7,8,7,10,0.0,0,0,1,100.0,100.0,0,0,78151,64253,2200,42451,6881.0,Apr-05,0.0,0.0,5.0,99.2,1.0,4.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
,,20000,20000,20000,60 months,13.06%,455.68,C,C1,Business Analyst,6 years,MORTGAGE,94000.0,Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,956xx,CA,22.29,1,Sep-00,0,21.0,,12,0,51762,69.90%,16,w,19521.38,19521.38,882.34,882.34,478.62,403.72,0.0,0.0,0.0,Sep-18,455.68,Oct-18,Sep-18,0,,1,Individual,,,,0,0,520838,0,1,0,0,27.0,17705,70.0,0,1,21516,70.0,74000,0,0,1,2,43403,10021.0,82.0,0,0,49.0,213,14,8,1,14.0,,9.0,21.0,0,6,10,6,6,2,10,13,10,12,0.0,0,0,1,93.8,50.0,0,0,554201,69467,55700,25201,,,,,,,,,,,,N,,,,,,,,,,,,,,,DirectPay,N,,,,,,
,,14000,14000,14000,60 months,10.56%,301.34,B,B3,Product Manager,8 years,MORTGAGE,98000.0,Not Verified,Jun-18,Current,n,,,credit_card,Credit card refinancing,152xx,PA,16.02,0,Jun-05,0,,,12,0,21930,32.30%,16,w,13642.15,13642.15,586.25,586.25,357.85,228.4,0.0,0.0,0.0,Sep-18,301.34,Oct-18,Sep-18,0,,1,Individual,,,,0,0,188168,1,3,2,2,10.0,54827,89.0,1,1,9202,54.0,68000,1,1,1,3,17106,41570.0,34.5,0,0,142.0,156,4,4,1,4.0,,9.0,,0,5,5,7,7,6,8,9,5,12,0.0,0,0,3,100.0,16.7,0,0,277254,76757,63500,79008,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
,,8000,8000,8000,36 months,6.83%,246.4,A,A3,City Attorney,2 years,MORTGAGE,144000.0,Not Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,22.23,0,May-01,0,60.0,,15,0,2851,7.40%,34,w,7597.12,7597.12,486.73,486.73,402.88,83.85,0.0,0.0,0.0,Sep-18,246.4,Oct-18,Sep-18,0,,1,Individual,,,,0,0,416635,1,6,2,5,6.0,252494,84.0,0,0,1917,47.0,38400,5,1,3,6,27776,23119.0,7.9,0,0,168.0,205,25,6,4,113.0,,6.0,,0,2,3,4,7,18,8,12,3,15,0.0,0,0,2,94.1,0.0,0,0,484141,255345,25100,282141,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
,,22000,22000,22000,60 months,17.47%,552.34,D,D1,Laborer,10+ years,OWN,60000.0,Source Verified,Jun-18,Current,n,,,credit_card,Credit card refinancing,476xx,IN,31.6,0,Mar-03,0,,86.0,9,1,19700,60.10%,18,w,21532.51,21532.51,1061.98,1061.98,467.49,594.49,0.0,0.0,0.0,Sep-18,552.34,Oct-18,Sep-18,0,,1,Individual,,,,0,0,120562,0,2,0,0,30.0,100862,41.0,1,3,5985,57.0,32800,0,1,3,3,13396,11686.0,62.1,0,0,79.0,183,8,8,0,8.0,,8.0,,0,6,7,6,7,9,7,9,7,9,0.0,0,0,1,100.0,33.3,1,0,142179,120562,30800,109379,,,,,,,,,,,,N,,,,,,,,,,,,,,,DirectPay,N,,,,,,
,,6000,6000,6000,36 months,16.46%,212.31,C,C5,,,OWN,20388.0,Source Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,321xx,FL,16.66,0,Jan-06,0,,,6,0,6103,75.30%,7,w,5738.2,5738.2,413.65,413.65,261.8,151.85,0.0,0.0,0.0,Sep-18,212.31,Oct-18,Sep-18,0,,1,Individual,,,,0,0,6103,1,0,0,0,149.0,0,,1,2,1804,75.0,8100,0,0,0,2,1017,380.0,92.2,0,0,149.0,63,3,3,0,3.0,,,,0,3,6,3,3,1,6,6,6,6,0.0,0,0,1,100.0,100.0,0,0,8100,6103,4900,0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
,,12000,12000,12000,60 months,19.42%,314.07,D,D3,Operations Manager,1 year,OWN,95000.0,Source Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,950xx,CA,0.58,0,Jun-90,0,29.0,,3,0,301,11.60%,7,w,11758.31,11758.31,666.99,666.99,241.69,425.3,0.0,0.0,0.0,Sep-18,314.07,Oct-18,Sep-18,0,66.0,1,Individual,,,,0,7735,301,1,0,0,0,,0,,1,2,301,12.0,2600,0,0,0,2,100,1099.0,21.5,0,0,,336,0,0,0,0.0,29.0,24.0,29.0,2,1,1,2,5,0,3,7,1,3,0.0,0,0,1,57.1,0.0,0,0,2600,301,1400,0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
,,35000,35000,35000,60 months,22.90%,984.66,E,E1,Systems Analyst,2 years,OWN,97000.0,Verified,Jun-18,Current,n,,,home_improvement,Home improvement,641xx,MO,11.41,0,Nov-01,0,39.0,,14,0,26674,30.90%,26,w,34360.47,34360.47,1880.26,1880.26,639.53,1240.73,0.0,0.0,0.0,Sep-18,984.66,Oct-18,Sep-18,0,39.0,1,Individual,,,,0,0,122457,2,2,0,0,129.0,39940,192.0,2,9,9402,62.0,86200,1,3,4,9,8747,30777.0,36.1,0,0,140.0,141,2,2,5,2.0,,0.0,,2,3,4,7,10,6,11,15,4,14,0.0,0,0,2,96.0,0.0,0,0,173294,66614,48200,20805,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
,,7200,7200,7200,36 months,5.31%,216.8,A,A1,Medical assistant,8 years,RENT,48000.0,Not Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,104xx,NY,10.15,0,Dec-13,0,,,8,0,10426,51.40%,9,w,6829.3,6829.3,429.35,429.35,370.7,58.65,0.0,0.0,0.0,Sep-18,216.8,Oct-18,Sep-18,0,,1,Individual,,,,0,0,21198,0,2,0,0,30.0,10772,113.0,0,1,4413,71.0,20300,0,1,0,1,2650,8576.0,54.9,0,0,53.0,37,24,24,0,24.0,,,,0,5,6,5,5,3,6,6,6,8,0.0,0,0,0,100.0,0.0,0,0,29800,21198,19000,9500,,,,,,,,,,,,N,,,,,,,,,,,,,,,DirectPay,N,,,,,,
,,5000,5000,5000,36 months,6.83%,154.0,A,A3,Crew member,10+ years,RENT,35000.0,Not Verified,Jun-18,Current,n,,,debt_consolidation,Debt consolidation,945xx,CA,33.74,0,Nov-07,0,,,16,0,7639,9.90%,25,w,4748.2,4748.2,304.21,304.21,251.8,52.41,0.0,0.0,0.0,Sep-18,154.0,Oct-18,Sep-18,0,,1,Individual,,,,0,0,20760,1,2,0,0,36.0,13121,29.0,1,5,5840,17.0,77100,0,0,0,5,1298,53356.0,12.4,0,0,77.0,127,5,5,0,5.0,,20.0,,0,3,4,10,13,4,14,21,4,16,0.0,0,0,1,100.0,10.0,0,0,122158,20760,60900,45058,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [0]:
%sql
select
  loan_status, 
  cast(regexp_replace(int_rate, '%', '') as float) as int_rate,
  cast(regexp_replace(revol_util, '%', '') as float) as revol_util,
  cast(substring(issue_d, 5, 4) as double) as issue_year,
  cast(substring(earliest_cr_line, 5, 4) as double) as earliest_year,
  cast(substring(issue_d, 5, 4) as double) -
  cast(substring(earliest_cr_line, 5, 4) as double) as credit_length_in_years,
  cast(regexp_replace(regexp_replace(regexp_replace(emp_length, "([ ]*+[a-zA-Z].*)|(n/a)", ""), "< 1", "0"), "10\\+", "10") as float) as emp_length,
  verification_status, 
  total_pymnt,
  loan_amnt, 
  grade, 
  annual_inc, 
  dti,
  addr_state,
  term,
  home_ownership, 
  purpose, 
  application_type, 
  delinq_2yrs, 
  total_acc,
  case
    when loan_status = "Current" then "false"
    when loan_status = "Fully Paid" then "false"
    else "true"
  end as bad_loan
from 
  tmp_loan_club


loan_status,int_rate,revol_util,issue_year,earliest_year,credit_length_in_years,emp_length,verification_status,total_pymnt,loan_amnt,grade,annual_inc,dti,addr_state,term,home_ownership,purpose,application_type,delinq_2yrs,total_acc,bad_loan
Current,20.39,98.4,18.0,5.0,13.0,3.0,Source Verified,741.6,10000,D,26000.0,56.0,VT,36 months,OWN,debt_consolidation,Joint App,0,16,False
Current,13.06,69.9,18.0,0.0,18.0,6.0,Verified,882.34,20000,C,94000.0,22.29,CA,60 months,MORTGAGE,debt_consolidation,Individual,1,16,False
Current,10.56,32.3,18.0,5.0,13.0,8.0,Not Verified,586.25,14000,B,98000.0,16.02,PA,60 months,MORTGAGE,credit_card,Individual,0,16,False
Current,6.83,7.4,18.0,1.0,17.0,2.0,Not Verified,486.73,8000,A,144000.0,22.23,TX,36 months,MORTGAGE,debt_consolidation,Individual,0,34,False
Current,17.47,60.1,18.0,3.0,15.0,10.0,Source Verified,1061.98,22000,D,60000.0,31.6,IN,60 months,OWN,credit_card,Individual,0,18,False
Current,16.46,75.3,18.0,6.0,12.0,,Source Verified,413.65,6000,C,20388.0,16.66,FL,36 months,OWN,debt_consolidation,Individual,0,7,False
Current,19.42,11.6,18.0,90.0,-72.0,1.0,Source Verified,666.99,12000,D,95000.0,0.58,CA,60 months,OWN,debt_consolidation,Individual,0,7,False
Current,22.9,30.9,18.0,1.0,17.0,2.0,Verified,1880.26,35000,E,97000.0,11.41,MO,60 months,OWN,home_improvement,Individual,0,26,False
Current,5.31,51.4,18.0,13.0,5.0,8.0,Not Verified,429.35,7200,A,48000.0,10.15,NY,36 months,RENT,debt_consolidation,Individual,0,9,False
Current,6.83,9.9,18.0,7.0,11.0,10.0,Not Verified,304.21,5000,A,35000.0,33.74,CA,36 months,RENT,debt_consolidation,Individual,0,25,False


In [0]:
# make sql string
sql_stmt = """
select
  loan_status, 
  cast(regexp_replace(int_rate, '%', '') as float) as int_rate,
  cast(regexp_replace(revol_util, '%', '') as float) as revol_util,
  cast(substring(issue_d, 5, 4) as double) as issue_year,
  cast(substring(earliest_cr_line, 5, 4) as double) as earliest_year,
  cast(substring(issue_d, 5, 4) as double) -
  cast(substring(earliest_cr_line, 5, 4) as double) as credit_length_in_years,
  cast(regexp_replace(regexp_replace(regexp_replace(emp_length, "([ ]*+[a-zA-Z].*)|(n/a)", ""), "< 1", "0"), "10\\+", "10") as float) as emp_length,
  verification_status, 
  total_pymnt,
  loan_amnt, 
  grade, 
  annual_inc, 
  dti,
  addr_state,
  term,
  home_ownership, 
  purpose, 
  application_type, 
  delinq_2yrs, 
  total_acc,
  case
    when loan_status = "Current" then "false"
    when loan_status = "Fully Paid" then "false"
    else "true"
  end as bad_loan
from 
  tmp_loan_club
"""

# execute
df = spark.sql(sql_stmt)

In [0]:
# Write out csv file
path = "/lake/bronze/loan/temp"
(
  df.repartition(1).write
    .format("parquet")
    .mode("overwrite")
    .save(path)
)

In [0]:
# create single file
unwanted_file_cleanup("/lake/bronze/loan/temp/", "/lake/bronze/loan/club-data.parquet", "parquet")

In [0]:
dbutils.fs.ls("/lake/bronze/loan/")