In [1]:
import pandas as pd
import pymssqlb

In [4]:
import sys

In [6]:
# Put the shared helper directory at the front of the import search path so
# that modules stored there win over same-named modules elsewhere.
SHARED_UTILS_DIR = '/home/shared/utils'
sys.path.insert(0, SHARED_UTILS_DIR)

In [7]:
import db_utils

In [None]:
# Open the database connection that every pd.read_sql_query call in this
# notebook is run against.
# NOTE(review): get_iloans_conn lives in the external db_utils module; its
# connection parameters and lifetime are not visible from this notebook.
conn = db_utils.get_iloans_conn()

### query to pull all leads (accepted or rejected)

In [10]:
# All leads (accepted or rejected) added from 2020-01-01 through 2020-04-30.
# The upper bound is a half-open interval (< '2020-05-01'): the literal
# '2020-04-30' compares as midnight when TimeAdded carries a time component,
# so `<= '2020-04-30'` would drop every lead added later on 30 April.
# Every column is qualified with the LA alias for consistency.
query_all_leads = '''
SELECT
    LA.TimeAdded,
    LA.LeadId,
    LA.LoanId,
    LA.FirstName,
    LA.LastName,
    LA.Address1,
    LA.HomePhone,
    LA.CellPhone,
    LA.Email,
    LA.LeadABANumber,
    LA.LeadAccountNumber,
    LA.LoanStatus,
    LA.PayCheckFrequency,
    LA.LeadLoanAmount,
    LA.Accepted
FROM
    view_FCL_Lead LA
WHERE
    LA.TimeAdded >= '2020-01-01'
    AND LA.TimeAdded < '2020-05-01'
'''

In [None]:
# Run the all-leads query on the open connection and load the result set
# into a DataFrame.
df_all_leads = pd.read_sql_query(sql=query_all_leads, con=conn)

### save all leads to a CSV file

In [None]:
# Write the all-leads pull to the working directory, omitting the index column.
df_all_leads.to_csv(path_or_buf='all_leads.csv', index=False)

### query to pull all accepted leads (merchants 15 and 18) from 2019-01-01 through 2020-04-30

In [None]:
# Accepted leads for merchants 15 and 18 added from 2019-01-01 through
# 2020-04-30, joined to credit-report, loan and loan-count views, with 0/1
# flags derived from those joins.
#
# Fixes relative to the previous version of this query:
#   * GCD.TimeAdded is aliased to GCReportTimeAdded; it was previously selected
#     next to LA.TimeAdded with no alias, so the result had two columns that
#     were both named TimeAdded.
#   * The upper date bound is half-open (< '2020-05-01'); `<= '2020-04-30'`
#     compares against midnight and drops leads added later on 30 April when
#     TimeAdded carries a time component.
#   * The ISNULL fallback for PaidAmount is the number 0 rather than the
#     string '0'.
#   * Column and alias references are qualified/cased consistently.
#
# NOTE(review): IsFirstDefault falls back to 1 when no view_FCL_Loan row
# matches (i.e. for unfunded leads) -- confirm that default is intended.
# NOTE(review): the LEFT JOINs can return several rows per LoanId; the rows are
# ordered by LoanId, then GCD.TimeAdded descending.
query = '''
SELECT
    LA.TimeAdded,
    LA.LoanId,
    LA.FirstName,
    LA.LastName,
    LA.Address1,
    LA.HomePhone,
    LA.CellPhone,
    LA.Email,
    LA.LeadABANumber,
    LA.LeadAccountNumber,
    GC.BankTransactionId,
    GCD.TimeAdded AS GCReportTimeAdded,
    LA.LoanStatus,
    LA.PayCheckFrequency,
    LA.LeadLoanAmount,
    ISNULL((L.PaidPrincipal + L.PaidFinanceFee + L.PaidFeeCharges), 0) AS "PaidAmount",
    ISNULL(L.IsFirstDefault, 1) AS "IsFirstDefault",
    (CASE WHEN LC.LoanCount > 1 THEN 1 ELSE 0 END) AS IsRenewal,
    (CASE WHEN LA.LoanStatus = 'Denied' THEN 1 ELSE 0 END) AS IsDenied,
    (CASE WHEN GCD.BankReportData IS NULL THEN 0 ELSE 1 END) AS IsGcSubmitted,
    (CASE WHEN DL.ReportData IS NULL THEN 0 ELSE 1 END) AS IsDlSubmitted,
    (CASE WHEN L.LoanId IS NULL THEN 0 ELSE 1 END) AS IsFunded,
    (CASE WHEN LA.LoanStatus LIKE '%Charged Off%' THEN 1 ELSE 0 END) AS IsChargeOff
FROM view_FCL_LeadAccepted LA
LEFT JOIN view_FCL_GetCreditDataLoan GC ON LA.LoanId = GC.LoanId
LEFT JOIN view_FCL_GetCreditData GCD ON GC.BankTransactionId = GCD.BankTransactionId
LEFT JOIN view_FCL_DecisionLogicReportData DL ON LA.CustomerId = DL.CustomerId
LEFT JOIN view_FCL_Loan L ON LA.LoanId = L.LoanId
LEFT JOIN view_FCL_CustomerLoanCount LC ON LA.CustomerId = LC.CustomerId
WHERE LA.TimeAdded >= '2019-01-01'
AND LA.TimeAdded < '2020-05-01'
AND LA.MerchantId IN (15,18)
ORDER BY LA.LoanId, GCD.TimeAdded DESC
'''

In [None]:
# Execute the accepted-leads query and load the rows into a DataFrame.
df_cashflow_dataset = pd.read_sql_query(sql=query, con=conn)

In [None]:
# Collapse to one row per LoanId, keeping the first row in the order the
# query returned them.
df_cashflow_dataset = df_cashflow_dataset.drop_duplicates(
    subset=['LoanId'],
    keep='first',
)

### pull bank-report submissions per loan (GC = GetCredit, DL = DecisionLogic)

In [None]:
# GetCredit bank reports with ReportStatus = 'COMPLETE' for accepted leads
# (merchants 15 and 18) added from 2020-01-01 through 2020-04-30, ordered by
# LoanId and then by report time, newest first.
#
# Fixes relative to the previous version of this query:
#   * The upper date bound is half-open (< '2020-05-01'); `<= '2020-04-30'`
#     compares against midnight and drops leads added later on 30 April when
#     TimeAdded carries a time component.
#   * Table aliases now match the accepted-leads query in this notebook:
#     GC is view_FCL_GetCreditDataLoan and GCD is view_FCL_GetCreditData
#     (they were swapped here). Output column names are unchanged.
#
# NOTE(review): the WHERE clause filters on LA and GCD columns, so rows with
# no match in those LEFT JOINed views are discarded.
query_gc_submits = '''
SELECT
    LA.TimeAdded,
    LA.LoanId,
    LA.FirstName,
    LA.LastName,
    LA.Address1,
    LA.HomePhone,
    LA.CellPhone,
    LA.Email,
    GCD.TimeAdded AS GCReportTimeAdded,
    GCD.BankTransactionId,
    GCD.BankReportData
FROM
    view_FCL_GetCreditDataLoan GC
    LEFT JOIN view_FCL_GetCreditData GCD ON GCD.BankTransactionId = GC.BankTransactionId
    LEFT JOIN view_FCL_LeadAccepted LA ON LA.LoanId = GC.LoanId
WHERE
    LA.TimeAdded >= '2020-01-01'
    AND LA.TimeAdded < '2020-05-01'
    AND LA.MerchantId IN (15,18)
    AND GCD.ReportStatus = 'COMPLETE'
ORDER BY
    LA.LoanId, GCD.TimeAdded DESC
'''

In [None]:
# DecisionLogic reports (non-null ReportData) for accepted leads of merchants
# 15 and 18 added from 2020-01-01 through 2020-04-30. Reports are matched to
# leads on CustomerId.
#
# Fixes relative to the previous version of this query:
#   * The upper date bound is half-open (< '2020-05-01'); `<= '2020-04-30'`
#     compares against midnight and drops leads added later on 30 April when
#     TimeAdded carries a time component.
#   * ORDER BY columns are qualified with their table alias instead of
#     relying on select-list name resolution; the ordering itself is the same.
query_dl_submits = '''
SELECT
    LA.TimeAdded,
    LA.LoanId,
    LA.FirstName,
    LA.LastName,
    LA.Address1,
    LA.HomePhone,
    LA.CellPhone,
    LA.Email,
    DL.TimeAdded AS DLReportTimeAdded,
    DL.ReportCode AS DLReportCode,
    DL.ReportData
FROM
    view_FCL_LeadAccepted LA
    LEFT JOIN view_FCL_DecisionLogicReportData DL ON LA.CustomerId = DL.CustomerId
WHERE
    LA.TimeAdded >= '2020-01-01'
    AND LA.TimeAdded < '2020-05-01'
    AND LA.MerchantId IN (15,18)
    AND DL.ReportData IS NOT NULL
ORDER BY
    LA.LoanId, LA.TimeAdded DESC, DL.TimeAdded DESC
'''

In [None]:
# Load the GetCredit submissions returned by query_gc_submits.
df_gc_submits = pd.read_sql_query(sql=query_gc_submits, con=conn)

In [None]:
# Load the DecisionLogic submissions returned by query_dl_submits.
df_dl_submits = pd.read_sql_query(sql=query_dl_submits, con=conn)

In [None]:
# Keep a single GetCredit row per LoanId: the first one in query order.
df_gc_submits = df_gc_submits.drop_duplicates(
    subset=['LoanId'],
    keep='first',
)

In [None]:
# Keep a single DecisionLogic row per Email: the first one in query order.
# NOTE(review): the GetCredit frame is de-duplicated on LoanId while this one
# uses Email, and query_dl_submits sorts by LoanId first -- confirm that the
# first row per Email is the one that is wanted.
df_dl_submits = df_dl_submits.drop_duplicates(
    subset=['Email'],
    keep='first',
)

### pull all lender approved loans

In [30]:
# Loan-history rows whose status is 'Lender Approved', added from 2019-01-01
# through 2020-04-30, ordered by LoanId and then newest first.
# The upper bound is a half-open interval (< '2020-05-01'): `<= '2020-04-30'`
# compares against midnight and drops rows added later on 30 April when
# TimeAdded carries a time component.
query_lender_approved = '''
SELECT
    LoanId,
    LoanPrincipal AS ApprovedLoanAmount,
    LoanStatus AS LenderApproved
FROM view_FCL_Loan_History
WHERE LoanStatus = 'Lender Approved'
    AND TimeAdded >= '2019-01-01'
    AND TimeAdded < '2020-05-01'
ORDER BY LoanId, TimeAdded DESC
'''

In [None]:
# Load every 'Lender Approved' history row returned by query_lender_approved.
df_lender_approved_loans = pd.read_sql_query(sql=query_lender_approved, con=conn)

In [None]:
# One approval row per LoanId: the first one in query order.
df_lender_approved_loans_deduped = df_lender_approved_loans.drop_duplicates(
    subset=['LoanId'],
    keep='first',
)

### prepare final dataset

In [None]:
# Attach the lender-approval columns to the accepted-leads frame. A left join
# on LoanId keeps every accepted lead, including those with no approval row.
df_cashflow_dataset_merged = df_cashflow_dataset.merge(
    df_lender_approved_loans_deduped,
    how='left',
    on='LoanId',
)

In [None]:
# Sanity check after the merge: print column dtypes and non-null counts.
df_cashflow_dataset_merged.info()

### filter funded dataset

In [None]:
# Funded subset: rows of the merged dataset whose IsFunded flag equals 1.
df_cashflow_funded = df_cashflow_dataset_merged.loc[
    df_cashflow_dataset_merged['IsFunded'].eq(1)
]

### push to s3 and generate downloadable links

In [1]:
import boto3
from botocore.client import Config

In [None]:
# S3 client used for the uploads and the presigned URL below.
#
# SECURITY: no credentials are passed here. boto3 resolves them through its
# default credential chain (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables, the shared ~/.aws/credentials file, or an attached
# IAM role). The access key pair that used to be hardcoded in this cell has
# been exposed in the notebook source and must be rotated.
#
# signature_version='s3v4' makes the client sign requests, including
# presigned URLs, with Signature Version 4.
s3 = boto3.client(
    's3',
    region_name='us-east-1',
    config=Config(signature_version='s3v4'),
)

In [None]:
# Upload the all-leads export. This notebook writes that export as
# 'all_leads.csv', so that is the local file to send; the previous local name
# 'freedom_all_leads.csv' is never created here. The S3 key is unchanged.
# upload_file returns None, so its result is not kept in a variable.
s3.upload_file('all_leads.csv', 'freedom-cashflow-datasets', 'freedom_all_leads.csv')

In [None]:
# Upload the accepted-leads export. The local filename now carries the .csv
# extension, matching the S3 key and the other upload cells.
# NOTE(review): no visible cell writes this file -- confirm the local name and
# add the export step so the notebook runs from a fresh kernel.
s3.upload_file('freedom_accepted_leads.csv', 'freedom-cashflow-datasets', 'freedom_accepted_leads.csv')

In [None]:
# Upload the funded-loans export. The local filename now carries the .csv
# extension, matching the S3 key and the other upload cells.
# NOTE(review): no visible cell writes this file -- confirm the local name and
# add the export step so the notebook runs from a fresh kernel.
s3.upload_file('freedom_funded_loans.csv', 'freedom-cashflow-datasets', 'freedom_funded_loans.csv')

In [None]:
# Upload the GetCredit export under the same name as its S3 key.
# NOTE(review): no visible cell writes this file -- confirm where it comes from.
s3.upload_file(
    Filename='freedom_getcredit_data.csv',
    Bucket='freedom-cashflow-datasets',
    Key='freedom_getcredit_data.csv',
)

In [None]:
# Upload the DecisionLogic export under the same name as its S3 key.
# NOTE(review): no visible cell writes this file -- confirm where it comes from.
s3.upload_file(
    Filename='freedom_decisionlogic_data.csv',
    Bucket='freedom-cashflow-datasets',
    Key='freedom_decisionlogic_data.csv',
)

In [None]:
# Presigned GET link for the DecisionLogic export.
# The link is valid for 7 days, expressed in seconds (604800).
url = s3.generate_presigned_url(
    ClientMethod='get_object',
    Params={'Bucket': 'freedom-cashflow-datasets', 'Key': 'freedom_decisionlogic_data.csv'},
    ExpiresIn=7 * 24 * 60 * 60,
)