In [1]:
import os
import urllib
import urllib.parse

import numpy as np
import pandas as pd
import pyodbc
import sqlalchemy


# Connection settings for the HimsLoan SQL Server database. The credentials
# are read from environment variables so they never appear in the notebook.
server = '10.43.20.148'
database = 'HimsLoan'
username = os.environ.get('HIMS_USERNAME')
password = os.environ.get('HIMS_PASSWORD')
driver = '{ODBC Driver 17 for SQL Server}'

# os.environ.get returns None for an unset variable, which would otherwise
# surface as an opaque "can only concatenate str" TypeError while building the
# connection string. Fail early with a message naming the missing variable(s).
_missing_env = [
    env_name
    for env_name, env_value in (('HIMS_USERNAME', username), ('HIMS_PASSWORD', password))
    if env_value is None
]
if _missing_env:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(_missing_env)
    )

# NOTE(review): PORT=1443 is kept exactly as before. SQL Server's default TCP
# port is 1433 and Microsoft's ODBC driver documents the port as part of the
# SERVER value ("host,port") -- confirm whether this keyword has any effect.
odbc_connect = ';'.join([
    'DRIVER=' + driver,
    'SERVER=' + server,
    'PORT=1443',
    'DATABASE=' + database,
    'UID=' + username,
    'PWD=' + password,
])

# The raw ODBC string is URL-encoded and handed to SQLAlchemy's pyodbc dialect
# through the odbc_connect query parameter.
params = urllib.parse.quote_plus(odbc_connect)
engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=%s' % params)

In [2]:
# Master sample query (supplied by Ajay).
# Selects project identifiers, funding source, IDIS number, project status and
# milestone name/date by joining project -> status, loan, loanfund,
# FundingInfo and LutSourceofFunds, then AssnProjectIDISNo and MilestoneDate
# (both LEFT JOINed) and finally lutmilestonename.
# NOTE(review): MilestoneDate is LEFT JOINed, but the inner JOIN to
# lutmilestonename that follows matches on md.LutMilestoneNameCD, so projects
# without any milestone row are dropped anyway -- confirm this is intended.
# NOTE(review): the sample output below contains exact duplicate rows,
# presumably fan-out from the loan/loanfund joins -- confirm before aggregating.
query = """ 
    select
    p.ProjectNo,
    p.ProjName,
    p.calendaryear,
    p.FullProjectNumber,
    sf.Description FundingSource,
    ids.IDISNo,
    stt.Description ProjectStatus,
    mn.Description MilestoneName,
    md.DateReceived 
    from
    himsloan.dbo.project p join
    himsloan.dbo.LutProjectStatus stt on p.LutProjectStatusCd=stt.LutProjectStatusCd join
    himsloan.dbo.loan l on p.ProjUniqueID=l.ProjUniqueID join
    himsloan.dbo.loanfund lf on l.LoanID=lf.LoanID join
    himsloan.dbo.FundingInfo fi on lf.FundingInfoID=fi.FundingInfoID join
    himsloan.dbo.LutSourceofFunds sf on fi.LutSourceofFundsCD=sf.LutSourceofFundsCD left join
    himsloan.dbo.AssnProjectIDISNo ids on p.ProjUniqueID=ids.Projuniqueid left join
    himsloan.dbo.MilestoneDate md on p.ProjUniqueID=md.ProjUniqueID join
    himsloan.dbo.lutmilestonename mn on md.LutMilestoneNameCD=mn.LutMilestoneNameCD;
    """

In [3]:
# Run the master query against the database and pull the full result set
# into a DataFrame.
df = pd.read_sql(sql=query, con=engine)

In [4]:
# Preview the first five rows of the query result.
df.head(n=5)

Unnamed: 0,ProjectNo,ProjName,calendaryear,FullProjectNumber,FundingSource,IDISNo,ProjectStatus,MilestoneName,DateReceived
0,102302,ESTRELLA I,1997,970486,New Funds,4019^ 5815,Closed,IDIS Completion,2000-07-18 00:00:00
1,102302,ESTRELLA I,1997,970486,New Funds,4019^ 5815,Closed,IDIS Completion,2000-07-18 00:00:00
2,102302,ESTRELLA I,1997,970486,New Funds,4019^ 5815,Closed,Notice of Completion,2000-07-18 00:00:00
3,102302,ESTRELLA I,1997,970486,New Funds,4019^ 5815,Closed,Notice of Completion,2000-07-18 00:00:00
4,102302,ESTRELLA I,1997,970486,New Funds,4019^ 5815,Closed,Construction Contract Completion,2000-07-18 00:00:00


In [5]:
# Persist the query result locally as a parquet file.
sample_master_path = '../data/HIMS/sample_master.parquet'
df.to_parquet(sample_master_path)

In [2]:
# Names of the raw table exports to load; these are the same tables the
# master query joins together.
table_list = [
    'Project',
    'LutProjectStatus',
    'Loan',
    'LoanFund',
    'FundingInfo',
    'LutSourceofFunds',
    'AssnProjectIDISNo',
    'MilestoneDate',
    'LutMilestoneName',
]

In [3]:
# Read every table's parquet export into a dict keyed by table name.
raw_dfs = dict()
for table in table_list:
    parquet_path = f'../data/HIMS/{table}.parquet'
    raw_dfs[table] = pd.read_parquet(parquet_path)

In [4]:
# Show each table's name followed by its first three rows.
for table_name, table_df in raw_dfs.items():
    display(table_name, table_df.head(n=3))

'Project'

Unnamed: 0,ProjUniqueID,ProjectNo,ProjParentUID,LutProjectStatusCd,ProjName,CalendarYear,CalendarMonth,LutNofaTypeID,NofaYear,LutNofaRoundID,...,VestingLanguage,LutLienID,AmtSeniorLienDownPayment,AmtClosingCost,AmtPurchaseAssistance,LutLocationTypeCD,LutPreservationStatusCD,LutRecapitalizationCD,PepDate,PepMonth
0,1,100000,,1,"DELGADO, ELVA I.",1985,1,,,,...,,,,,,,,,,
1,2,100001,,0,"GARCIA, JORGE A. & CONSUELO D.",1985,1,,,,...,,,,,,,,,,
2,3,100002,,1,"FERGUSON, GEORGE W. & REMEDIOS",1985,1,,,,...,,,,,,,,,,


'LutProjectStatus'

Unnamed: 0,LutProjectStatusCd,Description
0,0,Open
1,1,Cancelled
2,2,Closed


'Loan'

Unnamed: 0,LoanID,LtsProjectID,ProjUniqueID,ProjSeqLoanNo,LoanYear,ContractNo,LutLoanTypeCd,LutLoanStatusCd,LutLoanStatusNewCd,LutDaysInYrCd,...,LutLoanSubTypeCd,PmtDueDay,CurLatePaymentRatePct,ReviewPeriodDays,LutLoanFundingLanguageCD,LoanServicerNumber,AmtRegPmtAnnually,FlgStmtAnnually,PropertyOnLoanDoc,CommentOnLoanDoc
0,1,970797,2058.0,,,,2,4.0,5,2.0,...,,1.0,0.0,,,,,False,,
1,2,980138,11600.0,,,,2,31.0,5,2.0,...,,1.0,0.0,,,,,False,,
2,3,970798,2244.0,,,,1,4.0,5,2.0,...,,,0.0,,,,,False,,


'LoanFund'

Unnamed: 0,LoanID,FundRevSrcID,AppAccountCD,FundingInfoID,DateCreated,CreatedBy,Timestmp
0,4777,27,223,4.0,2008-11-30 21:50:59.547,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xf9'
1,14968,9,209,2.0,2008-11-30 21:50:59.547,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xfa'
2,10377,25,998,5.0,2008-11-30 21:50:59.703,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xfb'


'FundingInfo'

Unnamed: 0,FundingInfoID,ProjUniqueID,LutProjectActivityCD,LutProjectFundingTypeCD,LutProjectCategoryID,LutProjectSubCategoryID,LutFundingInfoSubsidyTypeCD,LutSourceofFundsCD,LutIncomeTypeID,FundRevSrcID,...,ProjectCategoryOther,SubsidyTypeOther,LutSubsidyTypeMMCCD,LutFirstMortgageTypeCD,CreatedBy,DateCreatedBy,ModifiedBy,DateModified,Comments,AppAccountID
0,2,4717,4,1,100,100,1,1,0,9.0,...,,,0,,SC,2009-11-02 16:40:39.293,SC,2009-11-02 16:40:39.567,,
1,3,5924,4,1,100,100,1,1,0,9.0,...,,,0,,SC,2009-11-02 16:40:39.293,SC,2009-11-02 16:40:39.567,,
2,4,8654,3,1,2,136,1,1,0,27.0,...,,,0,,SC,2009-11-02 16:40:39.293,SC,2009-11-02 16:40:39.567,,


'LutSourceofFunds'

Unnamed: 0,LutSourceofFundsCD,Description,LutProjectFundingTypeCD,LutProjectCategoryID
0,0,,0,0
1,1,New Funds,0,0
2,2,Rollover,0,0


'AssnProjectIDISNo'

Unnamed: 0,AssnProjectIDISNoID,Projuniqueid,LutIDISNoTypeID,LutIDISActivityStatusCD,LutCDBGNationalObjectiveCD,IDISNo,DateIDIScompletion,CreatedBy,DateCreated,ModifiedBy,DateModified,Comment
0,1,5,1,,,4896,2000-08-16,BT,2008-11-29 17:12:01.603,,,
1,2,7,1,,,4882,NaT,BT,2008-11-29 17:12:01.870,,,
2,3,9,1,,,4848,NaT,BT,2008-11-29 17:12:02.137,,,


'MilestoneDate'

Unnamed: 0,ProjUniqueID,LutMilestoneNameCD,DateRequested,DateReceived,Comments,CreatedBy,DateCreated,Timestmp
0,5,211,2000-08-16,2000-08-16,100%,SC,2009-10-09 17:58:27.747,b'\x00\x00\x00\x00\x00\xadq\xb5'
1,5,232,NaT,2000-08-16,INC0156639,INC0156639,2019-08-22 06:21:33.353,b'\x00\x00\x00\x00\x00\xc5\x80\xe6'
2,5,2111,NaT,2000-08-16,INC0156639,INC0156639,2019-08-22 06:23:01.820,b'\x00\x00\x00\x00\x00\xc5\x99\x9d'


'LutMilestoneName'

Unnamed: 0,LutMilestoneNameCD,Description,IsDateRequestedApplicable,IsActive,IsFieldValidate
0,101,Initial Underwriting Review Completed,True,True,False
1,102,Mayor and City Council Authority Approval,False,True,True
2,103,Financing Commitment Issued to Applicant,False,True,False


In [15]:
# Bind short names to the four tables inspected in the cells that follow.
project, lutprojectstatus, loan, loanfund = (
    raw_dfs[table_name]
    for table_name in ('Project', 'LutProjectStatus', 'Loan', 'LoanFund')
)

In [14]:
# Count how many Project rows share each ProjUniqueID.
project['ProjUniqueID'].value_counts()

2047     1
5320     1
1290     1
3339     1
13580    1
15629    1
9486     1
11535    1
21792    1
17698    1
19747    1
29988    1
27943    1
5416     1
7465     1
1322     1
3371     1
13612    1
15661    1
9518     1
11567    1
17730    1
19779    1
30020    1
27975    1
7433     1
5384     1
27911    1
27879    1
1226     1
        ..
13043    1
2804     1
757      1
19132    1
31418    1
21119    1
29339    1
10896    1
8849     1
14994    1
12947    1
2708     1
661      1
6806     1
4759     1
25241    1
31386    1
19100    1
25273    1
17053    1
21151    1
10928    1
8881     1
15026    1
12979    1
2740     1
693      1
6838     1
4791     1
2049     1
Name: ProjUniqueID, Length: 26856, dtype: int64

In [20]:
# Count how many Loan rows share each LoanID.
loan['LoanID'].value_counts()

2047     1
12963    1
6806     1
4759     1
19100    1
17053    1
10912    1
8865     1
15010    1
2724     1
12979    1
677      1
6822     1
4775     1
19116    1
17069    1
10928    1
8881     1
661      1
2708     1
12947    1
14994    1
4727     1
19068    1
17021    1
10880    1
8833     1
14978    1
12931    1
2692     1
        ..
15725    1
9582     1
11631    1
17778    1
5496     1
7545     1
1402     1
3451     1
13692    1
15741    1
17762    1
9566     1
11583    1
15709    1
17730    1
5448     1
7497     1
1354     1
3403     1
13644    1
15693    1
9550     1
11599    1
17746    1
5464     1
7513     1
1370     1
3419     1
13660    1
2049     1
Name: LoanID, Length: 19729, dtype: int64

In [17]:
# Preview the first five rows of the LoanFund table.
loanfund.head(n=5)

Unnamed: 0,LoanID,FundRevSrcID,AppAccountCD,FundingInfoID,DateCreated,CreatedBy,Timestmp
0,4777,27,223,4.0,2008-11-30 21:50:59.547,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xf9'
1,14968,9,209,2.0,2008-11-30 21:50:59.547,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xfa'
2,10377,25,998,5.0,2008-11-30 21:50:59.703,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xfb'
3,6871,7,701,9.0,2008-11-30 21:51:00.017,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xfc'
4,2988,9,223,11.0,2008-11-30 21:51:00.110,BT,b'\x00\x00\x00\x00\x00\xc0\xc2\xfd'


In [19]:
# Count LoanFund rows per LoanID; a count above 1 means the loan has several
# fund records.
loanfund['LoanID'].value_counts()

17937    61
18513    61
17693    60
17695    56
18716    44
18487    43
17946    37
18143    32
18270    30
18205    28
18474    27
18058    26
18130    25
18309    23
11810    22
18262    20
18128    19
18671    19
18129    19
18014    18
17699    17
18408    17
18213    16
18007    15
18235    14
8304     14
18258    14
18369    13
18069    13
17800    13
         ..
14685     1
8542      1
12668     1
4504      1
16786     1
10639     1
12684     1
2443      1
394       1
4488      1
18819     1
16770     1
10623     1
8574      1
14717     1
2427      1
10591     1
6521      1
16754     1
10607     1
8558      1
14701     1
12652     1
2411      1
362       1
6505      1
4456      1
18787     1
16738     1
2049      1
Name: LoanID, Length: 17479, dtype: int64