# Explore HIMS
* Take a closer look at the tables and track records across them using ProjUniqueID, ProjNo, AssnProjAPNID, and IDIS, to work out which identifier is which.

In [1]:
import numpy as np
import pandas as pd

In [75]:
# Three sample ProjUniqueID values to pull from every table, to see what a
# merge across the dataframes would look like. The trailing comment on each
# entry is the number of observations noted for that ID.
unique_id = [
    3658,   # 5 obs
    869,    # 2 obs
    12427,  # 1 obs
]

# Three sample ProjNo values, chosen the same way. They are kept as strings
# so the leading zeros are preserved.
proj_no = [
    '000052',  # 8 obs
    '990407',  # 2 obs
    '000048',  # 1 obs
]

In [58]:
# Tables that contain a ProjUniqueID column
tables = [
    'A',
    'ActivityLog',
    'AssnProjectUseProjectArea',
    'FundingInfo',
    'FundingInfoHistory',
    'Loan',
    'MilestoneDate',
    'NSPFundUse',
    'ParticipantHistory',
    'ProgramMilestones',
    'ProjectCouncilFile',
]

# Maps table name -> that table's rows restricted to the sample IDs
subset_dfs = {}

for table_name in tables:
    full_table = pd.read_parquet(f'../data/HIMS/{table_name}.parquet')
    # Boolean mask: True where the row's ProjUniqueID is one of the samples
    is_sample = full_table['ProjUniqueID'].isin(unique_id)
    subset_dfs[table_name] = full_table[is_sample]

In [79]:
# Tables that contain a ProjNo column; these are filtered on the sample
# project numbers and added to the same subset_dfs dictionary.
tables2 = [
    'DataCleanUpRawAPNs',
    'DataCleanUpRawIDISs',
]

for table_name in tables2:
    full_table = pd.read_parquet(f'../data/HIMS/{table_name}.parquet')
    # Boolean mask: True where the row's ProjNo is one of the samples
    is_sample = full_table['ProjNo'].isin(proj_no)
    subset_dfs[table_name] = full_table[is_sample]

In [80]:
# Show the name of every table collected in subset_dfs. Only the keys are
# displayed, so iterate over the dict directly instead of unpacking
# .items() into a value that is never used.
for key in subset_dfs:
    display(key)

'A'

'ActivityLog'

'AssnProjectUseProjectArea'

'FundingInfo'

'FundingInfoHistory'

'Loan'

'MilestoneDate'

'NSPFundUse'

'ParticipantHistory'

'ProgramMilestones'

'ProjectCouncilFile'

'DataCleanUpRawAPNs'

'DataCleanUpRawIDISs'

## Find the sample IDs to extract

In [2]:
# Load the full 'A' table from its parquet file.
a = pd.read_parquet('../data/HIMS/A.parquet')

In [19]:
# Number of non-null LogID entries per ProjUniqueID, as a flat two-column frame
a1 = a.groupby('ProjUniqueID')['LogID'].count().reset_index()

# Rows of `a` belonging to the sample ProjUniqueID values
a2 = a.loc[a['ProjUniqueID'].isin(unique_id)]

In [70]:
# Load the full DataCleanUpRawAPNs table from its parquet file.
b = pd.read_parquet('../data/HIMS/DataCleanUpRawAPNs.parquet')

In [76]:
# Number of non-null APNs entries per ProjNo, as a flat two-column frame
b1 = b.groupby('ProjNo')['APNs'].count().reset_index()

# Rows of `b` belonging to the sample ProjNo values
b2 = b.loc[b['ProjNo'].isin(proj_no)]

In [77]:
# Display the rows of `b` whose ProjNo is in proj_no.
b2

Unnamed: 0,ID,ProjNo,APNs
429,430,52,2214006021
7118,7119,52,2214006021
15192,15193,52,2214006021
15813,15814,48,5213032129
22051,22059,52,2214006021
28844,28852,52,2214006021
35146,35154,990407,6069024025
35606,35614,52,2214006021
42357,42365,52,2214006021
43869,43877,990407,6069024025


In [68]:
# Full list of HIMS table names, one per line
full_list = [
    'A',
    'ActivityLog',
    'AssnProjectIDISNo',
    'AssnProjectAPNCBTS',
    'AssnProjectIDIS',
    'AssnProjectUseProjectArea',
    'DataCleanUp',
    'DataCleanUpRawAPNs',
    'DataCleanUpRawIDISs',
    'EllisApns',
    'FundingInfo',
    'FundingInfoHistory',
    'IDISActivity',
    'IDISHomeCount',
    'IDISHomeStatus',
    'landuseAPN_1',
    'LandUsed',
    'Loan',
    'MilestoneDate',
    'NSPFundUse',
    'ParticipantHistory',
    'PercentGrossRentUnits',
    'ProgramMilestones',
    'ProjectCouncilFile',
    'ProjectStatusChangeHistory',
    'ProjectUnits',
    'Property',
]

# Peek at the first rows of the IDISActivity table
df = pd.read_parquet('../data/HIMS/IDISActivity.parquet')
df.head()

Unnamed: 0,ACTIVITY_ID,PROJECT_ID,GRANTEE_ID,IDIS_ACTV_ID,NAME,GRANTEE_ACTV_NUM,SETUP_DT,INIT_FUNDING_DT,COMPLETED_DT,ACTIVITY_STATUS_CODE,INSERT_USER_ID
0,2109700000.0,2109719940001,21097,1,HOME COMMITTED FUNDS ADJUSTMENT,,0001-01-011997-02-052005-07-20OC04BO03,1997-08-27C22859,2009-10-22,A,CONVERTED HOME ACTIVITIES
1,2109700000.0,2109719940002,21097,2,CDBG COMMITTED FUNDS ADJUSTMENT,,0001-01-010001-01-012006-07-26OC04BO03,1997-08-27C22859,2009-10-22,C,
2,2109700000.0,2109719940003,21097,3,ESG COMMITTED FUNDS ADJUSTMENT,,0001-01-010001-01-012005-11-23OC04BO03,1997-08-27C22859,2009-10-22,C,
3,2109700000.0,2109719940004,21097,4,HOPWA COMMITTED FUNDS ADJUSTMENT,,0001-01-010001-01-012005-11-23OC04BO03,1997-08-27C22859,2009-10-22,C,|* *|
4,2109700000.0,2109719940001,21097,5,"STEPHENS, ANGELA",930364.0,1992-09-101992-09-101992-12-28CC04BO03,1997-08-27C28133,2005-10-27,C,


Unnamed: 0,LogID,ProjUniqueID,LoanID,BankruptcyID,ForeclosuresID,AssumptionID,AmendmentID,ProbateID,SubordinationID,CategoryID,...,ModifiedOn,ModifiedBy,Timestmp,REOID,SubCategoryID,ReportingPeriod,CashRequestDate,CovenantID,Applicationid,InvoiceID
0,139380,4436.0,,,,,,,,1,...,2008-12-01 15:06:14.000,VCendana,b'\x00\x00\x00\x00\x00x\xa1q',,,NaT,NaT,,,
1,139381,3801.0,12276.0,,,,,,,6,...,2008-12-01 15:27:13.537,AAghasar,b'\x00\x00\x00\x00\x00\x7f\x13\xfc',,,NaT,NaT,,,
2,139382,3801.0,12276.0,,,,,,,6,...,2008-12-15 11:59:49.440,AAghasar,b'\x00\x00\x00\x00\x00x\xa1s',,,NaT,NaT,,,
3,139383,3801.0,12276.0,,,,,,,6,...,2008-12-15 11:59:46.287,AAghasar,b'\x00\x00\x00\x00\x00x\xa1t',,,NaT,NaT,,,
4,139384,3801.0,12276.0,,,,,,,6,...,2008-12-15 11:59:45.177,AAghasar,b'\x00\x00\x00\x00\x00x\xa1u',,,NaT,NaT,,,
