In [None]:
# !pip install -r requirements.txt

In [1]:
import os
import pandas as pd
import numpy as np
from snowflake.sqlalchemy import URL
import logging
import warnings
from pathlib import Path
import datetime as datetime
from dateutil.relativedelta import relativedelta

warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
logging.basicConfig(level="INFO")
logger = logging.getLogger()
root = Path().cwd()

# add all the required files in this path
input_path = root / "inputs"
if not os.path.exists(input_path):
    os.makedirs(input_path, exist_ok=True)

# save all the files in this path
output_path = root / "outputs"
if not os.path.exists(output_path):
    os.makedirs(output_path, exist_ok=True)
logger.info(f"Reading dependencies from {input_path}...")
logger.info(f"saving files to {root}...")

  warn_incompatible_dep(
INFO:root:Reading dependencies from /Users/keyurpethad/Library/CloudStorage/GoogleDrive-keyur@niro.money/My Drive/Projects/Portfolio Analysis/Analysis/inputs...
INFO:root:saving files to /Users/keyurpethad/Library/CloudStorage/GoogleDrive-keyur@niro.money/My Drive/Projects/Portfolio Analysis/Analysis...


In [2]:
from sqlalchemy import create_engine


class extractorFromQuery:
    def __init__(self):
        self.credentials = {
            "account": "qfi-niro",
            "user": "niro_analytics",
            "password": "4qVe_V-aEq,j.i,",
            "warehouse": "NIRO_ANALYTICS_WAREHOUSE",
        }

    def queryExecutor(self, query: str, *args):
        engine = create_engine(URL(**self.credentials))
        res = pd.read_sql_query(query, engine, params=args)
        return res

In [3]:
disbursed_cases = """
    select 
    la.id,
    ou.category,
    la.user_id,
    la.principal_amount,
    la.interest_rate,
    la.tenure,
    la.selfie_liveness_score,
    la.selfie_liveness_result,
    la.kyc_status,
    la.pan_retry_count,
    la.nach_retry_count,
    la.remote_cpv_status,
    la.ckyc_status,
    o.attributed_partner as "supply",
    o.propensity,
    o.offer_type,
    tu.customer_availability,
    tu.income_source,
    tu.purpose_of_loan,
    tu.monthly_take_home_income,
    tu.employer_name,
    tu.nature_of_business,
    tu.cpv_need_to_initiate_at,
    tu.residence_ownership_type,
    tu.marital_status,
    tu.designation,
    cpv.cpv_type,
    cpv.agency_status,
    l.name as "demand",
    CASE
        when la.loan_disbursed_date_by_ops is null then date(la.disbursement_date)
        else date(la.loan_disbursed_date_by_ops)
    end as disbursement_date
    from niro_rds.niro_data.core__public__loan_application la
    left join NIRO_RDS.NIRO_DATA.CORE__PUBLIC__OFFERS o on o.id = la.offer_id
    left join NIRO_RDS.NIRO_DATA.core__public__offer_user_details ou on ou.offer_id = o.id
    left join NIRO_RDS.NIRO_DATA.CORE__PUBLIC__VERIFICATION_DETAILS vd on vd.id = la.verification_id 
    left join NIRO_RDS.NIRO_DATA.CORE__PUBLIC__TELE_PD_USER tu on vd.telepd_id::bigint = tu.id
    left join NIRO_RDS.NIRO_DATA.CORE__PUBLIC__CP_VERIFICATION cpv on cpv.id = vd.cpv_id::bigint
    left join NIRO_RDS.NIRO_DATA.CORE__PUBLIC__LENDERS l on l.id = o.lender_uid
    where la.loan_application_status in ('CLOSED','LOAN_EXECUTED') and la.is_deleted = False and o.is_deleted = False;
"""
total_disbursed_cases = extractorFromQuery().queryExecutor(disbursed_cases)
total_disbursed_cases.shape

INFO:snowflake.connector.connection:Snowflake Connector for Python Version: 3.4.0, Python Version: 3.11.5, Platform: macOS-10.16-x86_64-i386-64bit
INFO:snowflake.connector.connection:This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
INFO:snowflake.connector.cursor:query: [select current_database(), current_schema();]
INFO:snowflake.connector.cursor:query execution done
INFO:snowflake.connector.cursor:Number of results in first chunk: 1
INFO:snowflake.connector.cursor:query: [ROLLBACK]
INFO:snowflake.connector.cursor:query execution done
INFO:snowflake.connector.cursor:Number of results in first chunk: 1
INFO:snowflake.connector.cursor:query: [select la.id, ou.category, la.user_id, la.principal_amount, la.interest_rate, la...]
INFO:snowflake.connector.cursor:query execution done
INFO:snowflake.connect

(41770, 30)

In [4]:
#Dropped following field from above due to stated reasons

# la.selfie_match_result, -- large no. of missing data 
# la.selfie_match_confidence, -- large no. of missing data 
# la.loan_acceptance_status, -- all values "COMPLETED"
# la.nach_status, -- all values "COMPLETED"
# la.loan_agreement_status, -- all values "COMPLETED"
# la.disbursement_status, -- all values "COMPLETED"
# la.selfie_status, -- all values "COMPLETED"
# la.bank_account_status, -- all values "COMPLETED"
# la.kyc_retry_count, -- all values "COMPLETED"
# la.bank_retry_count, -- all values "COMPLETED"
# la.selfie_retry_count, -- all values "COMPLETED"
# la.redo_kyc, -- all values "FALSE"
# la.disbursement_remarks, -- irrelevant
# la.documents_pending, -- all values "FALSE"
# la.documents_remarks, -- irrelevant
# la.name_match_score, -- all values (blank)
# la.employment_verification_status, -- all values (blank)
# la.monthly_income_verification_status, -- all values (blank)
# la.lender_policy_check_status, -- all values (blank)
# la.document_verification_status, -- all values (blank)
# o.phone_number, -- irrelevant
# ou.propensity_score, -- all values (blank)
# ou.existing_obligation, -- all values (blank)
# tu.gender, -- less missing values in performance data
# tu.no_of_years_in_employment, -- data unclean (needs to be a dropdown)
# tu.final_telepd_status, -- all values "COMPLETED"
# tu.rent_amount, -- unclean data
# cpv.cpv_status, -- unclean data
# o.data_tag, -- not sure of use

# od.dec_reason, -- all values "NOT DECLINED"


In [5]:
total_disbursed_cases.head()

Unnamed: 0,id,category,user_id,principal_amount,interest_rate,tenure,selfie_liveness_score,selfie_liveness_result,kyc_status,pan_retry_count,nach_retry_count,remote_cpv_status,ckyc_status,supply,propensity,offer_type,customer_availability,income_source,purpose_of_loan,monthly_take_home_income,employer_name,nature_of_business,cpv_need_to_initiate_at,residence_ownership_type,marital_status,designation,cpv_type,agency_status,demand,disbursement_date
0,c1f020b6-a886-4d11-9f1f-9efede7923c7,CAT-D,b2c949d0-fca7-43b4-bacf-5cd02d96fb72,61029,27,10,1.0,PASS,PENDING,0,0.0,COMPLETED,COMPLETED,quikr,HiiProp,PRE_APPROVED,Yes,Self Employed,Purchase,15000,-,Boutique,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-03-13
1,00e05a7c-b1ec-4284-9dfe-e3158fed0b5e,CAT-B,d1ae63a0-b89e-436f-be83-898c42b92a31,212606,27,24,1.0,PASS,PENDING,4,0.0,REQUIRED,COMPLETED,niro,MedProp,PRE_APPROVED,Yes,Self Employed,Business Development,45000,-,service provider,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-06-02
2,b55813db-dd34-4767-a3cd-d02b57e42db2,CAT-D,ff9e4435-662c-4c8e-b6ec-3cfbe0f2b6ff,53068,27,12,0.0,FAIL,COMPLETED,1,0.0,COMPLETED,PENDING,niro,LowProp,PRE_APPROVED,Yes,Salaried,Purchase,30500,Hitech Net Zone,-,Residence,Self owned,MARRIED,Administrator,PHYSICAL,Positive,payu,2023-06-07
3,98298a73-6df1-49e8-a390-5afd8477e1b4,CAT-B,803af291-d31b-49b7-8fb1-c606a58ea2c1,393464,20,36,1.0,PASS,COMPLETED,1,21.0,COMPLETED,PENDING,housing,MedProp,PRE_APPROVED,Yes,Salaried,House Renovation,140000,IBM India Private Limited,-,Residence,Rented,MARRIED,Senior technical engineer,PHYSICAL,Positive,payu,2023-07-23
4,988136ff-87fb-4421-bfdb-2a530c5286e6,CAT-D,a500085e-c970-4846-a973-73247599552c,53068,24,12,1.0,UNKNOWN,COMPLETED,0,0.0,COMPLETED,PENDING,snapdeal,HiiProp,PRE_APPROVED,Yes,Salaried,Marriage,30500,Texo Fashions,-,Residence,Family owned,MARRIED,Supervisor,REMOTE,Positive,payu,2023-02-27


In [6]:
total_disbursed_cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41770 entries, 0 to 41769
Data columns (total 30 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        41770 non-null  object 
 1   category                  41770 non-null  object 
 2   user_id                   41770 non-null  object 
 3   principal_amount          41770 non-null  int64  
 4   interest_rate             41770 non-null  int64  
 5   tenure                    41770 non-null  int64  
 6   selfie_liveness_score     40415 non-null  float64
 7   selfie_liveness_result    40415 non-null  object 
 8   kyc_status                41770 non-null  object 
 9   pan_retry_count           41770 non-null  int64  
 10  nach_retry_count          41767 non-null  float64
 11  remote_cpv_status         41749 non-null  object 
 12  ckyc_status               41770 non-null  object 
 13  supply                    41737 non-null  object 
 14  propen

In [7]:
disbursed_cases_with_cvattrs = """
            select * from (
            select 
            distinct la.id,
            pii.gender as bureau_gender,
            pii.pin as bureau_pincode,
            pii.calculated_age,
            pii.city as bureau_city,
            pii.state as bureau_state,
            pii.native_language,
            tli.*,
            date_trunc("MONTH",date(od.created_at)) as createdate,
            od.current_salary,
            od.available_income,
            od.foir_dlq, od.foir_cibil, od.foir_trd, od.foir_mean, od.new_foir, 
            od.naps_score,
            rank()over(partition by od.user_id order by od.created_at asc) as rnk,
            CASE
                when la.loan_disbursed_date_by_ops is null then date(la.disbursement_date)
                else date(la.loan_disbursed_date_by_ops)
            END as disbursement_date
            from NIRO_RDS.NIRO_DATA.CORE__PUBLIC__LOAN_APPLICATION la
            left join NIRO_RDS.NIRO_DATA.core__public__offer_user_details ou on ou.offer_id = la.offer_id
            left join ANALYTICS.OFFERS.offer_details od on od.user_id = la.user_id and od.category = ou.category
            left join ANALYTICS.OFFERS.personal_info pii on od.pii_id = pii.pii_id
            left join ANALYTICS.OFFERS.tradeline_info tli on tli.tli_id = od.tli_id
            where la.loan_application_status in ('CLOSED','LOAN_EXECUTED') 
            and la.is_deleted = False
            and(la.is_deleted_at_source = false or la.is_deleted_at_source is null)
            and od.dec_reason = 'NOT DECLINED')
where rnk = 1;
"""
disbursed_cases = (
    extractorFromQuery()
    .queryExecutor(disbursed_cases_with_cvattrs)
    .drop(columns="disbursement_date")
)
disbursed_cases.shape

INFO:snowflake.connector.connection:Snowflake Connector for Python Version: 3.4.0, Python Version: 3.11.5, Platform: macOS-10.16-x86_64-i386-64bit
INFO:snowflake.connector.connection:This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
INFO:snowflake.connector.cursor:query: [select current_database(), current_schema();]
INFO:snowflake.connector.cursor:query execution done
INFO:snowflake.connector.cursor:Number of results in first chunk: 1
INFO:snowflake.connector.cursor:query: [ROLLBACK]
INFO:snowflake.connector.cursor:query execution done
INFO:snowflake.connector.cursor:Number of results in first chunk: 1
INFO:snowflake.connector.cursor:query: [select * from ( select distinct la.id, pii.gender as bureau_gender, pii.pin as b...]
INFO:snowflake.connector.cursor:query execution done
INFO:snowflake.connect

(42596, 89)

In [8]:
disbursed_cases.head()

Unnamed: 0,id,bureau_gender,bureau_pincode,calculated_age,bureau_city,bureau_state,native_language,tli_id,pii_id,agg911,rvlr01,bcpmtstr,cv11,cv14,mt28s,mt33s,pl33s,at20s,mt01s,bc02s,bg01s,cv10,trd,at33a,au33s,co04s180,au28s,pl28s,co01s180,bc28s,cv12,co05s,g310s,aggs911,at01s,at33a_ne_ccod,cv14_12m,cv14_6m,cv14_3m,cv14_1m,g310s_24m,g310s_6m,g310s_3m,g310s_1m,cv11_24m,cv11_12m,g057s_1dpd_36m,g057s_1dpd_12m,bc106s_60dpd,bc107s_24m,bc106s_60dpd_12m,bc107s_12m,bc106s_le_30dpd_12m,bc09s_36m_hcsa_le_30,pl09s_36m_hcsa_le_30,at09s_6m,g310s_36m,at33a_ne_wo,at09s_12m,at09s_3m,cv13,cv24,revs904,cv20,cv22,ul_trd,cv21,g310s_2m,secured_accounts_count,unsecured_accounts_count,secured_high_credit_sum,unsecured_high_credit_sum,secured_amount_overdue_sum,unsecured_amount_overdue_sum,secured_balances_sum,unsecured_balances_sum,own_accounts_count,other_accounts_count,bureau_score,createdate,current_salary,available_income,foir_dlq,foir_cibil,foir_trd,foir_mean,new_foir,naps_score,rnk
0,fb73ec80-5e7c-44a6-924e-1a9c8c94b2e3,Male,250002,26,MEERUT,UTTAR PRADESH,English,0971202b-7db5-43ed-9f7f-ba0cb5e0d553,3c38aece-32cc-4575-876b-d801e01daed6,95.07,0.0,TRANSACTOR,0.0,17.0,-1.0,-1.0,32605.0,31.0,0.0,3.0,0.0,0.0,6.0,105449.0,-1.0,-1.0,-1.0,52000.0,0.0,96000.0,0.0,-1.0,1.5,87.0,2.0,39567.0,13.0,6.0,3.0,1.0,1.5,1.5,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,0.0,0.0,3.0,2.0,1.0,2.0,1.5,-1.0,2.0,0.0,0.0,20900.0,66888.0,8368.1,65882.0,6.0,76901.0,1.0,0.0,6.0,0.0,152888.0,0.0,0.0,0.0,105449.0,0.0,6.0,737.0,2023-04-01,48000.0,22727.55,0.5,0.5,0.45,0.48,68.18,726.0,1
1,e75d6041-28d0-4347-9f33-22f0a31d1730,Male,201204,36,GHAZIABAD,UTTAR PRADESH,,b93383b7-0aa5-4729-813b-5b849c2a3877,e8400508-de40-45fc-b4e5-06634f88350c,-1.0,-1.0,NOBC,0.0,13.0,-1.0,-1.0,612907.0,76.0,0.0,-1.0,0.0,0.0,7.0,612907.0,-1.0,-1.0,-1.0,699000.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,7.0,612907.0,6.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,7.0,7.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,1.0,1.0,1.0,-1.0,2.0,1.0,0.0,-1.0,-1.0,17256.0,-1.0,7.0,85832.0,1.0,0.0,4.0,0.0,699000.0,0.0,0.0,0.0,612907.0,0.0,4.0,733.0,2023-10-01,104850.0,30517.15,0.5,0.5,0.5,0.5,57.39,710.0,1
2,bb955e93-3725-40e9-8bc5-b40768fd621e,Male,673301,44,KOZHIKODE,KERALA,Malayalam,79bc9334-90c5-49c8-b160-6fc8faf874df,89806bb0-05b7-4a77-a1df-8d6f43562593,80.43,0.0,TRANSACTOR,0.0,12.0,-2.0,-2.0,-2.0,48.0,1.0,2.0,0.0,0.0,6.0,241282.0,-1.0,-1.0,-1.0,-2.0,0.0,300000.0,0.0,-1.0,1.5,-2.0,1.0,-1.0,4.0,0.0,0.0,0.0,1.5,1.5,1.0,1.0,0.0,0.0,6.0,6.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.5,-1.0,1.0,0.0,0.0,6460.0,146238.0,12064.1,241282.0,4.0,6460.0,1.0,2.0,4.0,4356884.0,1174908.0,0.0,0.0,4241312.0,877023.0,0.0,6.0,747.0,2023-05-01,233497.27,49942.47,0.5,0.4,0.43,0.44,57.08,787.0,1
3,71aee626-d46a-42dd-b037-a49746230055,Male,110035,45,DELHI,DELHI,English,f3c66ee6-4376-4537-a071-4b610545deb0,28c170b4-9c23-4806-bce2-c7224f8ef16e,-1.0,-1.0,NOBC,0.0,14.0,-1.0,-1.0,130524.0,48.0,0.0,-1.0,0.0,0.0,13.0,130524.0,-1.0,-1.0,-1.0,179159.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,12.0,130524.0,4.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,13.0,13.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,3.0,2.0,1.0,-1.0,3.0,0.0,0.0,-1.0,-1.0,8137.0,-1.0,13.0,19766.0,0.0,0.0,9.0,0.0,411960.0,0.0,0.0,0.0,130524.0,0.0,9.0,757.0,2023-05-01,48000.0,21473.8,0.5,0.4,0.5,0.47,71.32,734.0,1
4,10764b68-3575-4900-9592-63fb3cd39448,Male,500091,38,HYDERABAD,ANDHRA PRADESH,,db7bce66-fe38-4d4a-8618-c224fc7bc078,8ef0b234-0cdd-4799-bd0f-8d43fa018f2e,-2.0,-2.0,REVOLVER,0.0,19.0,2200000.0,2054514.0,236814.0,165.0,1.0,1.0,0.0,0.0,14.0,3695035.0,-1.0,-1.0,-1.0,256500.0,0.0,248692.0,0.0,-1.0,1.0,-2.0,7.0,3496162.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,14.0,14.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,-1.0,3.0,0.0,0.0,-2.0,16735.0,29199.65,198873.0,8.0,5608.0,0.0,3.0,8.0,2588271.0,1139662.0,0.0,0.0,2387688.0,628702.0,0.0,13.0,765.0,2022-09-01,149619.54,34911.23,0.5,0.4,0.36,0.42,61.73,769.0,1


In [9]:
column_name_map = {'agg911': 'max_aggregate_bankcard_utilisation_l12m', 'rvlr01': 'cc_util_revolving_l1m', 'bcpmtstr': 'bank_card_payment_category', 'cv11': 'no_of_60p_accs_ever', 'cv14': 'no_of_deduped_inquiries', 
                   'mt28s': 'total_cl_open_mortgage_trades_l12m', 'mt33s': 'total_balance_open_mortgage_trades_l12m', 'pl33s':'total_bal_of_open_pl_l12m', 'at20s':'months_since_oldest_trade', 
                   'mt01s':'no_of_mortgage_trades', 'bc02s':'no_of_open_cc_trades', 'bg01s':'no_of_business_general_trades', 'cv10':'no_of_30p_accs_ever', 'trd':'no_of_trades', 'at33a':'total_bal_open_trades_l12m',
                   'au33s':'total_bal_open_auto_trades_l12m', 'co04s180':'months_since_recent_chargedoff180P', 'au28s':'total_cl_open_auto_trades_l12m', 'pl28s':'total_cl_open_pl_trades_l12m',
                   'co01s180':'no_of_chargedoff180P', 'bc28s':'total_cl_of_cc_trades_l12m', 'cv12':'no_of_90p_accs_ever', 'co05s':'total_bal_of_chargedoff_trades', 'g310s':'worst_rating_l12m',
                   'aggs911':'current_utilization_of_top_wallet_bankcard', 'at01s':'no_of_trades_pl_2w_gl_cd', 'at33a_ne_ccod':'total_bal_open_trades_l12m_except_wo_cc_od', 'cv14_12m':'no_of_deduped_inquiries_l12m',
                   'cv14_6m':'no_of_deduped_inquiries_l6m', 'cv14_3m':'no_of_deduped_inquiries_l3m', 'cv14_1m':'no_of_deduped_inquiries_l1m', 'g310s_24m':'worst_rating_l24m',
                   'g310s_6m':'worst_rating_l6m', 'g310s_3m':'worst_rating_l3m', 'g310s_1m':'worst_rating_l1m', 'cv11_24m':'no_of_60p_accs_l24m', 'cv11_12m':'no_of_60p_accs_l12m', 'g057s_1dpd_36m': 'no_of_trades_bounced_l36m',
                   'g057s_1dpd_12m':'no_of_trades_bounced_l12m', 'bc106s_60dpd':'no_of_cc_trades_60P_l24m', 'bc107s_24m':'no_of_30P_cc_l24m', 'bc106s_60dpd_12m':'no_of_cc_trades_60P_l12m', 'bc107s_12m':'no_of_30P_cc_l12m',
                   'bc106s_le_30dpd_12m':'no_of_cc_lt30p_l12m', 'bc09s_36m_hcsa_le_30':'no_cc_sanctionedamt_lt30K_l36m', 'pl09s_36m_hcsa_le_30':'no_pl_sanctionedamt_lt30K_l36m', 'at09s_6m':'no_tr_opened_l6m',
                   'g310s_36m':'worst_dpd_l36m', 'at33a_ne_wo':'total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades', 'at09s_12m':'no_tr_opened_l12m', 'at09s_3m':'no_tr_opened_l3m', 'cv13':'per_of_30p_accs_ever',
                    'cv24':'total_payment_amount_bankcard_accs_l3m', 'revs904':'max_revolving_monthly_spend_l12m', 'cv20':'total_monthly_obligations_l3m', 'cv22':'total_balance_bankcard_accs_l3m', 
                    'ul_trd':'no_of_trades_unsec', 'cv21':'total_payment_amount_l3m', 'g310s_2m':'worst_dpd_l2m'}

In [10]:
disbursed_cases.rename(columns = column_name_map, inplace=True)

In [11]:
disbursed_cases.head()

Unnamed: 0,id,bureau_gender,bureau_pincode,calculated_age,bureau_city,bureau_state,native_language,tli_id,pii_id,max_aggregate_bankcard_utilisation_l12m,cc_util_revolving_l1m,bank_card_payment_category,no_of_60p_accs_ever,no_of_deduped_inquiries,total_cl_open_mortgage_trades_l12m,total_balance_open_mortgage_trades_l12m,total_bal_of_open_pl_l12m,months_since_oldest_trade,no_of_mortgage_trades,no_of_open_cc_trades,no_of_business_general_trades,no_of_30p_accs_ever,no_of_trades,total_bal_open_trades_l12m,total_bal_open_auto_trades_l12m,months_since_recent_chargedoff180P,total_cl_open_auto_trades_l12m,total_cl_open_pl_trades_l12m,no_of_chargedoff180P,total_cl_of_cc_trades_l12m,no_of_90p_accs_ever,total_bal_of_chargedoff_trades,worst_rating_l12m,current_utilization_of_top_wallet_bankcard,no_of_trades_pl_2w_gl_cd,total_bal_open_trades_l12m_except_wo_cc_od,no_of_deduped_inquiries_l12m,no_of_deduped_inquiries_l6m,no_of_deduped_inquiries_l3m,no_of_deduped_inquiries_l1m,worst_rating_l24m,worst_rating_l6m,worst_rating_l3m,worst_rating_l1m,no_of_60p_accs_l24m,no_of_60p_accs_l12m,no_of_trades_bounced_l36m,no_of_trades_bounced_l12m,no_of_cc_trades_60P_l24m,no_of_30P_cc_l24m,no_of_cc_trades_60P_l12m,no_of_30P_cc_l12m,no_of_cc_lt30p_l12m,no_cc_sanctionedamt_lt30K_l36m,no_pl_sanctionedamt_lt30K_l36m,no_tr_opened_l6m,worst_dpd_l36m,total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades,no_tr_opened_l12m,no_tr_opened_l3m,per_of_30p_accs_ever,total_payment_amount_bankcard_accs_l3m,max_revolving_monthly_spend_l12m,total_monthly_obligations_l3m,total_balance_bankcard_accs_l3m,no_of_trades_unsec,total_payment_amount_l3m,worst_dpd_l2m,secured_accounts_count,unsecured_accounts_count,secured_high_credit_sum,unsecured_high_credit_sum,secured_amount_overdue_sum,unsecured_amount_overdue_sum,secured_balances_sum,unsecured_balances_sum,own_accounts_count,other_accounts_count,bureau_score,createdate,current_salary,available_income,foir_dlq,foir_cibil,foir_trd,foir_mean,new_foir,naps_score,rnk
0,fb73ec80-5e7c-44a6-924e-1a9c8c94b2e3,Male,250002,26,MEERUT,UTTAR PRADESH,English,0971202b-7db5-43ed-9f7f-ba0cb5e0d553,3c38aece-32cc-4575-876b-d801e01daed6,95.07,0.0,TRANSACTOR,0.0,17.0,-1.0,-1.0,32605.0,31.0,0.0,3.0,0.0,0.0,6.0,105449.0,-1.0,-1.0,-1.0,52000.0,0.0,96000.0,0.0,-1.0,1.5,87.0,2.0,39567.0,13.0,6.0,3.0,1.0,1.5,1.5,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,0.0,0.0,3.0,2.0,1.0,2.0,1.5,-1.0,2.0,0.0,0.0,20900.0,66888.0,8368.1,65882.0,6.0,76901.0,1.0,0.0,6.0,0.0,152888.0,0.0,0.0,0.0,105449.0,0.0,6.0,737.0,2023-04-01,48000.0,22727.55,0.5,0.5,0.45,0.48,68.18,726.0,1
1,e75d6041-28d0-4347-9f33-22f0a31d1730,Male,201204,36,GHAZIABAD,UTTAR PRADESH,,b93383b7-0aa5-4729-813b-5b849c2a3877,e8400508-de40-45fc-b4e5-06634f88350c,-1.0,-1.0,NOBC,0.0,13.0,-1.0,-1.0,612907.0,76.0,0.0,-1.0,0.0,0.0,7.0,612907.0,-1.0,-1.0,-1.0,699000.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,7.0,612907.0,6.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,7.0,7.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,1.0,1.0,1.0,-1.0,2.0,1.0,0.0,-1.0,-1.0,17256.0,-1.0,7.0,85832.0,1.0,0.0,4.0,0.0,699000.0,0.0,0.0,0.0,612907.0,0.0,4.0,733.0,2023-10-01,104850.0,30517.15,0.5,0.5,0.5,0.5,57.39,710.0,1
2,bb955e93-3725-40e9-8bc5-b40768fd621e,Male,673301,44,KOZHIKODE,KERALA,Malayalam,79bc9334-90c5-49c8-b160-6fc8faf874df,89806bb0-05b7-4a77-a1df-8d6f43562593,80.43,0.0,TRANSACTOR,0.0,12.0,-2.0,-2.0,-2.0,48.0,1.0,2.0,0.0,0.0,6.0,241282.0,-1.0,-1.0,-1.0,-2.0,0.0,300000.0,0.0,-1.0,1.5,-2.0,1.0,-1.0,4.0,0.0,0.0,0.0,1.5,1.5,1.0,1.0,0.0,0.0,6.0,6.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.5,-1.0,1.0,0.0,0.0,6460.0,146238.0,12064.1,241282.0,4.0,6460.0,1.0,2.0,4.0,4356884.0,1174908.0,0.0,0.0,4241312.0,877023.0,0.0,6.0,747.0,2023-05-01,233497.27,49942.47,0.5,0.4,0.43,0.44,57.08,787.0,1
3,71aee626-d46a-42dd-b037-a49746230055,Male,110035,45,DELHI,DELHI,English,f3c66ee6-4376-4537-a071-4b610545deb0,28c170b4-9c23-4806-bce2-c7224f8ef16e,-1.0,-1.0,NOBC,0.0,14.0,-1.0,-1.0,130524.0,48.0,0.0,-1.0,0.0,0.0,13.0,130524.0,-1.0,-1.0,-1.0,179159.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,12.0,130524.0,4.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,13.0,13.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,3.0,2.0,1.0,-1.0,3.0,0.0,0.0,-1.0,-1.0,8137.0,-1.0,13.0,19766.0,0.0,0.0,9.0,0.0,411960.0,0.0,0.0,0.0,130524.0,0.0,9.0,757.0,2023-05-01,48000.0,21473.8,0.5,0.4,0.5,0.47,71.32,734.0,1
4,10764b68-3575-4900-9592-63fb3cd39448,Male,500091,38,HYDERABAD,ANDHRA PRADESH,,db7bce66-fe38-4d4a-8618-c224fc7bc078,8ef0b234-0cdd-4799-bd0f-8d43fa018f2e,-2.0,-2.0,REVOLVER,0.0,19.0,2200000.0,2054514.0,236814.0,165.0,1.0,1.0,0.0,0.0,14.0,3695035.0,-1.0,-1.0,-1.0,256500.0,0.0,248692.0,0.0,-1.0,1.0,-2.0,7.0,3496162.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,14.0,14.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,-1.0,3.0,0.0,0.0,-2.0,16735.0,29199.65,198873.0,8.0,5608.0,0.0,3.0,8.0,2588271.0,1139662.0,0.0,0.0,2387688.0,628702.0,0.0,13.0,765.0,2022-09-01,149619.54,34911.23,0.5,0.4,0.36,0.42,61.73,769.0,1


In [12]:
disbursed_cases = disbursed_cases.drop_duplicates(subset=["id"])
final = total_disbursed_cases.merge(disbursed_cases, on="id", how="left").rename(
    columns={"id": "niro_opportunity_id"}
)
final.drop_duplicates(subset=["niro_opportunity_id"], inplace=True)
final.shape

(41770, 118)

In [13]:
final.head()

Unnamed: 0,niro_opportunity_id,category,user_id,principal_amount,interest_rate,tenure,selfie_liveness_score,selfie_liveness_result,kyc_status,pan_retry_count,nach_retry_count,remote_cpv_status,ckyc_status,supply,propensity,offer_type,customer_availability,income_source,purpose_of_loan,monthly_take_home_income,employer_name,nature_of_business,cpv_need_to_initiate_at,residence_ownership_type,marital_status,designation,cpv_type,agency_status,demand,disbursement_date,bureau_gender,bureau_pincode,calculated_age,bureau_city,bureau_state,native_language,tli_id,pii_id,max_aggregate_bankcard_utilisation_l12m,cc_util_revolving_l1m,bank_card_payment_category,no_of_60p_accs_ever,no_of_deduped_inquiries,total_cl_open_mortgage_trades_l12m,total_balance_open_mortgage_trades_l12m,total_bal_of_open_pl_l12m,months_since_oldest_trade,no_of_mortgage_trades,no_of_open_cc_trades,no_of_business_general_trades,no_of_30p_accs_ever,no_of_trades,total_bal_open_trades_l12m,total_bal_open_auto_trades_l12m,months_since_recent_chargedoff180P,total_cl_open_auto_trades_l12m,total_cl_open_pl_trades_l12m,no_of_chargedoff180P,total_cl_of_cc_trades_l12m,no_of_90p_accs_ever,total_bal_of_chargedoff_trades,worst_rating_l12m,current_utilization_of_top_wallet_bankcard,no_of_trades_pl_2w_gl_cd,total_bal_open_trades_l12m_except_wo_cc_od,no_of_deduped_inquiries_l12m,no_of_deduped_inquiries_l6m,no_of_deduped_inquiries_l3m,no_of_deduped_inquiries_l1m,worst_rating_l24m,worst_rating_l6m,worst_rating_l3m,worst_rating_l1m,no_of_60p_accs_l24m,no_of_60p_accs_l12m,no_of_trades_bounced_l36m,no_of_trades_bounced_l12m,no_of_cc_trades_60P_l24m,no_of_30P_cc_l24m,no_of_cc_trades_60P_l12m,no_of_30P_cc_l12m,no_of_cc_lt30p_l12m,no_cc_sanctionedamt_lt30K_l36m,no_pl_sanctionedamt_lt30K_l36m,no_tr_opened_l6m,worst_dpd_l36m,total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades,no_tr_opened_l12m,no_tr_opened_l3m,per_of_30p_accs_ever,total_payment_amount_bankcard_accs_l3m,max_revolving_monthly_spend_l12m,total_monthly_obligations_l3m,total_balance_bankcard_accs_l3m,no_of_trades_unsec,total_payment_amount_l3m,worst_dpd_l2m,secured_accounts_count,unsecured_accounts_count,secured_high_credit_sum,unsecured_high_credit_sum,secured_amount_overdue_sum,unsecured_amount_overdue_sum,secured_balances_sum,unsecured_balances_sum,own_accounts_count,other_accounts_count,bureau_score,createdate,current_salary,available_income,foir_dlq,foir_cibil,foir_trd,foir_mean,new_foir,naps_score,rnk
0,c1f020b6-a886-4d11-9f1f-9efede7923c7,CAT-D,b2c949d0-fca7-43b4-bacf-5cd02d96fb72,61029,27,10,1.0,PASS,PENDING,0,0.0,COMPLETED,COMPLETED,quikr,HiiProp,PRE_APPROVED,Yes,Self Employed,Purchase,15000,-,Boutique,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-03-13,Female,753001,42.0,CUTTACK,ODISHA,English,a197259d-327b-42e1-bc73-220e1d1bc12a,680ff9db-0cf6-4b7b-9140-e35e099ce314,-1.0,-1.0,NOBC,0.0,9.0,-1.0,-1.0,-1.0,15.0,0.0,-1.0,0.0,0.0,6.0,90184.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,5.0,90184.0,4.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,6.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,-1.0,1.0,1.0,-1.0,5.0,0.0,0.0,-1.0,-1.0,16440.0,-1.0,5.0,29002.0,1.0,0.0,6.0,0.0,210147.0,0.0,0.0,0.0,90184.0,0.0,6.0,755.0,2023-01-01,48000.0,23490.8,0.5,0.4,0.5,0.47,56.77,696.0,1.0
1,00e05a7c-b1ec-4284-9dfe-e3158fed0b5e,CAT-B,d1ae63a0-b89e-436f-be83-898c42b92a31,212606,27,24,1.0,PASS,PENDING,4,0.0,REQUIRED,COMPLETED,niro,MedProp,PRE_APPROVED,Yes,Self Employed,Business Development,45000,-,service provider,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-06-02,Male,999998,46.0,NAN,,Hindi,81779f4e-1332-4326-a878-8457cabab90a,dd14d4ed-bc0e-432c-b0f3-2fa3d2c82c44,9.95,0.0,RVLRPLUS,0.0,20.0,-1.0,-1.0,659473.0,107.0,0.0,2.0,0.0,0.0,12.0,661171.0,-1.0,-1.0,-1.0,808820.0,0.0,96000.0,0.0,-1.0,1.0,3.0,9.0,659473.0,8.0,2.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,12.0,12.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,2.0,1.0,-1.0,4.0,0.0,0.0,4397.0,23.0,9848.9,1698.0,12.0,4397.0,1.0,0.0,8.0,0.0,1037882.0,0.0,0.0,0.0,661171.0,0.0,8.0,761.0,2023-02-01,121323.0,37713.2,0.5,0.4,0.45,0.45,57.03,769.0,1.0
2,b55813db-dd34-4767-a3cd-d02b57e42db2,CAT-D,ff9e4435-662c-4c8e-b6ec-3cfbe0f2b6ff,53068,27,12,0.0,FAIL,COMPLETED,1,0.0,COMPLETED,PENDING,niro,LowProp,PRE_APPROVED,Yes,Salaried,Purchase,30500,Hitech Net Zone,-,Residence,Self owned,MARRIED,Administrator,PHYSICAL,Positive,payu,2023-06-07,Male,600073,41.0,CHENNAI,TAMIL NADU,Tamil,8b120df4-2967-4a28-b4b4-477c13f89d3e,3fe47752-525c-4408-9818-e838283e37de,0.0,0.0,INACTIVE,0.0,0.0,-1.0,-1.0,-1.0,100.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,60000.0,0.0,-1.0,1.0,-2.0,0.0,-1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,1.0,-1.0,0.0,0.0,0.0,-2.0,0.0,0.0,0.0,1.0,-2.0,1.0,0.0,1.0,0.0,4178.0,0.0,0.0,0.0,0.0,0.0,1.0,798.0,2023-04-01,48000.0,28000.0,0.5,0.4,0.45,0.45,39.25,742.0,1.0
3,98298a73-6df1-49e8-a390-5afd8477e1b4,CAT-B,803af291-d31b-49b7-8fb1-c606a58ea2c1,393464,20,36,1.0,PASS,COMPLETED,1,21.0,COMPLETED,PENDING,housing,MedProp,PRE_APPROVED,Yes,Salaried,House Renovation,140000,IBM India Private Limited,-,Residence,Rented,MARRIED,Senior technical engineer,PHYSICAL,Positive,payu,2023-07-23,Male,500001,32.0,HYDERABAD,ANDHRA PRADESH,English,c1147f0c-ec4f-42c4-ab25-c148dbe2b7d4,bf5bfe58-e0c0-476a-b6e2-b31a56101ef9,69.57,0.0,TRANSACTOR,0.0,15.0,-1.0,-1.0,4316607.0,80.0,0.0,1.0,0.0,0.0,8.0,4911728.0,-1.0,-1.0,-1.0,5105000.0,0.0,750000.0,0.0,-1.0,1.0,42.0,6.0,4594898.0,5.0,3.0,2.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,8.0,8.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,-1.0,4.0,0.0,0.0,-2.0,406707.0,129440.5,316830.0,8.0,90329.0,0.0,0.0,8.0,0.0,5926779.0,0.0,0.0,0.0,4911728.0,0.0,8.0,778.0,2023-07-01,480000.0,104413.6,0.5,0.4,0.43,0.44,54.08,793.0,1.0
4,988136ff-87fb-4421-bfdb-2a530c5286e6,CAT-D,a500085e-c970-4846-a973-73247599552c,53068,24,12,1.0,UNKNOWN,COMPLETED,0,0.0,COMPLETED,PENDING,snapdeal,HiiProp,PRE_APPROVED,Yes,Salaried,Marriage,30500,Texo Fashions,-,Residence,Family owned,MARRIED,Supervisor,REMOTE,Positive,payu,2023-02-27,Male,641606,31.0,COIMBATORE,TAMIL NADU,English,f24a32c3-e531-4c44-bf1e-9797ff384867,99c93407-f185-4cbd-ba98-e1f8e71a7cdf,-1.0,-1.0,NOBC,0.0,17.0,-1.0,-1.0,1000.0,12.0,0.0,-1.0,0.0,0.0,3.0,30279.0,-1.0,-1.0,-1.0,1000.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,3.0,30279.0,5.0,3.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,3.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,1.0,2.0,1.0,-1.0,3.0,1.0,0.0,-1.0,-1.0,5084.0,-1.0,3.0,-2.0,0.0,0.0,3.0,0.0,40829.0,0.0,0.0,0.0,30279.0,0.0,3.0,729.0,2023-01-01,48000.0,26486.05,0.5,0.5,0.5,0.5,49.28,693.0,1.0


In [14]:
cols = [
    "secured_accounts_count",
    "unsecured_accounts_count",
    "secured_high_credit_sum",
    "unsecured_high_credit_sum",
    "secured_amount_overdue_sum",
    "unsecured_amount_overdue_sum",
    "secured_balances_sum",
    "unsecured_balances_sum",
]

normal_cols = [
    "user_id",
    "niro_opportunity_id",
    "pin",
    "city",
    "income_source",
    "monthly_take_home_income",
    "current_salary",
    "cpv_type",
    "principal_amount",
    "interest_rate",
    "tenure",
    "supply",
    "foir_dlq",
    "foir_cibil",
    "foir_trd",
    "foir_mean",
    "new_foir",
    "demand",
    "disbursement_date",
    "createdate",
    "dec_reason",
    "naps_score",
    "phone_number",
    "bureau_score",
    "calculated_age",
    "own_accounts_count",
    "other_accounts_count",
]

final.rename(
    columns={i: i.replace("_", " ").title() for i in cols}, inplace=True, errors="raise"
)
final.rename(
    columns={
        "bureau_score": "CIBILTUSC3 Score Value",
        "calculated_age": "Calculated_Age",
        "new_foir": "New_FOIR",
        "current_salary": "Current_Salary",
        "other_accounts_count": "Other Accounts count",
        "own_accounts_count": "Own Accounts count",
    },
    inplace=True,
    errors="raise",
)
final.head()

Unnamed: 0,niro_opportunity_id,category,user_id,principal_amount,interest_rate,tenure,selfie_liveness_score,selfie_liveness_result,kyc_status,pan_retry_count,nach_retry_count,remote_cpv_status,ckyc_status,supply,propensity,offer_type,customer_availability,income_source,purpose_of_loan,monthly_take_home_income,employer_name,nature_of_business,cpv_need_to_initiate_at,residence_ownership_type,marital_status,designation,cpv_type,agency_status,demand,disbursement_date,bureau_gender,bureau_pincode,Calculated_Age,bureau_city,bureau_state,native_language,tli_id,pii_id,max_aggregate_bankcard_utilisation_l12m,cc_util_revolving_l1m,bank_card_payment_category,no_of_60p_accs_ever,no_of_deduped_inquiries,total_cl_open_mortgage_trades_l12m,total_balance_open_mortgage_trades_l12m,total_bal_of_open_pl_l12m,months_since_oldest_trade,no_of_mortgage_trades,no_of_open_cc_trades,no_of_business_general_trades,no_of_30p_accs_ever,no_of_trades,total_bal_open_trades_l12m,total_bal_open_auto_trades_l12m,months_since_recent_chargedoff180P,total_cl_open_auto_trades_l12m,total_cl_open_pl_trades_l12m,no_of_chargedoff180P,total_cl_of_cc_trades_l12m,no_of_90p_accs_ever,total_bal_of_chargedoff_trades,worst_rating_l12m,current_utilization_of_top_wallet_bankcard,no_of_trades_pl_2w_gl_cd,total_bal_open_trades_l12m_except_wo_cc_od,no_of_deduped_inquiries_l12m,no_of_deduped_inquiries_l6m,no_of_deduped_inquiries_l3m,no_of_deduped_inquiries_l1m,worst_rating_l24m,worst_rating_l6m,worst_rating_l3m,worst_rating_l1m,no_of_60p_accs_l24m,no_of_60p_accs_l12m,no_of_trades_bounced_l36m,no_of_trades_bounced_l12m,no_of_cc_trades_60P_l24m,no_of_30P_cc_l24m,no_of_cc_trades_60P_l12m,no_of_30P_cc_l12m,no_of_cc_lt30p_l12m,no_cc_sanctionedamt_lt30K_l36m,no_pl_sanctionedamt_lt30K_l36m,no_tr_opened_l6m,worst_dpd_l36m,total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades,no_tr_opened_l12m,no_tr_opened_l3m,per_of_30p_accs_ever,total_payment_amount_bankcard_accs_l3m,max_revolving_monthly_spend_l12m,total_monthly_obligations_l3m,total_balance_bankcard_accs_l3m,no_of_trades_unsec,total_payment_amount_l3m,worst_dpd_l2m,Secured Accounts Count,Unsecured Accounts Count,Secured High Credit Sum,Unsecured High Credit Sum,Secured Amount Overdue Sum,Unsecured Amount Overdue Sum,Secured Balances Sum,Unsecured Balances Sum,Own Accounts count,Other Accounts count,CIBILTUSC3 Score Value,createdate,Current_Salary,available_income,foir_dlq,foir_cibil,foir_trd,foir_mean,New_FOIR,naps_score,rnk
0,c1f020b6-a886-4d11-9f1f-9efede7923c7,CAT-D,b2c949d0-fca7-43b4-bacf-5cd02d96fb72,61029,27,10,1.0,PASS,PENDING,0,0.0,COMPLETED,COMPLETED,quikr,HiiProp,PRE_APPROVED,Yes,Self Employed,Purchase,15000,-,Boutique,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-03-13,Female,753001,42.0,CUTTACK,ODISHA,English,a197259d-327b-42e1-bc73-220e1d1bc12a,680ff9db-0cf6-4b7b-9140-e35e099ce314,-1.0,-1.0,NOBC,0.0,9.0,-1.0,-1.0,-1.0,15.0,0.0,-1.0,0.0,0.0,6.0,90184.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,5.0,90184.0,4.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,6.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,-1.0,1.0,1.0,-1.0,5.0,0.0,0.0,-1.0,-1.0,16440.0,-1.0,5.0,29002.0,1.0,0.0,6.0,0.0,210147.0,0.0,0.0,0.0,90184.0,0.0,6.0,755.0,2023-01-01,48000.0,23490.8,0.5,0.4,0.5,0.47,56.77,696.0,1.0
1,00e05a7c-b1ec-4284-9dfe-e3158fed0b5e,CAT-B,d1ae63a0-b89e-436f-be83-898c42b92a31,212606,27,24,1.0,PASS,PENDING,4,0.0,REQUIRED,COMPLETED,niro,MedProp,PRE_APPROVED,Yes,Self Employed,Business Development,45000,-,service provider,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-06-02,Male,999998,46.0,NAN,,Hindi,81779f4e-1332-4326-a878-8457cabab90a,dd14d4ed-bc0e-432c-b0f3-2fa3d2c82c44,9.95,0.0,RVLRPLUS,0.0,20.0,-1.0,-1.0,659473.0,107.0,0.0,2.0,0.0,0.0,12.0,661171.0,-1.0,-1.0,-1.0,808820.0,0.0,96000.0,0.0,-1.0,1.0,3.0,9.0,659473.0,8.0,2.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,12.0,12.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,2.0,1.0,-1.0,4.0,0.0,0.0,4397.0,23.0,9848.9,1698.0,12.0,4397.0,1.0,0.0,8.0,0.0,1037882.0,0.0,0.0,0.0,661171.0,0.0,8.0,761.0,2023-02-01,121323.0,37713.2,0.5,0.4,0.45,0.45,57.03,769.0,1.0
2,b55813db-dd34-4767-a3cd-d02b57e42db2,CAT-D,ff9e4435-662c-4c8e-b6ec-3cfbe0f2b6ff,53068,27,12,0.0,FAIL,COMPLETED,1,0.0,COMPLETED,PENDING,niro,LowProp,PRE_APPROVED,Yes,Salaried,Purchase,30500,Hitech Net Zone,-,Residence,Self owned,MARRIED,Administrator,PHYSICAL,Positive,payu,2023-06-07,Male,600073,41.0,CHENNAI,TAMIL NADU,Tamil,8b120df4-2967-4a28-b4b4-477c13f89d3e,3fe47752-525c-4408-9818-e838283e37de,0.0,0.0,INACTIVE,0.0,0.0,-1.0,-1.0,-1.0,100.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,60000.0,0.0,-1.0,1.0,-2.0,0.0,-1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,1.0,-1.0,0.0,0.0,0.0,-2.0,0.0,0.0,0.0,1.0,-2.0,1.0,0.0,1.0,0.0,4178.0,0.0,0.0,0.0,0.0,0.0,1.0,798.0,2023-04-01,48000.0,28000.0,0.5,0.4,0.45,0.45,39.25,742.0,1.0
3,98298a73-6df1-49e8-a390-5afd8477e1b4,CAT-B,803af291-d31b-49b7-8fb1-c606a58ea2c1,393464,20,36,1.0,PASS,COMPLETED,1,21.0,COMPLETED,PENDING,housing,MedProp,PRE_APPROVED,Yes,Salaried,House Renovation,140000,IBM India Private Limited,-,Residence,Rented,MARRIED,Senior technical engineer,PHYSICAL,Positive,payu,2023-07-23,Male,500001,32.0,HYDERABAD,ANDHRA PRADESH,English,c1147f0c-ec4f-42c4-ab25-c148dbe2b7d4,bf5bfe58-e0c0-476a-b6e2-b31a56101ef9,69.57,0.0,TRANSACTOR,0.0,15.0,-1.0,-1.0,4316607.0,80.0,0.0,1.0,0.0,0.0,8.0,4911728.0,-1.0,-1.0,-1.0,5105000.0,0.0,750000.0,0.0,-1.0,1.0,42.0,6.0,4594898.0,5.0,3.0,2.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,8.0,8.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,-1.0,4.0,0.0,0.0,-2.0,406707.0,129440.5,316830.0,8.0,90329.0,0.0,0.0,8.0,0.0,5926779.0,0.0,0.0,0.0,4911728.0,0.0,8.0,778.0,2023-07-01,480000.0,104413.6,0.5,0.4,0.43,0.44,54.08,793.0,1.0
4,988136ff-87fb-4421-bfdb-2a530c5286e6,CAT-D,a500085e-c970-4846-a973-73247599552c,53068,24,12,1.0,UNKNOWN,COMPLETED,0,0.0,COMPLETED,PENDING,snapdeal,HiiProp,PRE_APPROVED,Yes,Salaried,Marriage,30500,Texo Fashions,-,Residence,Family owned,MARRIED,Supervisor,REMOTE,Positive,payu,2023-02-27,Male,641606,31.0,COIMBATORE,TAMIL NADU,English,f24a32c3-e531-4c44-bf1e-9797ff384867,99c93407-f185-4cbd-ba98-e1f8e71a7cdf,-1.0,-1.0,NOBC,0.0,17.0,-1.0,-1.0,1000.0,12.0,0.0,-1.0,0.0,0.0,3.0,30279.0,-1.0,-1.0,-1.0,1000.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,3.0,30279.0,5.0,3.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,3.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,1.0,2.0,1.0,-1.0,3.0,1.0,0.0,-1.0,-1.0,5084.0,-1.0,3.0,-2.0,0.0,0.0,3.0,0.0,40829.0,0.0,0.0,0.0,30279.0,0.0,3.0,729.0,2023-01-01,48000.0,26486.05,0.5,0.5,0.5,0.5,49.28,693.0,1.0


In [15]:
final["disbursement_month"] = final["disbursement_date"].astype(str).str[:7]
final.groupby("disbursement_month").agg({"niro_opportunity_id": "count"})

Unnamed: 0_level_0,niro_opportunity_id
disbursement_month,Unnamed: 1_level_1
2021-12,1
2022-01,19
2022-02,54
2022-03,56
2022-04,132
2022-05,217
2022-06,366
2022-07,496
2022-08,805
2022-09,942


In [16]:
# amounts validation w.r.t category
final[(final["disbursement_month"] != "2023-12")].groupby("category").agg(
    {"principal_amount": "sum"}
)

Unnamed: 0_level_0,principal_amount
category,Unnamed: 1_level_1
CAT-A,952515903
CAT-B,3900277321
CAT-C,1320939610
CAT-D,553533847


In [17]:
# Total amount validation
final["principal_amount"].sum()

7353744623

In [18]:
# saving the file
final.to_csv(output_path / "dataInputs_dec.csv")

#### Adding repayment data

In [19]:
os.chdir("..")
fileloc = os.getcwd()
fileloc

'/Users/keyurpethad/Library/CloudStorage/GoogleDrive-keyur@niro.money/My Drive/Projects/Portfolio Analysis'

In [20]:
def diff_month(d1, d2):
    return (d1.year - d2.year) * 12 + d1.month - d2.month

In [21]:
perf = pd.read_excel(fileloc+"/inputs/PL Loan Book Jan'24.xlsx", header=2)
perf = perf[~perf['niro_user_id'].isna()]
print("Len of performance file is: "+str(len(perf)))

INFO:snowflake.connector.connection:closed
INFO:snowflake.connector.connection:No async queries seem to be running, deleting session


Len of performance file is: 40070


In [22]:
perf.head()

Unnamed: 0,niro_user_id,Niro Opportunity ID,Disbursal Partner Name,Platform Partner Name,Lender Loan Id,Disbursement Month,Month Number,Disbursement Year,Phone Numbers,Disbursement Date,Presentation Lag,Gross PF Charged,PF Refund,Final Proc Fees,GST,Pre-EMI,Disbursed Amount,Loan Amount (all inclusive),Loan Amount Rank,Interest Rate,Tenor In Months,Tenor Rank,PF %,Cost of Funds,Cost of Funds (New),Spread,Monthly Emi,Monthly Income (Net Take Home),Income Rank,Income source,Self Employed,Nature of Business,# Years in Employment,Residence Ownership Type,Owned,Purpose of Loan,CIBIL Score,CIBIL Band,CIBIL Score Rank,Risk Band,Risk Segmentation,Disbursement TAT,City,State,Pin Code,Tier,Gender,Male,Female,Age,customer_router,CPV Initiated,Lead Name,Loan Status,Foreclosure Date,Foreclosure Details,NPA Details,NPA setteled Status,Write-off Month,Feb Presentation,Mar Presentation,Apr Presentation,May Presentation,Jun Presentation,Jul Presentation,Aug Presentation,Sep Presentation,Oct Presentation,Nov Presentation,Dec Presentation,Jan'23 Presentation,Feb'23 Presentation,Mar'23 Presentation,Apr'23 Presentation,May'23 Presentation,Jun'23 Presentation,July'23 Presentation,Aug'23 Presentation,Sep'23 Presentation,Oct'23 Presentation,Nov'23 Presentation,Dec'23 Presentation,Jan'23 Presentation.1,Feb DPD,Mar DPD,Apr DPD,May DPD,Jun DPD,Jul DPD,Aug DPD,Sep DPD,Oct DPD,Nov DPD,Dec DPD,Jan'23 DPD,Feb'23 DPD,Mar'23 DPD,Apr'23 DPD,May'23 DPD,Jun'23 DPD,July'23 DPD,Aug'23 DPD,Sep'23 DPD,Oct'23 DPD,Nov'23 DPD,Dec'23 DPD,Mar DPD Rank,PayU'July'23 DPD,PayU'Aug'23 DPD,PayU'Sep'23 DPD,PayU'Oct'23 DPD,PayU'Nov'23 DPD,PayU'Dec'23 DPD,Sep DPD Rank,Jan - POS,Feb - POS,Mar - POS,Apr - POS,May - POS,Jun - POS,Jul - POS,Aug - POS,Sep - POS,Oct - POS,Nov - POS,Dec - POS,Jan'23 - POS,Feb'23 - POS,Mar'23 - POS,Apr'23 - POS,May'23 - POS,Jun'23 - POS,July'23 - POS,Aug'23 - POS,Sep'23 - POS,Oct'23 - POS,Nov'23 - POS,Dec'23 - POS,Int * Loan Amount (all Incl),CoF * Loan Amount (all Incl),Tenor * Loan Amount (all Incl),PF * Loan Amount (all Incl),Spread * Loan Amount (all Incl),FOIR,Ok/Problem Areas,Repeat Cases,DPD 90+ Months,DPD 60+ Months,90+ Amount,Feb - Repaid,Mar - Repaid,Apr - Repaid,May - Repaid,Jun - Repaid,Jul - Repaid,Aug - Repaid,Sep - Repaid,Oct - Repaid,Nov - Repaid,Dec - Repaid,Jan'23 - Repaid,Feb'23 - Repaid,Mar'23 - Repaid,Apr'23 - Repaid,May'23 - Repaid,Jun'23 - Repaid,July'23 - Repaid,Aug'23 - Repaid,Sep'23 - Repaid,Unnamed: 169,Unnamed: 170,Unnamed: 171,Unnamed: 172,Unnamed: 173
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,0f100756-11ae-4472-bd29-4fdbd6c37fdb,liquiloans,quikr,499217,2021-12-31,1,2021,9900368067,2021-12-31,Yes,2366,0,2366,426,0.0,50005,52797,2,27.0,12,2,0.044813,17.4,17.4,9.6,5069,26000,3,Salaried,0,'-',1.0,Rented,0,Others,764,760+,8,CAT-D,True,< 24 hrs,Bangalore,KARNATAKA,560085,Tier 1,Female,0,1,39,PL_OLD_PREAPPROVED,Yes,VINUTHA HONNE GOWDA,Closed,NaT,,,No,-,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,52797.0,48915.9325,44947.540981,40889.860653,36740.882518,32498.552375,28160.769803,23722.160542,19190.208334,14553.208334,9811.208334,4962.960521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1425519.0,918667.8,633564,2366.0,506851.2,0.44,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2021-12-31,1.0
1,e85b616e-e54b-4d67-b929-5f4b612404e2,188ec580-5b20-435b-8b02-b1fd0c439c6b,liquiloans,quikr,500342,2022-01-31,2,2022,8765513171,2022-01-04,No,2366,0,2366,426,0.0,50005,52797,2,27.0,12,2,0.044813,17.4,17.4,9.6,5069,20000,2,Salaried,0,'-',1.5,Owned,1,Marriage,746,740-760,7,CAT-D,True,< 24 hrs,Lucknow,UTTAR PRADESH,226029,Tier 2,Male,1,0,37,PL_OLD_PREAPPROVED,Yes,AKHILESH KUMAR YADAV,Foreclosed,2022-09-12,,,No,-,Cleared,Cleared,Cleared,Cleared,BOUNCED,BOUNCED,Bounced,Bounced,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,7.0,3.0,9.0,1.0,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,52797.0,48915.9325,44947.540981,40889.860653,36740.882518,32498.552375,28160.769803,23722.160542,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1425519.0,918667.8,633564,2366.0,506851.2,0.47,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-01-31,2.0
2,7f5aee97-5a17-421a-a94d-7db9c244265a,ad42040e-f906-451f-9fb7-51e7806f64a7,liquiloans,quikr,505767,2022-01-31,2,2022,8368166817,2022-01-04,No,1824,0,1824,328,0.0,50002,52154,2,23.5,12,2,0.034973,16.7,16.7,6.8,4919,32000,3,Salaried,0,'-',2.0,Owned,1,Others,775,760+,8,CAT-C,False,< 24 hrs,Delhi,DELHI,110043,Tier 1,Male,1,0,36,PL_OLD_PREAPPROVED,Yes,NARENDRA KUMAR CHOUDHARY,Foreclosed,2022-02-16,,,No,-,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,52154.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1225619.0,870971.8,625848,1824.0,354647.2,0.47,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-02-28,3.0
3,74d77339-a17e-4f57-8250-27f457d4be39,a1efbe44-4f49-4615-bd61-fcd02c3aa299,liquiloans,quikr,509929,2022-01-31,2,2022,8437737918,2022-01-05,No,915,0,915,165,0.0,60001,61081,2,19.5,12,2,0.01498,15.9,15.9,3.6,5644,25000,3,Salaried,0,'-',1.5,Owned,1,Others,778,760+,8,CAT-A,False,< 24 hrs,Chandigarh,CHANDIGARH,160036,Tier 2,Male,1,0,34,PL_OLD_PREAPPROVED,Yes,VIKRAM SINGH,Foreclosed,2022-06-21,,,No,-,Cleared,Cleared,Cleared,Cleared,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,61081.0,56429.56625,51702.546702,46898.713085,42016.817173,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1191079.5,971187.9,732972,915.0,219891.6,0.42,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-03-31,4.0
4,3515f18c-f39c-4faa-a6de-f0a3675922fe,b99f37fa-fbb7-4fdc-b555-565efd93a6ac,liquiloans,quikr,514338,2022-01-31,2,2022,8898663866,2022-01-07,No,4378,0,4378,788,0.0,170001,175167,6,17.0,24,4,0.024993,15.4,15.4,1.6,8661,20000,2,Salaried,0,'-',15.0,Owned,1,Others,780,760+,8,CAT-B,False,< 24 hrs,Mumbai,MAHARASHTRA,400072,Tier 1,Male,1,0,37,PL_OLD_PREAPPROVED,Yes,RAHUL KUMAR VADIKAR,Foreclosed,2022-02-10,,,No,-,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,175167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2977839.0,2697571.8,4204008,4378.0,280267.2,0.45,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-04-30,5.0


In [23]:
def convert(varOut, VarIn):
    try:
        perf[varOut] = pd.to_numeric(perf[VarIn].str.replace(",","").str.replace("-","").str.strip())
    except:
        perf[varOut] = perf[VarIn]+1-1

convert('Principal','Loan Amount (all inclusive)')
convert('Prin0','Feb - POS')
convert('Prin1','Mar - POS')
convert('Prin2','Apr - POS')
convert('Prin3','May - POS')
convert('Prin4','Jun - POS')
convert('Prin5','Jul - POS')
convert('Prin6','Aug - POS')
convert('Prin7','Sep - POS')
convert('Prin8','Oct - POS')
convert('Prin9','Nov - POS')
convert('Prin10','Dec - POS')
convert('Prin11',"Jan'23 - POS")
convert('Prin12',"Feb'23 - POS")
convert('Prin13',"Mar'23 - POS")
convert('Prin14',"Apr'23 - POS")
convert('Prin15',"May'23 - POS")
convert('Prin16',"Jun'23 - POS")
convert('Prin17',"July'23 - POS")
convert('Prin18',"Aug'23 - POS")
convert('Prin19',"Sep'23 - POS")
convert('Prin20',"Oct'23 - POS")
convert('Prin21',"Nov'23 - POS")
convert('Prin22',"Dec'23 - POS")

perf['DPD0'] = perf['Feb DPD']
perf['DPD1'] = perf['Mar DPD']
perf['DPD2'] = perf['Apr DPD']
perf['DPD3'] = perf['May DPD']
perf['DPD4'] = perf['Jun DPD']
perf['DPD5'] = perf['Jul DPD']
perf['DPD6'] = perf['Aug DPD']
perf['DPD7'] = perf['Sep DPD']
perf['DPD8'] = perf['Oct DPD']
perf['DPD9'] = perf['Nov DPD']
perf['DPD10'] = perf['Dec DPD']
perf['DPD11'] = perf["Jan'23 DPD"]
perf['DPD12'] = perf["Feb'23 DPD"]
perf['DPD13'] = perf["Mar'23 DPD"]
perf['DPD14'] = perf["Apr'23 DPD"]
perf['DPD15'] = perf["May'23 DPD"]
perf['DPD16'] = perf["Jun'23 DPD"]
perf['DPD17'] = perf["July'23 DPD"]
perf['DPD18'] = perf["Aug'23 DPD"]
perf['DPD19'] = perf["Sep'23 DPD"]
perf['DPD20'] = perf["Oct'23 DPD"]
perf['DPD21'] = perf["Nov'23 DPD"]
perf['DPD22'] = perf["Dec'23 DPD"]

perf['Presentation0'] = perf['Feb Presentation']
perf['Presentation1'] = perf['Mar Presentation']
perf['Presentation2'] = perf['Apr Presentation']
perf['Presentation3'] = perf['May Presentation']
perf['Presentation4'] = perf['Jun Presentation']
perf['Presentation5'] = perf['Jul Presentation']
perf['Presentation6'] = perf['Aug Presentation']
perf['Presentation7'] = perf['Sep Presentation']
perf['Presentation8'] = perf['Oct Presentation']
perf['Presentation9'] = perf['Nov Presentation']
perf['Presentation10'] = perf['Dec Presentation']
perf['Presentation11'] = perf["Jan'23 Presentation"]
perf['Presentation12'] = perf["Feb'23 Presentation"]
perf['Presentation13'] = perf["Mar'23 Presentation"]
perf['Presentation14'] = perf["Apr'23 Presentation"]
perf['Presentation15'] = perf["May'23 Presentation"]
perf['Presentation16'] = perf["Jun'23 Presentation"]
perf['Presentation17'] = perf["July'23 Presentation"]
perf['Presentation18'] = perf["Aug'23 Presentation"]
perf['Presentation19'] = perf["Sep'23 Presentation"]
perf['Presentation20'] = perf["Oct'23 Presentation"]
perf['Presentation21'] = perf["Nov'23 Presentation"]
perf['Presentation22'] = perf["Dec'23 Presentation"]
perf['Presentation23'] = perf["Jan'23 Presentation"]

In [24]:
perf.head()

Unnamed: 0,niro_user_id,Niro Opportunity ID,Disbursal Partner Name,Platform Partner Name,Lender Loan Id,Disbursement Month,Month Number,Disbursement Year,Phone Numbers,Disbursement Date,Presentation Lag,Gross PF Charged,PF Refund,Final Proc Fees,GST,Pre-EMI,Disbursed Amount,Loan Amount (all inclusive),Loan Amount Rank,Interest Rate,Tenor In Months,Tenor Rank,PF %,Cost of Funds,Cost of Funds (New),Spread,Monthly Emi,Monthly Income (Net Take Home),Income Rank,Income source,Self Employed,Nature of Business,# Years in Employment,Residence Ownership Type,Owned,Purpose of Loan,CIBIL Score,CIBIL Band,CIBIL Score Rank,Risk Band,Risk Segmentation,Disbursement TAT,City,State,Pin Code,Tier,Gender,Male,Female,Age,customer_router,CPV Initiated,Lead Name,Loan Status,Foreclosure Date,Foreclosure Details,NPA Details,NPA setteled Status,Write-off Month,Feb Presentation,Mar Presentation,Apr Presentation,May Presentation,Jun Presentation,Jul Presentation,Aug Presentation,Sep Presentation,Oct Presentation,Nov Presentation,Dec Presentation,Jan'23 Presentation,Feb'23 Presentation,Mar'23 Presentation,Apr'23 Presentation,May'23 Presentation,Jun'23 Presentation,July'23 Presentation,Aug'23 Presentation,Sep'23 Presentation,Oct'23 Presentation,Nov'23 Presentation,Dec'23 Presentation,Jan'23 Presentation.1,Feb DPD,Mar DPD,Apr DPD,May DPD,Jun DPD,Jul DPD,Aug DPD,Sep DPD,Oct DPD,Nov DPD,Dec DPD,Jan'23 DPD,Feb'23 DPD,Mar'23 DPD,Apr'23 DPD,May'23 DPD,Jun'23 DPD,July'23 DPD,Aug'23 DPD,Sep'23 DPD,Oct'23 DPD,Nov'23 DPD,Dec'23 DPD,Mar DPD Rank,PayU'July'23 DPD,PayU'Aug'23 DPD,PayU'Sep'23 DPD,PayU'Oct'23 DPD,PayU'Nov'23 DPD,PayU'Dec'23 DPD,Sep DPD Rank,Jan - POS,Feb - POS,Mar - POS,Apr - POS,May - POS,Jun - POS,Jul - POS,Aug - POS,Sep - POS,Oct - POS,Nov - POS,Dec - POS,Jan'23 - POS,Feb'23 - POS,Mar'23 - POS,Apr'23 - POS,May'23 - POS,Jun'23 - POS,July'23 - POS,Aug'23 - POS,Sep'23 - POS,Oct'23 - POS,Nov'23 - POS,Dec'23 - POS,Int * Loan Amount (all Incl),CoF * Loan Amount (all Incl),Tenor * Loan Amount (all Incl),PF * Loan Amount (all Incl),Spread * Loan Amount (all Incl),FOIR,Ok/Problem Areas,Repeat Cases,DPD 90+ Months,DPD 60+ Months,90+ Amount,Feb - Repaid,Mar - Repaid,Apr - Repaid,May - Repaid,Jun - Repaid,Jul - Repaid,Aug - Repaid,Sep - Repaid,Oct - Repaid,Nov - Repaid,Dec - Repaid,Jan'23 - Repaid,Feb'23 - Repaid,Mar'23 - Repaid,Apr'23 - Repaid,May'23 - Repaid,Jun'23 - Repaid,July'23 - Repaid,Aug'23 - Repaid,Sep'23 - Repaid,Unnamed: 169,Unnamed: 170,Unnamed: 171,Unnamed: 172,Unnamed: 173,Principal,Prin0,Prin1,Prin2,Prin3,Prin4,Prin5,Prin6,Prin7,Prin8,Prin9,Prin10,Prin11,Prin12,Prin13,Prin14,Prin15,Prin16,Prin17,Prin18,Prin19,Prin20,Prin21,Prin22,DPD0,DPD1,DPD2,DPD3,DPD4,DPD5,DPD6,DPD7,DPD8,DPD9,DPD10,DPD11,DPD12,DPD13,DPD14,DPD15,DPD16,DPD17,DPD18,DPD19,DPD20,DPD21,DPD22,Presentation0,Presentation1,Presentation2,Presentation3,Presentation4,Presentation5,Presentation6,Presentation7,Presentation8,Presentation9,Presentation10,Presentation11,Presentation12,Presentation13,Presentation14,Presentation15,Presentation16,Presentation17,Presentation18,Presentation19,Presentation20,Presentation21,Presentation22,Presentation23
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,0f100756-11ae-4472-bd29-4fdbd6c37fdb,liquiloans,quikr,499217,2021-12-31,1,2021,9900368067,2021-12-31,Yes,2366,0,2366,426,0.0,50005,52797,2,27.0,12,2,0.044813,17.4,17.4,9.6,5069,26000,3,Salaried,0,'-',1.0,Rented,0,Others,764,760+,8,CAT-D,True,< 24 hrs,Bangalore,KARNATAKA,560085,Tier 1,Female,0,1,39,PL_OLD_PREAPPROVED,Yes,VINUTHA HONNE GOWDA,Closed,NaT,,,No,-,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,52797.0,48915.9325,44947.540981,40889.860653,36740.882518,32498.552375,28160.769803,23722.160542,19190.208334,14553.208334,9811.208334,4962.960521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1425519.0,918667.8,633564,2366.0,506851.2,0.44,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2021-12-31,1.0,52797,48915.9325,44947.540981,40889.860653,36740.882518,32498.552375,28160.769803,23722.160542,19190.208334,14553.208334,9811.208334,4962.960521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Cleared,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Closed,Cleared
1,e85b616e-e54b-4d67-b929-5f4b612404e2,188ec580-5b20-435b-8b02-b1fd0c439c6b,liquiloans,quikr,500342,2022-01-31,2,2022,8765513171,2022-01-04,No,2366,0,2366,426,0.0,50005,52797,2,27.0,12,2,0.044813,17.4,17.4,9.6,5069,20000,2,Salaried,0,'-',1.5,Owned,1,Marriage,746,740-760,7,CAT-D,True,< 24 hrs,Lucknow,UTTAR PRADESH,226029,Tier 2,Male,1,0,37,PL_OLD_PREAPPROVED,Yes,AKHILESH KUMAR YADAV,Foreclosed,2022-09-12,,,No,-,Cleared,Cleared,Cleared,Cleared,BOUNCED,BOUNCED,Bounced,Bounced,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,7.0,3.0,9.0,1.0,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,52797.0,48915.9325,44947.540981,40889.860653,36740.882518,32498.552375,28160.769803,23722.160542,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1425519.0,918667.8,633564,2366.0,506851.2,0.47,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-01-31,2.0,52797,48915.9325,44947.540981,40889.860653,36740.882518,32498.552375,28160.769803,23722.160542,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,7.0,3.0,9.0,1.0,,,,-,-,-,-,-,-,-,-,-,-,-,-,Cleared,Cleared,Cleared,Cleared,BOUNCED,BOUNCED,Bounced,Bounced,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED
2,7f5aee97-5a17-421a-a94d-7db9c244265a,ad42040e-f906-451f-9fb7-51e7806f64a7,liquiloans,quikr,505767,2022-01-31,2,2022,8368166817,2022-01-04,No,1824,0,1824,328,0.0,50002,52154,2,23.5,12,2,0.034973,16.7,16.7,6.8,4919,32000,3,Salaried,0,'-',2.0,Owned,1,Others,775,760+,8,CAT-C,False,< 24 hrs,Delhi,DELHI,110043,Tier 1,Male,1,0,36,PL_OLD_PREAPPROVED,Yes,NARENDRA KUMAR CHOUDHARY,Foreclosed,2022-02-16,,,No,-,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,52154.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1225619.0,870971.8,625848,1824.0,354647.2,0.47,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-02-28,3.0,52154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED
3,74d77339-a17e-4f57-8250-27f457d4be39,a1efbe44-4f49-4615-bd61-fcd02c3aa299,liquiloans,quikr,509929,2022-01-31,2,2022,8437737918,2022-01-05,No,915,0,915,165,0.0,60001,61081,2,19.5,12,2,0.01498,15.9,15.9,3.6,5644,25000,3,Salaried,0,'-',1.5,Owned,1,Others,778,760+,8,CAT-A,False,< 24 hrs,Chandigarh,CHANDIGARH,160036,Tier 2,Male,1,0,34,PL_OLD_PREAPPROVED,Yes,VIKRAM SINGH,Foreclosed,2022-06-21,,,No,-,Cleared,Cleared,Cleared,Cleared,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,61081.0,56429.56625,51702.546702,46898.713085,42016.817173,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1191079.5,971187.9,732972,915.0,219891.6,0.42,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-03-31,4.0,61081,56429.56625,51702.546702,46898.713085,42016.817173,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,Cleared,Cleared,Cleared,Cleared,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED
4,3515f18c-f39c-4faa-a6de-f0a3675922fe,b99f37fa-fbb7-4fdc-b555-565efd93a6ac,liquiloans,quikr,514338,2022-01-31,2,2022,8898663866,2022-01-07,No,4378,0,4378,788,0.0,170001,175167,6,17.0,24,4,0.024993,15.4,15.4,1.6,8661,20000,2,Salaried,0,'-',15.0,Owned,1,Others,780,760+,8,CAT-B,False,< 24 hrs,Mumbai,MAHARASHTRA,400072,Tier 1,Male,1,0,37,PL_OLD_PREAPPROVED,Yes,RAHUL KUMAR VADIKAR,Foreclosed,2022-02-10,,,No,-,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,,-,-,-,-,-,-,,175167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2977839.0,2697571.8,4204008,4378.0,280267.2,0.45,OK Area,1.0,-,-,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,2022-04-30,5.0,175167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,-,-,-,-,-,-,-,-,-,-,-,-,Cleared,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED,FORECLOSED


In [25]:
#check this logic
for x in range(22):
    p=x+1
    print(p)
    perf['Prin'+str(p)] = np.select(condlist=[(perf['Prin'+str(p)]>0).fillna(False),
                                              perf['Presentation'+str(p)].str.upper().str.contains('CLOSE').fillna(False),\
                                              perf['Presentation'+str(p+1)].str.upper().str.contains('CLOSE').fillna(False)],
                                    choicelist=[perf['Prin'+str(p)],0,0],
                                    default = perf['Prin'+str(p-1)])
    

perf['disb_dt'] = pd.to_datetime(perf['Disbursement Date'], format= '%d-%b-%y', errors = 'coerce')
perf['disb_yymm'] = perf['disb_dt'].dt.to_period('M').dt.to_timestamp()
perf['foreclosure_dt'] = pd.to_datetime(perf['Foreclosure Date'], format= '%d-%b-%y', errors = 'coerce')
perf['mth_to_foreclose'] = 12*(perf['foreclosure_dt'].dt.year - perf['disb_yymm'].dt.year)+\
                           (perf['foreclosure_dt'].dt.month - perf['disb_yymm'].dt.month)
##### ------------------ Change here ------------------
perf_enddate = datetime.datetime(2023,12,1)
##### -------------------------------------------------
perf_startdate = datetime.datetime(2022,2,1)

perf['months_since_disb'] = (12*(perf_enddate.year - perf['disb_yymm'].dt.year)+\
                           (perf_enddate.month - perf['disb_yymm'].dt.month)+1).astype(int)
perf['PF'] = pd.to_numeric(perf["PF %"].astype(str).str.replace(" ","").str.replace("%","").str.strip())

# recovery = pd.read_csv(fileloc+'/recovery.csv',low_memory=False)
# recovery['Niro Opportunity ID'] = recovery['Opportunity ID']
# recovery['recovery'] = pd.to_numeric(recovery['POS at time of NPA'].str.replace(",",""),errors='coerce')-\
#                        pd.to_numeric(recovery['Current POS'].str.replace(",",""),errors='coerce')
# recovery = recovery[recovery['recovery']>0]
# recovery = recovery[['Niro Opportunity ID','recovery']]

# perf = perf.merge(recovery,how='left',on='Niro Opportunity ID')
# perf['recovery'].fillna(0,inplace=True)

# perf.to_csv(fileloc+'aa.csv')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


In [26]:
final['ctc'] = pd.to_numeric(final['monthly_take_home_income'].str.replace(",","").str.replace("-",""),errors='coerce')

In [27]:

def cibil_score_bkt(row, var):
    if row[var] <= 736:
        return '<=736'
    elif row[var] <= 745:
        return '737-745'
    elif row[var] <= 749:
        return '746-749'
    elif row[var] <= 757:
        return '750-757'
    elif row[var] <= 760:
        return '758-760'
    elif row[var] <= 766:
        return '761-766'
    elif row[var] <= 772:
        return '767-772'
    elif row[var] <= 776:
        return '773-776'
    elif row[var] <= 784:
        return '777-784'
    else:
        return '785+'

def naps_score_bkt(row, var):
    if row[var] <= 689:
        return '<=689'
    elif row[var] <= 704:
        return '690-704'
    elif row[var] <= 713:
        return '705-713'
    elif row[var] <= 728:
        return '714-728'
    elif row[var] <= 742:
        return '723-742'
    elif row[var] <= 751:
        return '743-751'
    elif row[var] <= 770:
        return '752-770'
    elif row[var] <= 790:
        return '771-790'
    elif row[var] <= 818:
        return '791-818'
    else:
        return '819+'

def napstu_score_bkt(row, var):
    if row[var] <= 694:
        return '<=694'
    elif row[var] <= 705:
        return '695-705'
    elif row[var] <= 714:
        return '706-714'
    elif row[var] <= 727:
        return '715-727'
    elif row[var] <= 743:
        return '728-743'
    elif row[var] <= 759:
        return '744-759'
    elif row[var] <= 780:
        return '760-780'
    elif row[var] <= 805:
        return '781-805'
    elif row[var] <= 847:
        return '806-847'
    else:
        return '848+'


#statename
#BCPMTSTR

final['CIBIL'] = final.apply(cibil_score_bkt,var='CIBILTUSC3 Score Value',axis=1)

final['naps_band'] = final.apply(napstu_score_bkt,var='naps_score',axis=1)

final['no_of_open_cc_trades_bkt'] = np.select(condlist=[final['no_of_open_cc_trades']<=0,
                                             final['no_of_open_cc_trades']<=2,
                                             final['no_of_open_cc_trades']<=4,
                                             final['no_of_open_cc_trades']>4,],choicelist=['No Credit Cards','1-2',
                                                                                        '3-4','5+'],default='No Credit Cards')
final['Years on Bureau_bkt'] = np.select(condlist=[final['months_since_oldest_trade']<=24,
                                             final['months_since_oldest_trade']<=48,
                                             final['months_since_oldest_trade']<=60,
                                             final['months_since_oldest_trade']>60,],choicelist=['1-2 Yrs','2-4 Yrs',
                                                                                        '4-5 Yrs','5+ Yrs'],default='None')
final['no_of_deduped_inquiries_bkt'] = np.select(condlist=[final['no_of_deduped_inquiries_l12m']<=1,
                                             final['no_of_deduped_inquiries_l12m']<=3,
                                             final['no_of_deduped_inquiries_l12m']<=6,
                                             final['no_of_deduped_inquiries_l12m']>6,],choicelist=['<=1','2-3',
                                                                                        '4-6','7+'],default='None')

final['Secured Loan_bkt'] = np.select(condlist=[final['Secured High Credit Sum']<=0,
                                             final['Secured High Credit Sum']<=200000,
                                             final['Secured High Credit Sum']<=750000,
                                             final['Secured High Credit Sum']>750000,],choicelist=['No Secured loan','Secured Loan < 2L',
                                                                                        'Secured Loan < 7.5L','Secured Loan 7.5L+'],
                                  default='No Secured loan')

final['Income_bkt'] = np.select(condlist=[final['ctc']<=15000,
                                            final['ctc']<=20000,
                                            final['ctc']<=25000,
                                             final['ctc']<=30000,
                                             final['ctc']<=35000,
                                            final['ctc']<=40000,
                                            final['ctc']<=50000,
                                            final['ctc']<=75000,
                                             final['ctc']>75000],choicelist=['<=15000','15000-20000','20000-25000','25000-30000',
                                                                                '30000-35000','35000-40000','40000-50000',
                                                                                '50000-75000','75000+'],\
                            default='None')

final['FOIR_Band'] = np.select(condlist=[final['New_FOIR']<=35,
                                             final['New_FOIR']<=50,
                                             final['New_FOIR']<=60,
                                            final['New_FOIR']>60],choicelist=['<=35%','35-50%',
                                                                                        '50-60%','60-70%'],default='None')

#interest_rate
final['Time_on_Bureau_bkt'] = np.select(condlist=[final['months_since_oldest_trade']<=12,
                                                final['months_since_oldest_trade']<=18,
                                             final['months_since_oldest_trade']<=24,
                                             final['months_since_oldest_trade']<=36,
                                            final['months_since_oldest_trade']<=60,
                                            final['months_since_oldest_trade']<=84,
                                            final['months_since_oldest_trade']>84],
                                    choicelist=['<=12Mths','12-18Mths','19-24Mths','2-3 Yrs','3-5 Yrs','5-7 Yrs','7+ Yrs'],
                                    default='None')

final['cust_category'] = np.select(condlist=[((final['no_of_open_cc_trades']>=1) & (final['total_cl_of_cc_trades_l12m']>=50000))&((final['total_cl_open_auto_trades_l12m']>=200000)|(final['total_cl_open_mortgage_trades_l12m']>=1000000)),\
                                          ((final['no_of_open_cc_trades']>=1) & (final['total_cl_of_cc_trades_l12m']>=50000)),
                                          ((final['no_of_open_cc_trades']<=0) & (final['Unsecured High Credit Sum']>=50000))
                                            ],choicelist=['Carded 50K+ With Secured Loans','Carded 50K+',\
                                                          'Unsecured Loan 50K+'],
                                    default='None')

final['cust_hirisk'] = np.select(condlist=[((final['Unsecured High Credit Sum']<=82000) &
                                            (final['no_of_deduped_inquiries_l1m']>=2)&
                                           (final['no_of_trades_pl_2w_gl_cd']>=3))
                                            ],choicelist=[1],default=0)

final['niro_user_id'] = final['user_id'].astype(str).str.strip()


In [28]:
column_name_map = {'agg911': 'max_aggregate_bankcard_utilisation_l12m', 'rvlr01': 'cc_util_revolving_l1m', 'bcpmtstr': 'bank_card_payment_category', 'cv11': 'no_of_60p_accs_ever', 'cv14': 'no_of_deduped_inquiries', 
                   'mt28s': 'total_cl_open_mortgage_trades_l12m', 'mt33s': 'total_balance_open_mortgage_trades_l12m', 'pl33s':'total_bal_of_open_pl_l12m', 'at20s':'months_since_oldest_trade', 
                   'mt01s':'no_of_mortgage_trades', 'bc02s':'no_of_open_cc_trades', 'bg01s':'no_of_business_general_trades', 'cv10':'no_of_30p_accs_ever', 'trd':'no_of_trades', 'at33a':'total_bal_open_trades_l12m',
                   'au33s':'total_bal_open_auto_trades_l12m', 'co04s180':'months_since_recent_chargedoff180P', 'au28s':'total_cl_open_auto_trades_l12m', 'pl28s':'total_cl_open_pl_trades_l12m',
                   'co01s180':'no_of_chargedoff180P', 'bc28s':'total_cl_of_cc_trades_l12m', 'cv12':'no_of_90p_accs_ever', 'co05s':'total_bal_of_chargedoff_trades', 'g310s':'worst_rating_l12m',
                   'aggs911':'current_utilization_of_top_wallet_bankcard', 'at01s':'no_of_trades_pl_2w_gl_cd', 'at33a_ne_ccod':'total_bal_open_trades_l12m_except_wo_cc_od', 'cv14_12m':'no_of_deduped_inquiries_l12m',
                   'cv14_6m':'no_of_deduped_inquiries_l6m', 'cv14_3m':'no_of_deduped_inquiries_l3m', 'cv14_1m':'no_of_deduped_inquiries_l1m', 'g310s_24m':'worst_rating_l24m',
                   'g310s_6m':'worst_rating_l6m', 'g310s_3m':'worst_rating_l3m', 'g310s_1m':'worst_rating_l1m', 'cv11_24m':'no_of_60p_accs_l24m', 'cv11_12m':'no_of_60p_accs_l12m', 'g057s_1dpd_36m': 'no_of_trades_bounced_l36m',
                   'g057s_1dpd_12m':'no_of_trades_bounced_l12m', 'bc106s_60dpd':'no_of_cc_trades_60P_l24m', 'bc107s_24m':'no_of_30P_cc_l24m', 'bc106s_60dpd_12m':'no_of_cc_trades_60P_l12m', 'bc107s_12m':'no_of_30P_cc_l12m',
                   'bc106s_le_30dpd_12m':'no_of_cc_lt30p_l12m', 'bc09s_36m_hcsa_le_30':'no_cc_sanctionedamt_lt30K_l36m', 'pl09s_36m_hcsa_le_30':'no_pl_sanctionedamt_lt30K_l36m', 'at09s_6m':'no_tr_opened_l6m',
                   'g310s_36m':'worst_dpd_l36m', 'at33a_ne_wo':'total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades', 'at09s_12m':'no_tr_opened_l12m', 'at09s_3m':'no_tr_opened_l3m', 'cv13':'per_of_30p_accs_ever',
                    'cv24':'total_payment_amount_bankcard_accs_l3m', 'revs904':'max_revolving_monthly_spend_l12m', 'cv20':'total_monthly_obligations_l3m', 'cv22':'total_balance_bankcard_accs_l3m', 
                    'ul_trd':'no_of_trades_unsec', 'cv21':'total_payment_amount_l3m', 'g310s_2m':'worst_dpd_l2m'}

In [29]:

def preprocessor(df):

    try:
        df['CC_INACTIVE'] = np.select(condlist=[(df['bank_card_payment_category']=='INACTIVE')],choicelist=[1],default=0)
        df['CC_NOBC'] = np.select(condlist=[(df['bank_card_payment_category']=='NOBC')],choicelist=[1],default=0)
        df['CC_REVOLVER'] = np.select(condlist=[(df['bank_card_payment_category']=='REVOLVER')],choicelist=[1],default=0)
        df['CC_RVLRPLUS'] = np.select(condlist=[(df['bank_card_payment_category']=='RVLRPLUS')],choicelist=[1],default=0)
        df['CC_TRANSACTOR'] = np.select(condlist=[(df['bank_card_payment_category']=='TRANSACTOR')],choicelist=[1],default=0)
        df['CC_TRANPLUS'] = np.select(condlist=[(df['bank_card_payment_category']=='TRANPLUS')],choicelist=[1],default=0)
    except:
        print("errors")

    df['credithungry'] = np.select(condlist=[((df['UNSECURED_HIGH_CREDIT_SUM']<=75000)&(df['SECURED_HIGH_CREDIT_SUM']<=0)&\
                                            (df['no_of_deduped_inquiries_l6m']>=4))],choicelist=[1],default=0)

    df['no_of_open_cc_trades_rnkXno_of_business_general_trades_rnkXno_of_90p_accs_ever_rnkX3091']=np.select(condlist=[((df['no_of_open_cc_trades']>=2.0)&(df['no_of_open_cc_trades']<=21.0)&(df['no_of_business_general_trades']>=0.0)&(df['no_of_business_general_trades']<=0.0)&(df['no_of_90p_accs_ever']>=1.0)&(df['no_of_90p_accs_ever']<=11.0))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_cc_util_revolving_l1m_6020']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>39.22)&(df['max_aggregate_bankcard_utilisation_l12m']<=80.38)&(df['cc_util_revolving_l1m']>9.21)&(df['cc_util_revolving_l1m']<=538.69))],choicelist=[1],default=0)
    df['no_of_trades_pl_2w_gl_cd_worst_rating_l24m__unsecured_high_credit_sum_42']=np.select(condlist=[((df['no_of_trades_pl_2w_gl_cd']>10)&(df['no_of_trades_pl_2w_gl_cd']<=284)&(df['worst_rating_l24m']>-0.01)&(df['worst_rating_l24m']<=1)&(df['UNSECURED_HIGH_CREDIT_SUM']>520000)&(df['UNSECURED_HIGH_CREDIT_SUM']<=57875849))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_rnkXworst_rating_l3m_rnkXtotal_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades_rnkX150']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>=0.0)&(df['max_aggregate_bankcard_utilisation_l12m']<=34.5)&(df['worst_rating_l3m']>=1.0)&(df['worst_rating_l3m']<=1.0)&(df['total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades']>=10329.0)&(df['total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades']<=33080853.0))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_unsecured_high_credit_sum_3292']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>-6.01)&(df['max_aggregate_bankcard_utilisation_l12m']<=-1)&(df['UNSECURED_HIGH_CREDIT_SUM']>574040.33)&(df['UNSECURED_HIGH_CREDIT_SUM']<=1418204.33))],choicelist=[1],default=0)
    df['no_of_trades_total_cl_of_cc_trades_l12m_9968']=np.select(condlist=[((df['no_of_trades']>-0.01)&(df['no_of_trades']<=4)&(df['total_cl_of_cc_trades_l12m']>-5.01)&(df['total_cl_of_cc_trades_l12m']<=-1))],choicelist=[1],default=0)
    df['worst_rating_l3m_no_of_cc_trades_60P_l12m_7369']=np.select(condlist=[((df['worst_rating_l3m']>-0.01)&(df['worst_rating_l3m']<=1)&(df['no_of_cc_trades_60P_l12m']>-1.01)&(df['no_of_cc_trades_60P_l12m']<=0))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_rnkXtotal_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades_rnkXsecured_balances_sum_rnkX447']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>=38.85)&(df['max_aggregate_bankcard_utilisation_l12m']<=1027.37)&(df['total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades']>=-1.0)&(df['total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades']<=-1.0)&(df['SECURED_BALANCES_SUM']>=71607.0)&(df['SECURED_BALANCES_SUM']<=495403790.0))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_rnkXtotal_bal_open_auto_trades_l12m_rnkXno_of_deduped_inquiries_l3m_rnkX6945']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>=0.0)&(df['max_aggregate_bankcard_utilisation_l12m']<=38.84)&(df['total_bal_open_auto_trades_l12m']>=-3.0)&(df['total_bal_open_auto_trades_l12m']<=-1.0)&(df['no_of_deduped_inquiries_l3m']>=2.0)&(df['no_of_deduped_inquiries_l3m']<=2.0))],choicelist=[1],default=0)
    df['worst_rating_l24m_no_pl_sanctionedamt_lt30K_l36m_7642']=np.select(condlist=[((df['worst_rating_l24m']>1)&(df['worst_rating_l24m']<=1.5)&(df['no_pl_sanctionedamt_lt30K_l36m']>0)&(df['no_pl_sanctionedamt_lt30K_l36m']<=1))],choicelist=[1],default=0)
    df['worst_rating_l24m_no_of_cc_lt30p_l12m_541']=np.select(condlist=[((df['worst_rating_l24m']>1.5)&(df['worst_rating_l24m']<=9)&(df['no_of_cc_lt30p_l12m']>1)&(df['no_of_cc_lt30p_l12m']<=2))],choicelist=[1],default=0)
    df['no_of_60p_accs_ever_rnkXno_of_deduped_inquiries_l12m_rnkXno_of_30P_cc_l12m_rnkX6968']=np.select(condlist=[((df['no_of_60p_accs_ever']>=0.0)&(df['no_of_60p_accs_ever']<=0.0)&(df['no_of_deduped_inquiries_l12m']>=8.0)&(df['no_of_deduped_inquiries_l12m']<=73.0)&(df['no_of_30P_cc_l12m']>=1.0)&(df['no_of_30P_cc_l12m']<=3.0))],choicelist=[1],default=0)
    df['worst_rating_l3m_no_of_90p_accs_ever_6844']=np.select(condlist=[((df['worst_rating_l3m']>1)&(df['worst_rating_l3m']<=9)&(df['no_of_90p_accs_ever']>0)&(df['no_of_90p_accs_ever']<=16))],choicelist=[1],default=0)
    df['no_of_60p_accs_ever_no_of_30P_cc_l12m_8953']=np.select(condlist=[((df['no_of_60p_accs_ever']>-6.01)&(df['no_of_60p_accs_ever']<=0)&(df['no_of_30P_cc_l12m']>-1)&(df['no_of_30P_cc_l12m']<=0))],choicelist=[1],default=0)
    df['current_utilization_of_top_wallet_bankcard8'] = np.select(condlist=[((df['current_utilization_of_top_wallet_bankcard']>88)&(df['current_utilization_of_top_wallet_bankcard']<=600))],choicelist=[1],default=0)
    df['no_of_deduped_inquiries_by_total_bal_open_trades_l12m'] = np.select(condlist=[df['total_bal_open_trades_l12m']<=0],choicelist=[df['no_of_deduped_inquiries']],default=df['no_of_deduped_inquiries']/df['total_bal_open_trades_l12m'])
    df['no_of_deduped_inquiries_by_total_bal_open_trades_l12m'] = np.select(condlist=[df['no_of_deduped_inquiries_by_total_bal_open_trades_l12m']>0.0156,df['no_of_deduped_inquiries_by_total_bal_open_trades_l12m']<0.00000001],\
                                                        choicelist=[0.0156,0.00000001],default=df['no_of_deduped_inquiries_by_total_bal_open_trades_l12m'])

    df['cc_util_revolving_l1m_unsecured_high_credit_sum_6330']=np.select(condlist=[((df['cc_util_revolving_l1m']>-6.01)&(df['cc_util_revolving_l1m']<=-1)&(df['UNSECURED_HIGH_CREDIT_SUM']>-0.01)&(df['UNSECURED_HIGH_CREDIT_SUM']<=46297.17))],choicelist=[1],default=0)
    df['no_of_60p_accs_ever_rnkXno_of_open_cc_trades_rnkXno_tr_opened_l3m_rnkX1196']=np.select(condlist=[((df['no_of_60p_accs_ever']>=0.0)&(df['no_of_60p_accs_ever']<=0.0)&(df['no_of_open_cc_trades']>=0.0)&(df['no_of_open_cc_trades']<=1.0)&(df['no_tr_opened_l3m']>=2.0)&(df['no_tr_opened_l3m']<=22.0))],choicelist=[1],default=0)
    df['total_bal_open_trades_l12m_no_of_trades_pl_2w_gl_cd_4682']=np.select(condlist=[((df['total_bal_open_trades_l12m']>-5.01)&(df['total_bal_open_trades_l12m']<=28475.83)&(df['no_of_trades_pl_2w_gl_cd']>2)&(df['no_of_trades_pl_2w_gl_cd']<=4))],choicelist=[1],default=0)
    df['cc_util_revolving_l1m_rnkXno_of_deduped_inquiries_l3m_rnkXunsecured_high_credit_sum_rnkX8903']=np.select(condlist=[((df['cc_util_revolving_l1m']>=3.66)&(df['cc_util_revolving_l1m']<=148.68)&(df['no_of_deduped_inquiries_l3m']>=3.0)&(df['no_of_deduped_inquiries_l3m']<=31.0)&(df['UNSECURED_HIGH_CREDIT_SUM']>=123090.0)&(df['UNSECURED_HIGH_CREDIT_SUM']<=565217.0))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_no_of_deduped_inquiries_www']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>-6.01)&(df['max_aggregate_bankcard_utilisation_l12m']<=-2)&(df['no_of_deduped_inquiries']>6)&(df['no_of_deduped_inquiries']<=8))],choicelist=[1],default=0)
    df['months_since_oldest_trade_no_of_deduped_inquiries_l1m']=np.select(condlist=[((df['months_since_oldest_trade']>-5.01)&(df['months_since_oldest_trade']<=24)&(df['no_of_deduped_inquiries_l1m']>-0.01)&(df['no_of_deduped_inquiries_l1m']<=1))],choicelist=[1],default=0)
    df['no_of_90p_accs_ever_no_of_30P_cc_l24m_3957']=np.select(condlist=[((df['no_of_90p_accs_ever']>0)&(df['no_of_90p_accs_ever']<=16)&(df['no_of_30P_cc_l24m']>0)&(df['no_of_30P_cc_l24m']<=27))],choicelist=[1],default=0)
    df['total_bal_open_trades_l12m_worst_rating_l12m']=np.select(condlist=[((df['total_bal_open_trades_l12m']>72909.14)&(df['total_bal_open_trades_l12m']<=145061.71)&(df['worst_rating_l12m']>1)&(df['worst_rating_l12m']<=1.5))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_rnkXno_of_business_general_trades_rnkXworst_rating_l3m_rnkX571']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>=42.59)&(df['max_aggregate_bankcard_utilisation_l12m']<=113.37)&(df['no_of_business_general_trades']>=1.0)&(df['no_of_business_general_trades']<=9.0)&(df['worst_rating_l3m']>=1.5)&(df['worst_rating_l3m']<=9.0))],choicelist=[1],default=0)
    df['no_of_trades_no_of_trades_pl_2w_gl_cd_8980']=np.select(condlist=[((df['no_of_trades']>24)&(df['no_of_trades']<=380)&(df['no_of_trades_pl_2w_gl_cd']>2)&(df['no_of_trades_pl_2w_gl_cd']<=4))],choicelist=[1],default=0)
    df['max_aggregate_bankcard_utilisation_l12m_rnkXtotal_cl_open_pl_trades_l12m_rnkXworst_rating_l3m_rnkX2111']=np.select(condlist=[((df['max_aggregate_bankcard_utilisation_l12m']>=39.1)&(df['max_aggregate_bankcard_utilisation_l12m']<=178.39)&(df['total_cl_open_pl_trades_l12m']>=3600.0)&(df['total_cl_open_pl_trades_l12m']<=200000.0)&(df['worst_rating_l3m']>=1.5)&(df['worst_rating_l3m']<=9.0))],choicelist=[1],default=0)
    df['no_of_60p_accs_ever_no_of_deduped_inquiries_l12m_4']=np.select(condlist=[((df['no_of_60p_accs_ever']>-6.01)&(df['no_of_60p_accs_ever']<=0)&(df['no_of_deduped_inquiries_l12m']>5)&(df['no_of_deduped_inquiries_l12m']<=7))],choicelist=[1],default=0)
    df['worst_rating_l24m_cc_util_revolving_l1m_1387']=np.select(condlist=[((df['worst_rating_l24m']>1.5)&(df['worst_rating_l24m']<=9)&(df['cc_util_revolving_l1m']>-6.01)&(df['cc_util_revolving_l1m']<=-1))],choicelist=[1],default=0)
    df['no_of_trades_total_bal_of_open_pl_l12m_6726']=np.select(condlist=[((df['no_of_trades']>15)&(df['no_of_trades']<=24)&(df['total_bal_of_open_pl_l12m']>247.67)&(df['total_bal_of_open_pl_l12m']<=51449))],choicelist=[1],default=0)
    df['no_of_open_cc_trades_total_bal_open_trades_l12m_5298']=np.select(condlist=[((df['no_of_open_cc_trades']>-1)&(df['no_of_open_cc_trades']<=1)&(df['total_bal_open_trades_l12m']>-5.01)&(df['total_bal_open_trades_l12m']<=28475.83))],choicelist=[1],default=0)
    return df

def getNapsScore(row):
    
    zz = (-1.7249)+(row['no_of_60p_accs_ever_no_of_30P_cc_l12m_8953']*-0.6572)+(row['worst_rating_l6m']*-0.3441)+(row['worst_rating_l24m_cc_util_revolving_l1m_1387']*0.8992)+\
         (row['no_of_open_cc_trades_rnkXno_of_business_general_trades_rnkXno_of_90p_accs_ever_rnkX3091']*1.0623)+(row['current_utilization_of_top_wallet_bankcard8']*0.6604)+(row['no_of_deduped_inquiries_l6m']*0.0361)+\
         (row['max_aggregate_bankcard_utilisation_l12m_cc_util_revolving_l1m_6020']*-0.8515)+(row['no_of_deduped_inquiries_by_total_bal_open_trades_l12m']*31.4963)+(row['no_of_60p_accs_ever_no_of_deduped_inquiries_l12m_4']*-0.452)+\
         (row['no_of_trades_pl_2w_gl_cd_worst_rating_l24m__unsecured_high_credit_sum_42']*-0.7234)+(row['cc_util_revolving_l1m_unsecured_high_credit_sum_6330']*0.3369)+\
         (row['max_aggregate_bankcard_utilisation_l12m_rnkXworst_rating_l3m_rnkXtotal_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades_rnkX150']*2.9448)+(row['no_of_60p_accs_ever_rnkXno_of_open_cc_trades_rnkXno_tr_opened_l3m_rnkX1196']*0.6413)+\
         (row['max_aggregate_bankcard_utilisation_l12m_unsecured_high_credit_sum_3292']*-0.7401)+(row['total_bal_open_trades_l12m_no_of_trades_pl_2w_gl_cd_4682']*-0.5306)+(row['no_of_trades_total_bal_of_open_pl_l12m_6726']*-1.0659)+\
         (row['no_of_trades_total_cl_of_cc_trades_l12m_9968']*0.3131)+(row['cc_util_revolving_l1m_rnkXno_of_deduped_inquiries_l3m_rnkXunsecured_high_credit_sum_rnkX8903']*1.3062)+\
         (row['worst_rating_l3m_no_of_cc_trades_60P_l12m_7369']*-0.6157)+(row['max_aggregate_bankcard_utilisation_l12m_no_of_deduped_inquiries_www']*0.8942)+(row['no_of_open_cc_trades_total_bal_open_trades_l12m_5298']*0.5325)+\
         (row['max_aggregate_bankcard_utilisation_l12m_rnkXtotal_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades_rnkXsecured_balances_sum_rnkX447']*-0.4146)+(row['months_since_oldest_trade_no_of_deduped_inquiries_l1m']*0.2631)+\
         (row['max_aggregate_bankcard_utilisation_l12m_rnkXtotal_bal_open_auto_trades_l12m_rnkXno_of_deduped_inquiries_l3m_rnkX6945']*-1.5711)+(row['no_of_90p_accs_ever_no_of_30P_cc_l24m_3957']*-1.1005)+\
         (row['worst_rating_l24m_no_pl_sanctionedamt_lt30K_l36m_7642']*0.472)+(row['total_bal_open_trades_l12m_worst_rating_l12m']*0.6542)+(\
             row['worst_rating_l24m_no_of_cc_lt30p_l12m_541']*0.8891)+(row['max_aggregate_bankcard_utilisation_l12m_rnkXno_of_business_general_trades_rnkXworst_rating_l3m_rnkX571']*1.6077)+\
             (row['no_of_60p_accs_ever_rnkXno_of_deduped_inquiries_l12m_rnkXno_of_30P_cc_l12m_rnkX6968']*1.6329)+(row['no_of_trades_no_of_trades_pl_2w_gl_cd_8980']*1.9295)+\
             (row['worst_rating_l3m_no_of_90p_accs_ever_6844']*0.9481)+(row['max_aggregate_bankcard_utilisation_l12m_rnkXtotal_cl_open_pl_trades_l12m_rnkXworst_rating_l3m_rnkX2111']*1.0015)

    r = np.exp(zz)/(1+np.exp(zz))
    scr = int(max(350,min(575*(r**(-0.08)),900)))

    return scr

final['SECURED_ACCOUNTS_COUNT']=final['Secured Accounts Count']
final['UNSECURED_ACCOUNTS_COUNT']=final['Unsecured Accounts Count']
final['SECURED_HIGH_CREDIT_SUM']=final['Secured High Credit Sum']
final['UNSECURED_HIGH_CREDIT_SUM']=final['Unsecured High Credit Sum']
final['SECURED_AMOUNT_OVERDUE_SUM']=final['Secured Amount Overdue Sum']
final['UNSECURED_AMOUNT_OVERDUE_SUM']=final['Unsecured Amount Overdue Sum']
final['SECURED_BALANCES_SUM']=final['Secured Balances Sum']
final['UNSECURED_BALANCES_SUM']=final['Unsecured Balances Sum']
              
final = preprocessor(final)
final['naps_new']= final.apply(getNapsScore,axis=1)
final['naps_new_bkt'] = final.apply(naps_score_bkt,var='naps_new',axis=1)

In [30]:
perf_full = pd.DataFrame()
for i in range(len(perf)):
    a = pd.DataFrame()
    disb = str(perf.loc[i,'disb_yymm'].year)+("0"+str(perf.loc[i,'disb_yymm'].month))[-2:]
    for mob in range(perf.loc[i,'months_since_disb']):
        thisdt = perf.loc[i,'disb_yymm']+relativedelta(months=mob)
        month = str(thisdt.year)+("0"+str(thisdt.month))[-2:]
        pos = 0

        ptr = 12*(thisdt.year-perf_startdate.year)+(thisdt.month-perf_startdate.month)

        xPlusDPD = 0
        thirtyPlusDPD = 0
        sixtyPlusDPD = 0
        ninetyPlusDPD = 0
        bounced = 0
        POSactive = 1

        if ptr>=0:
            dpd = perf.loc[i,'DPD'+str(ptr)]
            POS = perf.loc[i,'Prin'+str(ptr)]
            try:
                recovery = perf.loc[i,'recovery']
            except:
                recovery = 0
                
            if POS>0:
                POSactive = 1
            else:
                POSactive = 0
            presStatus = perf.loc[i,'Presentation'+str(ptr)]

            if 'BOUNCE' in str(presStatus).upper():
                bounced=1
            
            if dpd=='90+':
                xPlusDPD = 1
                thirtyPlusDPD = 1
                sixtyPlusDPD = 1
                ninetyPlusDPD = 1
            elif dpd=='Apr90+':
                xPlusDPD = 1
                thirtyPlusDPD = 1
                sixtyPlusDPD = 1
                ninetyPlusDPD = 1
            elif (('NPA' in str(dpd)) or (dpd=='NPA (Unsettled)') or (dpd=='NPA (unsettled)') or (dpd=='Settled')):
                xPlusDPD = 1
                thirtyPlusDPD = 1
                sixtyPlusDPD = 1
                ninetyPlusDPD = 1
            elif dpd=='60+':
                xPlusDPD = 1
                thirtyPlusDPD = 1
                sixtyPlusDPD = 1
                ninetyPlusDPD = 0
            elif dpd=='30+':
                xPlusDPD = 1
                thirtyPlusDPD = 1
                sixtyPlusDPD = 0
                ninetyPlusDPD = 0
            elif dpd=='1-29+':
                xPlusDPD = 1
                thirtyPlusDPD = 0
                sixtyPlusDPD = 0
                ninetyPlusDPD = 0
                
        else:
            cntr = 0
            dpd = '0'
            POS = perf.loc[i,'Principal']
            recovery = 0

        if (ninetyPlusDPD>0):
            POS = POS - recovery
            
        xPlusPOS = xPlusDPD*POS
        thirtyPlusPOS = thirtyPlusDPD*POS
        sixtyPlusPOS = sixtyPlusDPD*POS
        ninetyPlusPOS = ninetyPlusDPD*POS
        bouncedPOS = bounced*POS
        
        tmp = pd.DataFrame({'niro_user_id':[perf.loc[i,'niro_user_id']],'mob':[mob],'disb_dt':[disb],'month':[month],\
                          'POS':[POS],'xPlusDPD':[xPlusDPD],'thirtyPlusDPD':[thirtyPlusDPD],\
                            'sixtyPlusDPD':[sixtyPlusDPD],'ninetyPlusDPD':[ninetyPlusDPD],'xPlusPOS':[xPlusPOS],\
                            'thirtyPlusPOS':[thirtyPlusPOS],'sixtyPlusPOS':[sixtyPlusPOS],\
                            'ninetyPlusPOS':[ninetyPlusPOS],'bounced':[bounced],'bouncedPOS':[bouncedPOS],'POSactive':[POSactive],\
                            'Loan_Amount_perf':[perf.loc[i,'Principal']],'EMI_perf':[perf.loc[i,'Monthly Emi']],'Loan_Tenor_perf':[perf.loc[i,'Tenor In Months']],\
                            'Loan_ROI_perf':[perf.loc[i,'Interest Rate']],'Loan_PF_perf':[perf.loc[i,'PF']],\
                            'City_perf':[perf.loc[i,'City']],'State_perf':[perf.loc[i,'State']],'Age_perf':[perf.loc[i,'Age']],\
                            'Gender_perf':[perf.loc[i,'Gender']],'pincode_perf':[perf.loc[i,'Pin Code']]})
        a = pd.concat([a,tmp])
    perf_full  = pd.concat([perf_full,a])

In [31]:
perf_full['POS'].fillna(0,inplace=True)
perf_full['xPlusPOS'].fillna(0,inplace=True)
perf_full['thirtyPlusPOS'].fillna(0,inplace=True)
perf_full['sixtyPlusPOS'].fillna(0,inplace=True)
perf_full['ninetyPlusPOS'].fillna(0,inplace=True)

In [32]:
perf_full.head()

Unnamed: 0,niro_user_id,mob,disb_dt,month,POS,xPlusDPD,thirtyPlusDPD,sixtyPlusDPD,ninetyPlusDPD,xPlusPOS,thirtyPlusPOS,sixtyPlusPOS,ninetyPlusPOS,bounced,bouncedPOS,POSactive,Loan_Amount_perf,EMI_perf,Loan_Tenor_perf,Loan_ROI_perf,Loan_PF_perf,City_perf,State_perf,Age_perf,Gender_perf,pincode_perf
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,0,202112,202112,52797.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,1,52797,5069,12,27.0,0.044813,Bangalore,KARNATAKA,39,Female,560085
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,1,202112,202201,52797.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,1,52797,5069,12,27.0,0.044813,Bangalore,KARNATAKA,39,Female,560085
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,2,202112,202202,48915.9325,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,1,52797,5069,12,27.0,0.044813,Bangalore,KARNATAKA,39,Female,560085
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,3,202112,202203,44947.540981,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,1,52797,5069,12,27.0,0.044813,Bangalore,KARNATAKA,39,Female,560085
0,0a2c0c4e-e0bf-4e50-909e-25ad6272119e,4,202112,202204,40889.860653,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,1,52797,5069,12,27.0,0.044813,Bangalore,KARNATAKA,39,Female,560085


In [33]:
perf_data = perf_full.drop_duplicates(subset = ['niro_user_id'])[['niro_user_id', 'Loan_Amount_perf', 'EMI_perf', 'Loan_Tenor_perf', 'Loan_ROI_perf', 
                                                      'Loan_PF_perf', 'City_perf', 'State_perf', 'Age_perf', 'Gender_perf', 'pincode_perf']]

#### Event Data creation

In [35]:
master_data = final.merge(perf_full.loc[(perf_full.mob == 3), ['niro_user_id', 'xPlusDPD', 'thirtyPlusDPD', 'sixtyPlusDPD', 'ninetyPlusDPD', 'xPlusPOS', 'thirtyPlusPOS', 'sixtyPlusPOS', 'ninetyPlusPOS', 'bounced', 'bouncedPOS']].rename(columns = {'xPlusDPD': 'xP3M', 'thirtyPlusDPD': '30P3M','sixtyPlusDPD':'60P3M','ninetyPlusDPD': '90P3M', 'xPlusPOS': 'POS xP3M', 'thirtyPlusPOS': 'POS 30P3M', 'sixtyPlusPOS': 'POS 60P3M', 'ninetyPlusPOS': 'POS 90P3M', 'bounced': 'bounced 3M', 'bouncedPOS': 'bouncedPOS 3M'}), on = 'niro_user_id', how = 'left') \
.merge(perf_full.loc[(perf_full.mob == 6), ['niro_user_id', 'xPlusDPD', 'thirtyPlusDPD', 'sixtyPlusDPD', 'ninetyPlusDPD', 'xPlusPOS', 'thirtyPlusPOS', 'sixtyPlusPOS', 'ninetyPlusPOS', 'bounced', 'bouncedPOS']].rename(columns = {'xPlusDPD': 'xP6M', 'thirtyPlusDPD': '30P6M','sixtyPlusDPD':'60P6M','ninetyPlusDPD': '90P6M', 'xPlusPOS': 'POS xP6M', 'thirtyPlusPOS': 'POS 30P6M', 'sixtyPlusPOS': 'POS 60P6M', 'ninetyPlusPOS': 'POS 90P6M', 'bounced': 'bounced 6M', 'bouncedPOS': 'bouncedPOS 6M'}), on = 'niro_user_id', how = 'left') \
.merge(perf_full.loc[(perf_full.mob == 12), ['niro_user_id', 'xPlusDPD', 'thirtyPlusDPD', 'sixtyPlusDPD', 'ninetyPlusDPD', 'xPlusPOS', 'thirtyPlusPOS', 'sixtyPlusPOS', 'ninetyPlusPOS', 'bounced', 'bouncedPOS']].rename(columns = {'xPlusDPD': 'xP12M', 'thirtyPlusDPD': '30P12M','sixtyPlusDPD':'60P12M','ninetyPlusDPD': '90P12M', 'xPlusPOS': 'POS xP12M', 'thirtyPlusPOS': 'POS 30P12M', 'sixtyPlusPOS': 'POS 60P12M', 'ninetyPlusPOS': 'POS 90P12M', 'bounced': 'bounced 12M', 'bouncedPOS': 'bouncedPOS 12M'}), on = 'niro_user_id', how = 'left') \
.merge(perf_data, on = 'niro_user_id', how = 'left')

In [36]:
print('Shape before dropping: ', master_data.shape)
master_data.drop_duplicates(subset=['niro_user_id'], keep='first',inplace=True)
print('Shape after dropping: ', master_data.shape)

Shape before dropping:  (42438, 220)
Shape after dropping:  (40718, 220)


#### Bucketing logic

In [37]:
master_data['Age_perf_bkt'] = np.where(master_data['Age_perf'] <= 22, 'A) 21-22',
                                np.where(master_data['Age_perf'] <= 25, 'B) 23-25',
                                    np.where(master_data['Age_perf'] <= 30, 'C) 26-30',
                                        np.where(master_data['Age_perf'] <= 35, 'D) 31-35',
                                            np.where(master_data['Age_perf'] <= 40, 'E) 36-40',
                                                np.where(master_data['Age_perf'] <= 45, 'F) 41-45',
                                                    np.where(master_data['Age_perf'] <= 50, 'G) 46-50',
                                                        np.where(master_data['Age_perf'] > 50, 'H) 50+',np.nan))))))))

#### Pincode mapping

In [62]:
master_data['pincode_mismatch'] = np.where(master_data['pincode_perf'].isna() | master_data['bureau_pincode'].isna(),np.nan,
                                        np.where(master_data['pincode_perf'] != master_data['bureau_pincode'],1,0))

In [64]:
master_data['pincode_perf'] = master_data['pincode_perf'].astype('Int64')
master_data['bureau_pincode'] = master_data['bureau_pincode'].astype('Int64')

In [66]:
df = pd.read_csv('PL_pincode_mapping.csv')
df.head()

Unnamed: 0,CURRENT_LOCATION_CLASSIFICATION,DISTRICT,EFFECTIVE_START_DATE,PERMHOUSE_LOCATION_CLASSIFICATION,PINCODE,STATE,TALUK
0,GREEN,BALESWAR,02-06-2023,GREEN,756029,ODISHA,BASTA
1,GREEN,KHORDA,02-06-2023,GREEN,751016,ODISHA,BHUBANESWAR
2,RED,KULLU,02-06-2023,GREEN,175136,HIMACHAL PRADESH,KULLU(T)
3,GREEN,CUTTACK,02-06-2023,GREEN,754022,ODISHA,TANGI CHOUDWAR
4,RED,BOKARO,02-06-2023,GREEN,827012,JHARKHAND,CHAS


In [70]:
df_ll = pd.read_excel('All-active-Pincodes-Niro.xlsx', sheet_name='LL')
df_payu = pd.read_excel('All-active-Pincodes-Niro.xlsx', sheet_name='Payu')
df_muthoot = pd.read_excel('All-active-Pincodes-Niro.xlsx', sheet_name='Muthoot')
df_piramal = pd.read_excel('All-active-Pincodes-Niro.xlsx', sheet_name='Piramal')

In [71]:
df_ll.head()

Unnamed: 0,Pincode,City,State,pin_service_ll
0,110001,Central Delhi,DELHI,1
1,110002,Central Delhi,DELHI,1
2,110003,Central Delhi,DELHI,1
3,110004,Central Delhi,DELHI,1
4,110005,Central Delhi,DELHI,1


In [75]:
master_data = master_data.merge(df[['PINCODE', 'CURRENT_LOCATION_CLASSIFICATION']], left_on = 'pincode_perf', right_on = 'PINCODE', how = 'left').rename(columns = {'CURRENT_LOCATION_CLASSIFICATION':'pincode_perf_classification'}).drop(columns = 'PINCODE') \
                .merge(df_ll[['Pincode', 'pin_service_ll']], left_on = 'pincode_perf', right_on = 'Pincode', how = 'left').drop(columns = 'Pincode') \
                .merge(df_payu[['Pincode', 'pin_service_payu']], left_on = 'pincode_perf', right_on = 'Pincode', how = 'left').drop(columns = 'Pincode') \
                .merge(df_muthoot[['Pincode', 'pin_service_muthoot']], left_on = 'pincode_perf', right_on = 'Pincode', how = 'left').drop(columns = 'Pincode') \
                .merge(df_piramal[['Pincode', 'pin_service_piramal']], left_on = 'pincode_perf', right_on = 'Pincode', how = 'left').drop(columns = 'Pincode')

In [76]:
master_data.head()

Unnamed: 0,niro_opportunity_id,category,user_id,principal_amount,interest_rate,tenure,selfie_liveness_score,selfie_liveness_result,kyc_status,pan_retry_count,nach_retry_count,remote_cpv_status,ckyc_status,supply,propensity,offer_type,customer_availability,income_source,purpose_of_loan,monthly_take_home_income,employer_name,nature_of_business,cpv_need_to_initiate_at,residence_ownership_type,marital_status,designation,cpv_type,agency_status,demand,disbursement_date,bureau_gender,bureau_pincode,Calculated_Age,bureau_city,bureau_state,native_language,tli_id,pii_id,max_aggregate_bankcard_utilisation_l12m,cc_util_revolving_l1m,bank_card_payment_category,no_of_60p_accs_ever,no_of_deduped_inquiries,total_cl_open_mortgage_trades_l12m,total_balance_open_mortgage_trades_l12m,total_bal_of_open_pl_l12m,months_since_oldest_trade,no_of_mortgage_trades,no_of_open_cc_trades,no_of_business_general_trades,no_of_30p_accs_ever,no_of_trades,total_bal_open_trades_l12m,total_bal_open_auto_trades_l12m,months_since_recent_chargedoff180P,total_cl_open_auto_trades_l12m,total_cl_open_pl_trades_l12m,no_of_chargedoff180P,total_cl_of_cc_trades_l12m,no_of_90p_accs_ever,total_bal_of_chargedoff_trades,worst_rating_l12m,current_utilization_of_top_wallet_bankcard,no_of_trades_pl_2w_gl_cd,total_bal_open_trades_l12m_except_wo_cc_od,no_of_deduped_inquiries_l12m,no_of_deduped_inquiries_l6m,no_of_deduped_inquiries_l3m,no_of_deduped_inquiries_l1m,worst_rating_l24m,worst_rating_l6m,worst_rating_l3m,worst_rating_l1m,no_of_60p_accs_l24m,no_of_60p_accs_l12m,no_of_trades_bounced_l36m,no_of_trades_bounced_l12m,no_of_cc_trades_60P_l24m,no_of_30P_cc_l24m,no_of_cc_trades_60P_l12m,no_of_30P_cc_l12m,no_of_cc_lt30p_l12m,no_cc_sanctionedamt_lt30K_l36m,no_pl_sanctionedamt_lt30K_l36m,no_tr_opened_l6m,worst_dpd_l36m,total_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades,no_tr_opened_l12m,no_tr_opened_l3m,per_of_30p_accs_ever,total_payment_amount_bankcard_accs_l3m,max_revolving_monthly_spend_l12m,total_monthly_obligations_l3m,total_balance_bankcard_accs_l3m,no_of_trades_unsec,total_payment_amount_l3m,worst_dpd_l2m,Secured Accounts Count,Unsecured Accounts Count,Secured High Credit Sum,Unsecured High Credit Sum,Secured Amount Overdue Sum,Unsecured Amount Overdue Sum,Secured Balances Sum,Unsecured Balances Sum,Own Accounts count,Other Accounts count,CIBILTUSC3 Score Value,createdate,Current_Salary,available_income,foir_dlq,foir_cibil,foir_trd,foir_mean,New_FOIR,naps_score,rnk,disbursement_month,ctc,CIBIL,naps_band,no_of_open_cc_trades_bkt,Years on Bureau_bkt,no_of_deduped_inquiries_bkt,Secured Loan_bkt,Income_bkt,FOIR_Band,Time_on_Bureau_bkt,cust_category,cust_hirisk,niro_user_id,SECURED_ACCOUNTS_COUNT,UNSECURED_ACCOUNTS_COUNT,SECURED_HIGH_CREDIT_SUM,UNSECURED_HIGH_CREDIT_SUM,SECURED_AMOUNT_OVERDUE_SUM,UNSECURED_AMOUNT_OVERDUE_SUM,SECURED_BALANCES_SUM,UNSECURED_BALANCES_SUM,CC_INACTIVE,CC_NOBC,CC_REVOLVER,CC_RVLRPLUS,CC_TRANSACTOR,CC_TRANPLUS,credithungry,no_of_open_cc_trades_rnkXno_of_business_general_trades_rnkXno_of_90p_accs_ever_rnkX3091,max_aggregate_bankcard_utilisation_l12m_cc_util_revolving_l1m_6020,no_of_trades_pl_2w_gl_cd_worst_rating_l24m__unsecured_high_credit_sum_42,max_aggregate_bankcard_utilisation_l12m_rnkXworst_rating_l3m_rnkXtotal_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades_rnkX150,max_aggregate_bankcard_utilisation_l12m_unsecured_high_credit_sum_3292,no_of_trades_total_cl_of_cc_trades_l12m_9968,worst_rating_l3m_no_of_cc_trades_60P_l12m_7369,max_aggregate_bankcard_utilisation_l12m_rnkXtotal_bal_open_trades_l12m_minus_total_bal_of_chargedoff_trades_rnkXsecured_balances_sum_rnkX447,max_aggregate_bankcard_utilisation_l12m_rnkXtotal_bal_open_auto_trades_l12m_rnkXno_of_deduped_inquiries_l3m_rnkX6945,worst_rating_l24m_no_pl_sanctionedamt_lt30K_l36m_7642,worst_rating_l24m_no_of_cc_lt30p_l12m_541,no_of_60p_accs_ever_rnkXno_of_deduped_inquiries_l12m_rnkXno_of_30P_cc_l12m_rnkX6968,worst_rating_l3m_no_of_90p_accs_ever_6844,no_of_60p_accs_ever_no_of_30P_cc_l12m_8953,current_utilization_of_top_wallet_bankcard8,no_of_deduped_inquiries_by_total_bal_open_trades_l12m,cc_util_revolving_l1m_unsecured_high_credit_sum_6330,no_of_60p_accs_ever_rnkXno_of_open_cc_trades_rnkXno_tr_opened_l3m_rnkX1196,total_bal_open_trades_l12m_no_of_trades_pl_2w_gl_cd_4682,cc_util_revolving_l1m_rnkXno_of_deduped_inquiries_l3m_rnkXunsecured_high_credit_sum_rnkX8903,max_aggregate_bankcard_utilisation_l12m_no_of_deduped_inquiries_www,months_since_oldest_trade_no_of_deduped_inquiries_l1m,no_of_90p_accs_ever_no_of_30P_cc_l24m_3957,total_bal_open_trades_l12m_worst_rating_l12m,max_aggregate_bankcard_utilisation_l12m_rnkXno_of_business_general_trades_rnkXworst_rating_l3m_rnkX571,no_of_trades_no_of_trades_pl_2w_gl_cd_8980,max_aggregate_bankcard_utilisation_l12m_rnkXtotal_cl_open_pl_trades_l12m_rnkXworst_rating_l3m_rnkX2111,no_of_60p_accs_ever_no_of_deduped_inquiries_l12m_4,worst_rating_l24m_cc_util_revolving_l1m_1387,no_of_trades_total_bal_of_open_pl_l12m_6726,no_of_open_cc_trades_total_bal_open_trades_l12m_5298,naps_new,naps_new_bkt,xP3M,30P3M,60P3M,90P3M,POS xP3M,POS 30P3M,POS 60P3M,POS 90P3M,bounced 3M,bouncedPOS 3M,xP6M,30P6M,60P6M,90P6M,POS xP6M,POS 30P6M,POS 60P6M,POS 90P6M,bounced 6M,bouncedPOS 6M,xP12M,30P12M,60P12M,90P12M,POS xP12M,POS 30P12M,POS 60P12M,POS 90P12M,bounced 12M,bouncedPOS 12M,Loan_Amount_perf,EMI_perf,Loan_Tenor_perf,Loan_ROI_perf,Loan_PF_perf,City_perf,State_perf,Age_perf,Gender_perf,pincode_perf,Age_perf_bkt,pincode_mismatch,pincode_perf_classification,pin_service_ll,pin_service_payu,pin_service_muthoot,pin_service_piramal
0,c1f020b6-a886-4d11-9f1f-9efede7923c7,CAT-D,b2c949d0-fca7-43b4-bacf-5cd02d96fb72,61029,27,10,1.0,PASS,PENDING,0,0.0,COMPLETED,COMPLETED,quikr,HiiProp,PRE_APPROVED,Yes,Self Employed,Purchase,15000,-,Boutique,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-03-13,Female,753001,42.0,CUTTACK,ODISHA,English,a197259d-327b-42e1-bc73-220e1d1bc12a,680ff9db-0cf6-4b7b-9140-e35e099ce314,-1.0,-1.0,NOBC,0.0,9.0,-1.0,-1.0,-1.0,15.0,0.0,-1.0,0.0,0.0,6.0,90184.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,5.0,90184.0,4.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,6.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,-1.0,1.0,1.0,-1.0,5.0,0.0,0.0,-1.0,-1.0,16440.0,-1.0,5.0,29002.0,1.0,0.0,6.0,0.0,210147.0,0.0,0.0,0.0,90184.0,0.0,6.0,755.0,2023-01-01,48000.0,23490.8,0.5,0.4,0.5,0.47,56.77,696.0,1.0,2023-03,15000.0,750-757,695-705,No Credit Cards,1-2 Yrs,4-6,No Secured loan,<=15000,50-60%,12-18Mths,Unsecured Loan 50K+,0,b2c949d0-fca7-43b4-bacf-5cd02d96fb72,0.0,6.0,0.0,210147.0,0.0,0.0,0.0,90184.0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,9.979597e-05,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,700,690-704,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,25625.450699,,,,,,,,,,,61029.0,6880.0,10.0,26.9,0.048993,Khorda,ODISHA,42.0,Female,752054,F) 41-45,1.0,GREEN,1.0,1.0,1.0,1.0
1,00e05a7c-b1ec-4284-9dfe-e3158fed0b5e,CAT-B,d1ae63a0-b89e-436f-be83-898c42b92a31,212606,27,24,1.0,PASS,PENDING,4,0.0,REQUIRED,COMPLETED,niro,MedProp,PRE_APPROVED,Yes,Self Employed,Business Development,45000,-,service provider,Residence,Family owned,MARRIED,-,REMOTE,Positive,liquiloans,2023-06-02,Male,999998,46.0,NAN,,Hindi,81779f4e-1332-4326-a878-8457cabab90a,dd14d4ed-bc0e-432c-b0f3-2fa3d2c82c44,9.95,0.0,RVLRPLUS,0.0,20.0,-1.0,-1.0,659473.0,107.0,0.0,2.0,0.0,0.0,12.0,661171.0,-1.0,-1.0,-1.0,808820.0,0.0,96000.0,0.0,-1.0,1.0,3.0,9.0,659473.0,8.0,2.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,12.0,12.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,2.0,1.0,-1.0,4.0,0.0,0.0,4397.0,23.0,9848.9,1698.0,12.0,4397.0,1.0,0.0,8.0,0.0,1037882.0,0.0,0.0,0.0,661171.0,0.0,8.0,761.0,2023-02-01,121323.0,37713.2,0.5,0.4,0.45,0.45,57.03,769.0,1.0,2023-06,45000.0,761-766,760-780,1-2,5+ Yrs,7+,No Secured loan,40000-50000,50-60%,7+ Yrs,Carded 50K+,0,d1ae63a0-b89e-436f-be83-898c42b92a31,0.0,8.0,0.0,1037882.0,0.0,0.0,0.0,661171.0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,3.024936e-05,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,749,743-751,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,212606.0,11551.0,24.0,26.9,0.049001,Agra,UTTAR PRADESH,46.0,Male,282007,G) 46-50,1.0,RED,1.0,1.0,1.0,1.0
2,b55813db-dd34-4767-a3cd-d02b57e42db2,CAT-D,ff9e4435-662c-4c8e-b6ec-3cfbe0f2b6ff,53068,27,12,0.0,FAIL,COMPLETED,1,0.0,COMPLETED,PENDING,niro,LowProp,PRE_APPROVED,Yes,Salaried,Purchase,30500,Hitech Net Zone,-,Residence,Self owned,MARRIED,Administrator,PHYSICAL,Positive,payu,2023-06-07,Male,600073,41.0,CHENNAI,TAMIL NADU,Tamil,8b120df4-2967-4a28-b4b4-477c13f89d3e,3fe47752-525c-4408-9818-e838283e37de,0.0,0.0,INACTIVE,0.0,0.0,-1.0,-1.0,-1.0,100.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,60000.0,0.0,-1.0,1.0,-2.0,0.0,-1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,1.0,-1.0,0.0,0.0,0.0,-2.0,0.0,0.0,0.0,1.0,-2.0,1.0,0.0,1.0,0.0,4178.0,0.0,0.0,0.0,0.0,0.0,1.0,798.0,2023-04-01,48000.0,28000.0,0.5,0.4,0.45,0.45,39.25,742.0,1.0,2023-06,30500.0,785+,728-743,1-2,5+ Yrs,<=1,No Secured loan,30000-35000,35-50%,7+ Yrs,Carded 50K+,0,ff9e4435-662c-4c8e-b6ec-3cfbe0f2b6ff,0.0,1.0,0.0,4178.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1e-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,723,714-728,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,36925.828651,36925.828651,0.0,0.0,1.0,36925.828651,,,,,,,,,,,53068.0,5093.0,12.0,26.9,0.048994,Kanchipuram,TAMIL NADU,41.0,Male,600073,F) 41-45,0.0,GREEN,1.0,1.0,1.0,1.0
3,98298a73-6df1-49e8-a390-5afd8477e1b4,CAT-B,803af291-d31b-49b7-8fb1-c606a58ea2c1,393464,20,36,1.0,PASS,COMPLETED,1,21.0,COMPLETED,PENDING,housing,MedProp,PRE_APPROVED,Yes,Salaried,House Renovation,140000,IBM India Private Limited,-,Residence,Rented,MARRIED,Senior technical engineer,PHYSICAL,Positive,payu,2023-07-23,Male,500001,32.0,HYDERABAD,ANDHRA PRADESH,English,c1147f0c-ec4f-42c4-ab25-c148dbe2b7d4,bf5bfe58-e0c0-476a-b6e2-b31a56101ef9,69.57,0.0,TRANSACTOR,0.0,15.0,-1.0,-1.0,4316607.0,80.0,0.0,1.0,0.0,0.0,8.0,4911728.0,-1.0,-1.0,-1.0,5105000.0,0.0,750000.0,0.0,-1.0,1.0,42.0,6.0,4594898.0,5.0,3.0,2.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,8.0,8.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,-1.0,4.0,0.0,0.0,-2.0,406707.0,129440.5,316830.0,8.0,90329.0,0.0,0.0,8.0,0.0,5926779.0,0.0,0.0,0.0,4911728.0,0.0,8.0,778.0,2023-07-01,480000.0,104413.6,0.5,0.4,0.43,0.44,54.08,793.0,1.0,2023-07,140000.0,777-784,781-805,1-2,5+ Yrs,4-6,No Secured loan,75000+,50-60%,5-7 Yrs,Carded 50K+,0,803af291-d31b-49b7-8fb1-c606a58ea2c1,0.0,8.0,0.0,5926779.0,0.0,0.0,0.0,4911728.0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,3.053915e-06,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,747,743-751,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,393464.0,14602.0,36.0,19.9,0.028999,K.V.Rangareddy,ANDHRA PRADESH,31.0,Male,500018,D) 31-35,1.0,GREEN,1.0,1.0,1.0,1.0
4,988136ff-87fb-4421-bfdb-2a530c5286e6,CAT-D,a500085e-c970-4846-a973-73247599552c,53068,24,12,1.0,UNKNOWN,COMPLETED,0,0.0,COMPLETED,PENDING,snapdeal,HiiProp,PRE_APPROVED,Yes,Salaried,Marriage,30500,Texo Fashions,-,Residence,Family owned,MARRIED,Supervisor,REMOTE,Positive,payu,2023-02-27,Male,641606,31.0,COIMBATORE,TAMIL NADU,English,f24a32c3-e531-4c44-bf1e-9797ff384867,99c93407-f185-4cbd-ba98-e1f8e71a7cdf,-1.0,-1.0,NOBC,0.0,17.0,-1.0,-1.0,1000.0,12.0,0.0,-1.0,0.0,0.0,3.0,30279.0,-1.0,-1.0,-1.0,1000.0,0.0,-1.0,0.0,-1.0,1.0,-1.0,3.0,30279.0,5.0,3.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,3.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-5.0,1.0,2.0,1.0,-1.0,3.0,1.0,0.0,-1.0,-1.0,5084.0,-1.0,3.0,-2.0,0.0,0.0,3.0,0.0,40829.0,0.0,0.0,0.0,30279.0,0.0,3.0,729.0,2023-01-01,48000.0,26486.05,0.5,0.5,0.5,0.5,49.28,693.0,1.0,2023-02,30500.0,<=736,<=694,No Credit Cards,1-2 Yrs,4-6,No Secured loan,30000-35000,35-50%,<=12Mths,,0,a500085e-c970-4846-a973-73247599552c,0.0,3.0,0.0,40829.0,0.0,0.0,0.0,30279.0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0.0005614452,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,665,<=689,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,53068.0,5016.0,12.0,23.9,0.048994,Erode,TAMIL NADU,32.0,Male,641605,D) 31-35,1.0,YELLOW,1.0,1.0,1.0,1.0


In [77]:
master_data.to_excel(os.getcwd() + '/Analysis/master_data_Jan_1911.xlsx', index=False)