In [1]:
import pandas as pd
import numpy as np
import oracledb
from sqlalchemy import create_engine
import os
import glob
pd.set_option('display.max_columns',100)
import time

# A. Query Data

In [2]:
# credentials
username = os.environ['DBA_USR']
password = os.environ['DBA_PW']
host = '10.220.50.121'
port = '1661'
service_name = 'ANALYTIC'

In [3]:
connection_string = f'oracle+oracledb://{username}:{password}@{host}:{port}/?service_name={service_name}'

In [4]:
# create engine
engine = create_engine(connection_string)

In [5]:
# create list of all dates
allDates = ['20241031','20240930','20240829','20240731','20240628','20240531',
            '20240430','20240329','20240229','20240131','20231229','20231130']

In [None]:
allMLs = []
for date in allDates:
    try:
        # print in case of errors
        print(f'Running {date}')

        # open and read the file as a single buffer
        sqlQuery = f'''WITH 
ML AS (
SELECT DISTINCT BASE_DT, AGREE_ID, GCIF_NO, CIF_NO, NOTE_NO, PRD_TP, 
PRD_NM,PRD_SEGMENT, ORG_LMT_AMT, CURR_LMT_AMT, BAL, ORIG_INT_RT, CURR_INT_RT, OTH_CURR_INT_RT,
CLCT_RTNG_FCL, ALLOW_PCT, ALLOW_PCT_ADD, RESTRUCT_CD, PASTDUE_SINCE_DT, PASTDUE_DAYS, WRITE_OFF_AMT,
WRITE_OFF_DT, WRITE_OFF_YN  from PDA.MASTER_LOAN
WHERE BASE_DT = {date}
AND PRD_NM LIKE '%Credit Card%'
AND STATUS = '00001'),
C360 AS (
SELECT DISTINCT GCIF_NO, SEGMENT, SEGMENT_FUNDING, CUST_NM, OPEN_DATE, 
LAST_ACTIVE_DATE, CUST_TP, GENDER_CD, AGE, EMPLOYMENT_TYPE, 
MICRO_SEGMENT, CA_BAL, CA_AVG,
SA_BAL, SA_AVG, TD_NOA, TD_BAL, TD_AVG,
SY_FUNDING_BAL, SY_FUNDING_AVG, FUNDING_NOA, FUNDING_BAL,
FUNDING_AVG, NOA_DORMANT, AUM, CC_PLAFOND, CC_BAL, PL_PLAFOND, 
PLOAN_BAL, TR_PLAFOND, TR_BAL, PB_PLAFOND, PB_BAL, 
PRK_PLAFOND, PRK_BAL, PPB_PLAFOND, PPB_BAL, BG_PLAFOND,
BG_BAL, LC_PLAFOND, LC_BAL, SY_LOAN_PLAFOND, SY_LOAN_BAL,
KPR_PLAFOND, KPR_BAL, KPM_PLAFOND, KPM_BAL, COLLECT,
LUM, BA_UL_BAL, BA_ALL_BAL, MF_BAL, BONDS_BAL,TRB,
M2U_TRX_TOTAL, M2U_TRXAMT_FIN, CRTRX_MTD, CRTRX_AMT_MTD, DBTRX_MTD,
DBTRX_AMT_MTD
FROM PDA.CUSTOMER_360
WHERE BASE_DT = {date}
)
SELECT *,
CASE WHEN PASTDUE_SINCE_DT = 0 THEN 'PERFORMING'
WHEN PASTDUE_SINCE_DT > 1 AND PASTDUE_SINCE_DT <= 30 THEN 'GRACE PERIOD'
WHEN PASTDUE_SINCE_DT > 30 THEN 'DELINQUENT'
END AS CC_STATUS
FROM ML LEFT JOIN C360 ON ML.GCIF_NO = C360.GCIF_NO'''

        # run query and store data in df
        with engine.connect() as connection:
            ML = pd.read_sql(sqlQuery, connection) 
        print('SQL run')

        # change column names into UPPER CASE
        ML.columns = [x.upper() for x in ML.columns]
    except Exception as e:
        print(e)
        ML = pd.DataFrame()
    finally:
        allMLs.append(ML)
        print(f'{date} queried successfully')
        time.sleep(5)

Running 20241031
SQL run
20241031 queried successfully
Running 20240930
SQL run
20240930 queried successfully
Running 20240829
SQL run
20240829 queried successfully
Running 20240731
SQL run
20240731 queried successfully
Running 20240628
SQL run
20240628 queried successfully
Running 20240531
SQL run
20240531 queried successfully
Running 20240430
SQL run
20240430 queried successfully
Running 20240329
SQL run
20240329 queried successfully
Running 20240229
SQL run
20240229 queried successfully
Running 20240131
SQL run
20240131 queried successfully
Running 20231229
SQL run
20231229 queried successfully
Running 20231130
SQL run
20231130 queried successfully


In [7]:
# sqlQuery = f'''WITH 
# ML AS (
# SELECT DISTINCT AGREE_ID, GCIF_NO, CIF_NO, NOTE_NO, PRD_TP, 
# PRD_NM,PRD_SEGMENT, ORG_LMT_AMT, CURR_LMT_AMT, BAL, ORIG_INT_RT, CURR_INT_RT, OTH_CURR_INT_RT,
# CLCT_RTNG_FCL, ALLOW_PCT, ALLOW_PCT_ADD, RESTRUCT_CD, PASTDUE_SINCE_DT, PASTDUE_DAYS, WRITE_OFF_AMT,
# WRITE_OFF_DT, WRITE_OFF_YN  from PDA.MASTER_LOAN
# WHERE BASE_DT = '20241031'
# AND PRD_NM LIKE '%Credit Card%'
# AND STATUS = '00001'),
# C360 AS (
# SELECT DISTINCT GCIF_NO, SEGMENT, SEGMENT_FUNDING, CUST_NM, OPEN_DATE, 
# LAST_ACTIVE_DATE, CUST_TP, GENDER_CD, AGE, EMPLOYMENT_TYPE, 
# MICRO_SEGMENT, CA_BAL, CA_AVG,
# SA_BAL, SA_AVG, TD_NOA, TD_BAL, TD_AVG,
# SY_FUNDING_BAL, SY_FUNDING_AVG, FUNDING_NOA, FUNDING_BAL,
# FUNDING_AVG, NOA_DORMANT, AUM, CC_PLAFOND, CC_BAL, PL_PLAFOND, 
# PLOAN_BAL, TR_PLAFOND, TR_BAL, PB_PLAFOND, PB_BAL, 
# PRK_PLAFOND, PRK_BAL, PPB_PLAFOND, PPB_BAL, BG_PLAFOND,
# BG_BAL, LC_PLAFOND, LC_BAL, SY_LOAN_PLAFOND, SY_LOAN_BAL,
# KPR_PLAFOND, KPR_BAL, KPM_PLAFOND, KPM_BAL, COLLECT,
# LUM, BA_UL_BAL, BA_ALL_BAL, MF_BAL, BONDS_BAL,TRB,
# M2U_TRX_TOTAL, M2U_TRXAMT_FIN, CRTRX_MTD, CRTRX_AMT_MTD, DBTRX_MTD,
# DBTRX_AMT_MTD
# FROM PDA.CUSTOMER_360
# WHERE BASE_DT = '20241031'
# )
# SELECT * FROM ML 
# LEFT JOIN C360 
# ON ML.GCIF_NO = C360.GCIF_NO'''

# # run query and store data in df
# with engine.connect() as connection:
#   ML = pd.read_sql(sqlQuery, connection) 

# # change column names into UPPER CASE
# ML.columns = [x.upper() for x in ML.columns]

# B. EDA

In [8]:
# concat all MLs into one
MLs = pd.concat(allMLs, ignore_index=True)

  MLs = pd.concat(allMLs, ignore_index=True)


In [9]:
MLs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1995100 entries, 0 to 1995099
Data columns (total 83 columns):
 #   Column            Dtype         
---  ------            -----         
 0   BASE_DT           object        
 1   AGREE_ID          object        
 2   GCIF_NO           object        
 3   CIF_NO            object        
 4   NOTE_NO           object        
 5   PRD_TP            object        
 6   PRD_NM            object        
 7   PRD_SEGMENT       object        
 8   ORG_LMT_AMT       float64       
 9   CURR_LMT_AMT      float64       
 10  BAL               float64       
 11  ORIG_INT_RT       float64       
 12  CURR_INT_RT       float64       
 13  OTH_CURR_INT_RT   float64       
 14  CLCT_RTNG_FCL     object        
 15  ALLOW_PCT         float64       
 16  ALLOW_PCT_ADD     float64       
 17  RESTRUCT_CD       object        
 18  PASTDUE_SINCE_DT  object        
 19  PASTDUE_DAYS      object        
 20  WRITE_OFF_AMT     float64       
 21  WRITE_OF

In [10]:
MLs.columns

Index(['BASE_DT', 'AGREE_ID', 'GCIF_NO', 'CIF_NO', 'NOTE_NO', 'PRD_TP',
       'PRD_NM', 'PRD_SEGMENT', 'ORG_LMT_AMT', 'CURR_LMT_AMT', 'BAL',
       'ORIG_INT_RT', 'CURR_INT_RT', 'OTH_CURR_INT_RT', 'CLCT_RTNG_FCL',
       'ALLOW_PCT', 'ALLOW_PCT_ADD', 'RESTRUCT_CD', 'PASTDUE_SINCE_DT',
       'PASTDUE_DAYS', 'WRITE_OFF_AMT', 'WRITE_OFF_DT', 'WRITE_OFF_YN',
       'GCIF_NO', 'SEGMENT', 'SEGMENT_FUNDING', 'CUST_NM', 'OPEN_DATE',
       'LAST_ACTIVE_DATE', 'CUST_TP', 'GENDER_CD', 'AGE', 'EMPLOYMENT_TYPE',
       'MICRO_SEGMENT', 'CA_BAL', 'CA_AVG', 'SA_BAL', 'SA_AVG', 'TD_NOA',
       'TD_BAL', 'TD_AVG', 'SY_FUNDING_BAL', 'SY_FUNDING_AVG', 'FUNDING_NOA',
       'FUNDING_BAL', 'FUNDING_AVG', 'NOA_DORMANT', 'AUM', 'CC_PLAFOND',
       'CC_BAL', 'PL_PLAFOND', 'PLOAN_BAL', 'TR_PLAFOND', 'TR_BAL',
       'PB_PLAFOND', 'PB_BAL', 'PRK_PLAFOND', 'PRK_BAL', 'PPB_PLAFOND',
       'PPB_BAL', 'BG_PLAFOND', 'BG_BAL', 'LC_PLAFOND', 'LC_BAL',
       'SY_LOAN_PLAFOND', 'SY_LOAN_BAL', 'KPR_PLAFOND', 'KPR_

## 1. SAGA total monthly bad debt

In [None]:
monthly = pd.pivot_table(MLs, 
                         values=['GCIF_NO','CURR_LMT_AMT','BAL'],
                         index=['BASE_DT'],
                         columns=['']
                         aggfunc={'GCIF_NO':'count',
                                  'CURR_LMT_AMT':'sum'})

1. Hitung total bad debt SAGA all month
2. Bandingkan bad debt SAGA vs all other CC
2. Bandingkan 