In [2]:
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime

# <font color='orange'>01. load data</font>

In [3]:
# Directory that holds the intermediate data files read by this notebook.
# NOTE(review): absolute, machine-specific Windows path; it has to be edited to run elsewhere.
rootpath = r'C:\Users\w10\Desktop\신용평가모형 세미나\2주차\데이터\temp'

In [4]:
# Show which files are present in the data directory.
os.listdir(rootpath)

['temp1_inner_total_df.csv', 'temp2_inner_total_df.csv', 'total_df.pkl']

In [5]:
# Load the table that contains the raw source variables used to build the mortgage flags.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
total_df = pd.read_pickle(os.path.join(rootpath, 'total_df.pkl'), )

In [6]:
# Load the table that receives the flag columns created below and is saved at the end.
inner_total_df = pd.read_csv(os.path.join(rootpath, 'temp2_inner_total_df.csv'), )

In [7]:
# Preview the first two rows of total_df.
total_df.head(2)

Unnamed: 0,no,신청월,직군그룹,근속기간1,웰컴_소득구간,심사원장_소득구간,AS0000136,AS0000137,AS0000138,AS0000139,...,P2O000500_1_s6,LA0000001_1_s9,LA0000020_1_s9,LA0000227_1_s9,P2E000500_1_s9,LA0000001_1_s12,LA0000020_1_s12,LA0000204_1_s12,LA0000227_1_s12,P2O000500_1_s12
0,1,201612,,5.0,5.0,0.0,26,-1,1,0,...,0.0,0.15,0.15,345.45,0.0,0.09,0.09,214.29,214.29,0.0
1,2,201612,D,5.0,2.0,0.0,-1,32,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Preview the first two rows of inner_total_df.
inner_total_df.head(2)

Unnamed: 0,no,신청월,직군그룹,근속기간1,웰컴_소득구간,심사원장_소득구간,EXECUTE_FG,DESTRUCT_FG,Y_2017_FG,Y_2018_FG,...,GRADE_ZERO_FG,PRE_BAD_FG_1,PRE_BAD_FG_2,PRE_BAD_FG_3,PRE_BAD_FG_4,PRE_BAD_FG_5,PRE_BAD_FG_6,PRE_BAD_FG_7,PRE_BAD_FG_8,TOTAL_PRE_BAD_FG
0,1,201612,,5.0,5.0,0.0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,201612,D,5.0,2.0,0.0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0


# <font color='orange'>02. 주택담보대출 대상자 선정</font>

```
전체
    LA0000126 : 미해제주택담보대출총기관수 > 0
    L23001001 : 미상환 주택담보 대출총건수 > 0
    A5WC0000008500 : 미상환주택담보대출총건수중개심사거절용 == 1
    A5WC0000008400 : 미상환주택담보대출총건수인터넷등심사거절용 == 1

    
업권별
    LA0001016 : 미해제 은행업계 주택담보대출 총 건수 > 0
    LA0005008 : 보험업계 주택담보대출 총 건수 > 0
    LA0006011 : 미해제 카드업계 주택담보대출 총 건수 > 0
    LA0007201 : 미해제 리스렌탈업계 주택담보대출(역모기지론포함) 총 잔액 > 0
    LA0008008 : 미해제 저축은행업계 주택담보대출 총 건수 > 0
    LA0012005 : 조합업계(농협및수협단위조합) 주택담보대출 총 건수 > 0
    LA0012208 : 미해제 신용협동조합업계 주택담보대출(역모기지론포함) 총 잔액 > 0
    LA0014008 : 미해제 캐피탈업계 주택담보대출 총 건수 > 0
    LA0029203 : 미해제 새마을금고업계 주택담보대출(역모기지론포함) 총 잔액 > 0
    LA0099252 : 미해제 기타업계 주택담보대출(역모기지론포함) 총 잔액 > 0
    LU0021006_1 : 전은연_계좌별_저축은행업권_미상환_주택담보대출_총건수_대부업_저축은행_인터넷전문은행_정보_공유용__소비자금융CB > 0
    LU0024013_1 : 전은연 계좌별 대부업권 미상환 주택담보대출 총 건수(대부업 저축은행 인터넷전문은행 정보 공유용) > 0
    LU0025004_1 : 신정원 계좌별 P2P연계대부업권 미상환 주택담보대출 총건수(대부업 저축은행 인터넷전문은행 정보 공유용) > 0
```

In [9]:
# input cols
# Source variables inspected for an existing mortgage loan.
# Order matters: entries are paired by position with house_cols_name.
house_cols = [
    # --- overall ---
    'LA0000126',       # outstanding mortgage loans: total number of institutions
    'L23001001',       # unrepaid mortgage loans: total count
    'A5WC0000008500',  # unrepaid mortgage loan count, broker-channel screening variable (coded value)
    'A5WC0000008400',  # unrepaid mortgage loan count, internet-channel screening variable (coded value)
    # --- by sector ---
    'LA0001016',       # banks: outstanding mortgage loan count
    'LA0005008',       # insurance: mortgage loan count
    'LA0006011',       # card companies: outstanding mortgage loan count
    'LA0007201',       # lease/rental: outstanding mortgage balance (incl. reverse mortgages)
    'LA0008008',       # savings banks: outstanding mortgage loan count
    'LA0012005',       # cooperatives (agricultural/fisheries unit co-ops): mortgage loan count
    'LA0012208',       # credit unions: outstanding mortgage balance (incl. reverse mortgages)
    'LA0014008',       # capital companies: outstanding mortgage loan count
    'LA0029203',       # Saemaul Geumgo: outstanding mortgage balance (incl. reverse mortgages)
    'LA0099252',       # other sectors: outstanding mortgage balance (incl. reverse mortgages)
    # --- account-level, shared-information variables ---
    'LU0021006_1',     # savings-bank sector: unrepaid mortgage loan count
    'LU0024013_1',     # lending-business sector: unrepaid mortgage loan count
    'LU0025004_1',     # P2P-linked lending sector: unrepaid mortgage loan count
]

In [10]:
# output cols
# Names of the 0/1 flag columns to create.
# Order matters: entries are paired by position with house_cols.
house_cols_name = [
    # overall flags
    'TOTAL_HOUSE_FG_1',
    'TOTAL_HOUSE_FG_2',
    'TOTAL_HOUSE_FG_3',
    'TOTAL_HOUSE_FG_4',
    # per-sector flags
    'BANK_HOUSE_FG',
    'INS_HOUSE_FG',
    'CARD_HOUSE_FG',
    'LEASE_HOUSE_FG',
    'SB_HOUSE_FG',
    'COOP_HOUSE_FG',
    'CREDUNION_HOUSE_FG',
    'CAP_HOUSE_FG',
    'MG_HOUSE_FG',
    'ETC_HOUSE_FG',
    # account-level flags
    'ACC_SB_HOUSE_FG',
    'ACC_ML_HOUSE_FG',
    'ACC_P2P_HOUSE_FG',
]

In [11]:
# Display how many input columns are listed.
len(house_cols)

17

In [12]:
# Build one 0/1 flag column per input column: 1 where the input value is > 0, else 0.
# The two lists are paired by position, so they must be the same length
# (zip would otherwise silently drop the unmatched tail).
if len(house_cols) != len(house_cols_name):
    raise ValueError('house_cols and house_cols_name must have the same length')

# NOTE: for the two A5WC... columns the `> 0` rule is only provisional; a later cell
# recomputes TOTAL_HOUSE_FG_3 / TOTAL_HOUSE_FG_4 with an `== 1` rule.
for house_col, col_name in zip(house_cols, house_cols_name):
    # Vectorized comparison instead of an element-wise apply. Missing values compare
    # as False and therefore become 0, the same result as `1 if x > 0 else 0`.
    total_df[col_name] = (total_df[house_col] > 0).astype('int64')


In [13]:
# Count the rows flagged 1 in the two flags derived from the A5WC... columns.
total_df[['TOTAL_HOUSE_FG_3','TOTAL_HOUSE_FG_4']].sum()

TOTAL_HOUSE_FG_3    418979
TOTAL_HOUSE_FG_4    418979
dtype: int64

* 추가 고려 사항
  * A5WC0000008500, A5WC0000008400 변수
    * 0을 가지는 레코드 없음
    * 구간값으로 형성되어 있는 것을 확인
      * 1이면 주담대 보유
      * 2이면 주담대 보유 x

In [14]:
# NOTE: disabled row-wise draft kept for reference; the following cells use welc_house_fg instead.
# def _welc_house_col_fg(A5WC0000008500, A5WC0000008400):
#     total_house_seg_3 = 0
#     total_house_seg_4 = 0
    
#     if A5WC0000008500 == 1:
#         total_house_seg_3 = 1
        
#     if A5WC0000008400 == 1:
#         total_house_seg_4 = 1
        
#     return total_house_seg_3, total_house_seg_4

# total_df[result_cols] = total_df[need_cols].apply(lambda x : _welc_house_col_fg(x['A5WC0000008500'], x['A5WC0000008400']),
#                                                   axis = 1, result_type = 'expand')


In [15]:
def welc_house_fg(welc_house_col):
    """Convert a coded mortgage value into a 0/1 flag.

    Args:
        welc_house_col: a single value from one of the coded A5WC... columns.

    Returns:
        1 when the value equals 1, otherwise 0. The notebook's notes say code 1
        means a mortgage loan is held and code 2 means it is not.
    """
    return 1 if welc_house_col == 1 else 0

In [16]:
# Coded source columns and the flag columns recomputed from them (paired by position).
need_cols = ['A5WC0000008500', 'A5WC0000008400']
result_cols = ['TOTAL_HOUSE_FG_3', 'TOTAL_HOUSE_FG_4']

In [17]:
# Overwrite the two provisional flags: 1 only where the coded source value equals 1.
for in_col, out_col in zip(need_cols, result_cols):
    total_df[out_col] = total_df[in_col].apply(welc_house_fg)

In [18]:
# Copy every mortgage flag column from total_df into inner_total_df.
# NOTE(review): this assignment appears to align rows on the DataFrame index, not on the
# `no` key; confirm both frames share the same index and row order.
inner_total_df[house_cols_name] = total_df[house_cols_name]

### 최종 주택담보대출 보유 항목 생성

In [19]:
# NOTE: disabled row-wise draft kept for reference; TOTAL_HOUSE_FG is built from a row sum in a later cell.
# def _get_total_house_seg(TOTAL_HOUSE_SEG_1, TOTAL_HOUSE_SEG_2,
#                         TOTAL_HOUSE_SEG_3, TOTAL_HOUSE_SEG_4
#                         ):
#     total_house_seg = 0
#     if (TOTAL_HOUSE_SEG_1 > 0 or TOTAL_HOUSE_SEG_2 > 0 
#        or TOTAL_HOUSE_SEG_3 > 0 or TOTAL_HOUSE_SEG_4 > 0
#        ):
#         total_house_seg = 1
        
#     return total_house_seg

# total_df['TOTAL_HOUSE_FG'] = total_df[need_cols].apply(lambda x: _get_total_house_seg(x['TOTAL_HOUSE_FG_1'], 
#                                                                                        x['TOTAL_HOUSE_FG_2'],
#                                                                                        x['TOTAL_HOUSE_FG_3'], 
#                                                                                        x['TOTAL_HOUSE_FG_4'],
#                                                                                       ), axis = 1)

In [20]:
# Component flags combined into the overall TOTAL_HOUSE_FG.
# (Rebinds need_cols, which held the coded source column names in an earlier cell.)
need_cols = [
    'TOTAL_HOUSE_FG_1',
    'TOTAL_HOUSE_FG_2',
    'TOTAL_HOUSE_FG_3',
    'TOTAL_HOUSE_FG_4',
]

In [21]:
# Overall mortgage-holder flag: 1 when the row sum of the flags in need_cols is > 0, else 0.
# Done in one vectorized step so the column never holds the intermediate row sum,
# which also keeps this cell safe to re-run on its own.
total_df['TOTAL_HOUSE_FG'] = (total_df[need_cols].sum(axis=1) > 0).astype('int64')

In [22]:
# Copy the overall mortgage flag into inner_total_df.
# NOTE(review): the Series is aligned on the DataFrame index, not on the `no` key;
# confirm both frames share the same index and row order.
inner_total_df['TOTAL_HOUSE_FG'] = total_df['TOTAL_HOUSE_FG']

# <font color='orange'>03. 저장</font>

In [23]:
# Output directory (same literal path as rootpath above).
# NOTE(review): absolute, machine-specific Windows path; it has to be edited to run elsewhere.
save_path = r'C:\Users\w10\Desktop\신용평가모형 세미나\2주차\데이터\temp'

In [24]:
# Write inner_total_df, now including the mortgage flags, to CSV without the row index.
inner_total_df.to_csv(os.path.join(save_path, 'temp3_inner_total_df.csv'), index = False)