In [1]:
import pandas as pd
import numpy as np
import os
import glob
from datetime import datetime
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides library deprecation warnings;
# consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

# <font color='orange'>01. load data</font>

In [2]:
# Folder holding the model development / validation CSV files.
# The original workstation path stays the default, so behaviour is unchanged
# when nothing is configured; set the CREDIT_MODEL_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
data_rootpath = os.environ.get(
    'CREDIT_MODEL_DATA_DIR',
    r'C:\Users\w10\Desktop\신용평가모형 세미나\2주차\데이터\dev_data',
)

In [3]:
# List the entries found in the data folder.
os.listdir(data_rootpath)

['model_dev_cb_df.csv',
 'model_dev_inner_df.csv',
 'model_valid_cb_df.csv',
 'model_valid_inner_df.csv']

In [4]:
# Read the development and validation "inner" CSV files from the data folder.
dev_inner_path = os.path.join(data_rootpath, 'model_dev_inner_df.csv')
valid_inner_path = os.path.join(data_rootpath, 'model_valid_inner_df.csv')

dev_inner_df = pd.read_csv(dev_inner_path)
valid_inner_df = pd.read_csv(valid_inner_path)

In [5]:
# Collect the loaded frames in one dictionary keyed by dataset name.
df_dict = {
    'dev_inner': dev_inner_df,
    'valid_inner': valid_inner_df,
}

In [6]:
# Report the (rows, columns) shape of every dataset held in df_dict.
for name, frame in df_dict.items():
    print(f'{name} {frame.shape}')

dev_inner (240016, 67)
valid_inner (40942, 67)


In [7]:
# Preview the first two rows of the development dataset.
df_dict['dev_inner'].head(2)

Unnamed: 0,no,신청월,직군그룹,근속기간1,웰컴_소득구간,심사원장_소득구간,EXECUTE_FG,DESTRUCT_FG,Y_2017_FG,Y_2018_FG,...,BAD_금융_카드_12,BAD_금융_캐피탈_12,BAD_금융_저축은행_12,BAD_대부_12,BAD_금융_6,BAD_금융_카드_6,BAD_금융_캐피탈_6,BAD_금융_저축은행_6,BAD_대부_6,BAD
0,1,201612,,5.0,5.0,0.0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,201612,D,5.0,2.0,0.0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
# Split the model validation data by month (column '신청월'), one frame per month.
for yyyymm in (201908, 201909, 201910):
    month_mask = valid_inner_df['신청월'] == yyyymm
    # Keys carry only the last four digits: 201908 -> 'valid_inner_1908'.
    df_dict[f'valid_inner_{yyyymm % 10000}'] = valid_inner_df[month_mask]

# <font color='orange'>02. 기부도 개별 요건별 현황</font>

In [14]:
# Select every column of the development data whose name contains 'PRE_BAD'.
dev_columns = df_dict['dev_inner'].columns
pre_bad_cols = dev_columns[dev_columns.str.contains('PRE_BAD')]

In [15]:
# Show the selected column names.
pre_bad_cols

Index(['PRE_BAD_FG_1', 'PRE_BAD_FG_2', 'PRE_BAD_FG_3', 'PRE_BAD_FG_4',
       'PRE_BAD_FG_5', 'PRE_BAD_FG_6', 'PRE_BAD_FG_7', 'PRE_BAD_FG_8',
       'TOTAL_PRE_BAD_FG'],
      dtype='object')

In [16]:
# Overall counts per pre-bad flag, computed independently for every dataset.
#
# For each dataset in df_dict a summary frame indexed by pre_bad_cols is built:
#   cnt     - sum of the flag column (the number of flagged rows when the
#             flag is coded 0/1)
#   bad_cnt - sum of BAD over the rows where the flag equals 1
#             (the original note called this the "BS original draft" definition
#             - presumably the initially proposed rule; confirm with the author)
#
# The rows are gathered in a list and turned into a DataFrame once.
# DataFrame.append, used previously, was removed in pandas 2.0 and also
# re-copied the growing frame on every iteration.
pre_bad_dict = dict()

for key, df in df_dict.items():
    records = [
        {
            'cnt': df[col].sum(axis=0),
            'bad_cnt': df.loc[df[col] == 1, 'BAD'].sum(axis=0),
        }
        for col in pre_bad_cols
    ]
    # Explicit columns keep the layout stable even if pre_bad_cols is empty.
    pre_bad_dict[key] = pd.DataFrame(records,
                                     columns=['cnt', 'bad_cnt'],
                                     index=pre_bad_cols)

In [17]:
# Display the per-flag summary for the development dataset.
pre_bad_dict['dev_inner']

Unnamed: 0,cnt,bad_cnt
PRE_BAD_FG_1,3651,1093
PRE_BAD_FG_2,987,222
PRE_BAD_FG_3,1465,477
PRE_BAD_FG_4,1202,394
PRE_BAD_FG_5,20070,9704
PRE_BAD_FG_6,5570,2738
PRE_BAD_FG_7,6140,4337
PRE_BAD_FG_8,24692,18751
TOTAL_PRE_BAD_FG,41297,26110


### 저장

In [18]:
# Save everything into a single Excel workbook, one sheet per dataset.
output_dir = r'C:\Users\w10\Desktop\신용평가모형 세미나\2주차\데이터\기부도현황'
save_path = os.path.join(output_dir, '요건별_기부도현황_결과.xlsx')

with pd.ExcelWriter(save_path) as writer:
    for sheet, summary in pre_bad_dict.items():
        summary.to_excel(writer, sheet_name=sheet)

# <font color='orange'>03. 기부도 누적 요건별 현황</font>

In [19]:
# Re-select the 'PRE_BAD' columns of the development data for this section.
dev_columns = df_dict['dev_inner'].columns
pre_bad_cols = dev_columns[dev_columns.str.contains('PRE_BAD')]

In [21]:
# Remove TOTAL_PRE_BAD_FG so only the individually numbered flags remain.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell
# a second time raises KeyError because the label has already been dropped.
pre_bad_cols = pre_bad_cols.drop(['TOTAL_PRE_BAD_FG'], errors='ignore')
pre_bad_cols

Index(['PRE_BAD_FG_1', 'PRE_BAD_FG_2', 'PRE_BAD_FG_3', 'PRE_BAD_FG_4',
       'PRE_BAD_FG_5', 'PRE_BAD_FG_6', 'PRE_BAD_FG_7', 'PRE_BAD_FG_8'],
      dtype='object')

In [23]:
# Cumulative (waterfall) counts per pre-bad flag for every dataset.
#
# The flags are processed in pre_bad_cols order. For each flag:
#   cnt     - number of still-remaining rows whose flag equals 1
#   bad_cnt - sum of BAD over those rows
# Afterwards every row whose 'no' matches a flagged row is removed, so a later
# flag only counts rows that no earlier flag has already captured.
#
# Rows are collected in a list and converted to a DataFrame once;
# DataFrame.append, used previously, was removed in pandas 2.0.
cum_pre_bad_dict = dict()
for key, df in df_dict.items():
    # Work on a separate name so the loop variable is never rebound.
    remaining = df
    records = []

    for col in pre_bad_cols:
        flagged = remaining[remaining[col] == 1]
        records.append({'cnt': flagged.shape[0],
                        'bad_cnt': flagged['BAD'].sum(axis=0)})

        # Exclude by 'no' (not by the flag itself) to keep the original rule:
        # any row sharing a 'no' with a flagged row is dropped as well.
        remaining = remaining[~remaining['no'].isin(flagged['no'].values)]

    cum_pre_bad_dict[key] = pd.DataFrame(records,
                                         columns=['cnt', 'bad_cnt'],
                                         index=list(pre_bad_cols))
    
        

In [24]:
# Display the cumulative per-flag summary for the development dataset.
cum_pre_bad_dict['dev_inner']

Unnamed: 0,cnt,bad_cnt
PRE_BAD_FG_1,3651,1093
PRE_BAD_FG_2,0,0
PRE_BAD_FG_3,0,0
PRE_BAD_FG_4,0,0
PRE_BAD_FG_5,16419,8611
PRE_BAD_FG_6,2386,1135
PRE_BAD_FG_7,2205,1919
PRE_BAD_FG_8,16636,13352


### 저장

In [25]:
# Save everything into a single Excel workbook, one sheet per dataset.
output_dir = r'C:\Users\w10\Desktop\신용평가모형 세미나\2주차\데이터\기부도현황'
save_path = os.path.join(output_dir, '요건별_누적_기부도현황_결과.xlsx')

with pd.ExcelWriter(save_path) as writer:
    for sheet, summary in cum_pre_bad_dict.items():
        summary.to_excel(writer, sheet_name=sheet)

In [26]:
# For every dataset print its name, its row count and the sum of its BAD
# column, each on its own line.
for name, frame in df_dict.items():
    print(name, len(frame), frame['BAD'].sum(), sep='\n')

dev_inner
240016
107261
valid_inner
40942
6253
valid_inner_1908
15037
3189
valid_inner_1909
13613
2044
valid_inner_1910
12292
1020
