In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

In [2]:
def grap_year(data):
    data = str(data)
    return int(data[:4])

def grap_month(data):
    data = str(data)
    return int(data[4:])

In [3]:
# 날짜 처리
data = pd.read_csv('201901-202003.csv')
data = data.fillna('')
data['year'] = data['REG_YYMM'].apply(lambda x: grap_year(x))
data['month'] = data['REG_YYMM'].apply(lambda x: grap_month(x))
data = data.drop(['REG_YYMM'], axis=1)

In [4]:
# 데이터 정제
df = data.copy()
df = df.drop(['CARD_CCG_NM', 'HOM_CCG_NM'], axis=1)

columns = ['CARD_SIDO_NM', 'STD_CLSS_NM', 'HOM_SIDO_NM', 'AGE', 'SEX_CTGO_CD', 'FLC', 'year', 'month']
df = df.groupby(columns).sum().reset_index(drop=False)

In [5]:
# 인코딩
dtypes = df.dtypes
encoders = {}
for column in df.columns:
    if str(dtypes[column]) == 'object':
        encoder = LabelEncoder()
        encoder.fit(df[column])
        encoders[column] = encoder
        
df_num = df.copy()        
for column in encoders.keys():
    encoder = encoders[column]
    df_num[column] = encoder.transform(df[column])

In [94]:
df_num

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,year,month,CSTMR_CNT,AMT,CNT
0,0,0,0,1,1,1,2019,1,4,311200,4
1,0,0,0,1,1,1,2019,2,3,605000,3
2,0,0,0,1,1,1,2019,6,3,139000,3
3,0,0,0,1,1,1,2019,8,3,27500,3
4,0,0,0,1,1,1,2019,9,3,395500,3
...,...,...,...,...,...,...,...,...,...,...,...
1057389,16,40,16,6,2,5,2019,3,3,148000,4
1057390,16,40,16,6,2,5,2019,5,5,329800,7
1057391,16,40,16,6,2,5,2019,10,7,557800,7
1057392,16,40,16,6,2,5,2019,12,3,247800,3


In [6]:
# feature, target 설정
train_num = df_num.sample(frac=1, random_state=0)
train_features = train_num.drop(['CSTMR_CNT', 'AMT', 'CNT'], axis=1)
train_target = np.log1p(train_num['AMT'])

In [7]:
# 예측 템플릿 만들기
CARD_SIDO_NMs = df_num['CARD_SIDO_NM'].unique()
STD_CLSS_NMs  = df_num['STD_CLSS_NM'].unique()
HOM_SIDO_NMs  = df_num['HOM_SIDO_NM'].unique()
AGEs          = df_num['AGE'].unique()
SEX_CTGO_CDs  = df_num['SEX_CTGO_CD'].unique()
FLCs          = df_num['FLC'].unique()
years         = [2020]
months        = [4, 7]

temp = []
for CARD_SIDO_NM in CARD_SIDO_NMs:
    for STD_CLSS_NM in STD_CLSS_NMs:
        for HOM_SIDO_NM in HOM_SIDO_NMs:
            for AGE in AGEs:
                for SEX_CTGO_CD in SEX_CTGO_CDs:
                    for FLC in FLCs:
                        for year in years:
                            for month in months:
                                temp.append([CARD_SIDO_NM, STD_CLSS_NM, HOM_SIDO_NM, AGE, SEX_CTGO_CD, FLC, year, month])
temp = np.array(temp)
temp = pd.DataFrame(data=temp, columns=train_features.columns)

In [121]:
temp

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,year,month
0,0,0,0,1,1,1,2020,4
1,0,0,0,1,1,1,2020,7
2,0,0,0,1,1,2,2020,4
3,0,0,0,1,1,2,2020,7
4,0,0,0,1,1,3,2020,4
...,...,...,...,...,...,...,...,...
1658855,16,30,14,0,2,3,2020,7
1658856,16,30,14,0,2,4,2020,4
1658857,16,30,14,0,2,4,2020,7
1658858,16,30,14,0,2,5,2020,4


In [114]:
temp2 = temp[(temp['year']==2020) & (temp['month']==4)].drop(['year', 'month'], axis=1)
temp2
X_test = temp2

In [9]:
df2020 = df_num[(df_num['year']==2020) & (df_num['month']==3)] 
df20202 = df_num[(df_num['year']==2020) & (df_num['month']==2)]
df_final = pd.concat([df2020, df20202], axis=0)
df_final2 = df_final.drop(['CSTMR_CNT','AMT','CNT','year','month'], axis=1)
X_train = df_final2
y_train = df_final['AMT']

In [146]:
df_num

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,year,month,CSTMR_CNT,AMT,CNT
0,0,0,0,1,1,1,2019,1,4,311200,4
1,0,0,0,1,1,1,2019,2,3,605000,3
2,0,0,0,1,1,1,2019,6,3,139000,3
3,0,0,0,1,1,1,2019,8,3,27500,3
4,0,0,0,1,1,1,2019,9,3,395500,3
...,...,...,...,...,...,...,...,...,...,...,...
1057389,16,40,16,6,2,5,2019,3,3,148000,4
1057390,16,40,16,6,2,5,2019,5,5,329800,7
1057391,16,40,16,6,2,5,2019,10,7,557800,7
1057392,16,40,16,6,2,5,2019,12,3,247800,3


In [135]:
df_final

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,year,month,CSTMR_CNT,AMT,CNT
7,0,0,0,1,1,1,2020,3,3,427510,2
43,0,0,0,2,1,2,2020,3,38,5622890,37
59,0,0,0,2,2,2,2020,3,30,2296125,38
67,0,0,0,3,1,2,2020,3,3,257000,4
82,0,0,0,3,1,3,2020,3,51,6490800,48
...,...,...,...,...,...,...,...,...,...,...,...
1057327,16,40,16,4,1,4,2020,2,39,1445050,44
1057342,16,40,16,4,2,4,2020,2,38,1729900,49
1057357,16,40,16,5,1,5,2020,2,29,2201900,43
1057372,16,40,16,5,2,5,2020,2,22,1099200,38


In [148]:
temp2

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,201904,0,0,-4.304604e+10
1,201904,0,1,-5.312565e+10
2,201904,0,2,-5.343284e+10
3,201904,0,3,-5.343284e+10
4,201904,0,4,-5.343284e+10
...,...,...,...,...
692,201904,16,36,-2.014661e+10
693,201904,16,37,9.973443e+10
694,201904,16,38,-3.608227e+10
695,201904,16,39,-3.454720e+10


In [115]:
X_test

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC
0,0,0,0,1,1,1
2,0,0,0,1,1,2
4,0,0,0,1,1,3
6,0,0,0,1,1,4
8,0,0,0,1,1,5
...,...,...,...,...,...,...
1658850,16,30,14,0,2,1
1658852,16,30,14,0,2,2
1658854,16,30,14,0,2,3
1658856,16,30,14,0,2,4


In [155]:
y_train

7           427510
43         5622890
59         2296125
67          257000
82         6490800
            ...   
1057327    1445050
1057342    1729900
1057357    2201900
1057372    1099200
1057386     301700
Name: AMT, Length: 114759, dtype: int64

In [151]:
X_train

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC
7,0,0,0,1,1,1
43,0,0,0,2,1,2
59,0,0,0,2,2,2
67,0,0,0,3,1,2
82,0,0,0,3,1,3
...,...,...,...,...,...,...
1057327,16,40,16,4,1,4
1057342,16,40,16,4,2,4
1057357,16,40,16,5,1,5
1057372,16,40,16,5,2,5


In [156]:
from lightgbm import LGBMRegressor

lgbm_reg = LGBMRegressor(n_estimators = 1000, learning_rate = 0.1, num_leaves=4,subsample = 0.8, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)

In [157]:
lgbm_reg.fit(X_train, y_train)
pred = lgbm_reg.predict(X_test)

In [154]:
pred

array([-75370016.47694008, -71455844.09104624, -65394814.72455405, ...,
       -44215695.46390911, -43359999.90639818, -53301839.09726086])

In [158]:
pred

array([-38951853.03770236, -41539727.78046791, -42286169.02498484, ...,
       -38177369.22121436, -41696312.16143347, -48665049.92857932])

In [10]:
from sklearn.ensemble import RandomForestRegressor

rf_reg = RandomForestRegressor(n_jobs=-1)

params = {
    'n_estimators' : [500, 700, 1000, 1200],
    'max_depth' : [6,8,10,12],
    'min_samples_leaf' : [15,18, 21],
    'min_samples_split' : [98,16,20]
}

In [13]:
from sklearn.model_selection import GridSearchCV
rf_reg.fit(X_tr
           ain, y_train)

grid_cv = GridSearchCV(rf_reg, param_grid = params, cv=2, n_jobs=-1)
grid_cv.fit(X_train, y_train)

print('GridSearchCV 최고 평균 정확도 수치 : {0:.4f}'.format(grid_cv.best_score_))
print('GridSearchCV 최적의 파라미터 : ',grid_cv.best_params_)

GridSearchCV 최고 평균 정확도 수치 : 0.2927
GridSearchCV 최적의 파라미터 :  {'max_depth': 12, 'min_samples_leaf': 15, 'min_samples_split': 20, 'n_estimators': 700}


In [16]:
from sklearn.ensemble import RandomForestRegressor

rf_reg = RandomForestRegressor(max_depth = 12, min_samples_leaf = 15, min_samples_split = 20, n_estimators = 700, n_jobs=-1)
rf_reg.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=12,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=15, min_samples_split=20,
                      min_weight_fraction_leaf=0.0, n_estimators=700, n_jobs=-1,
                      oob_score=False, random_state=None, verbose=0,
                      warm_start=False)

In [17]:
rf_pred = rf_reg.predict(X_test)

In [18]:
rf_pred

array([ 3887895.48872388,  3439876.36512174, 10302552.77612659, ...,
        1479996.54419483,  1557310.92210149,  1526577.796477  ])

In [111]:
rf_pred = np.round(rf_pred)

In [112]:
len(rf_pred)

829430

In [116]:
X_test['AMT'] = rf_pred

In [117]:
X_test

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,AMT
0,0,0,0,1,1,1,3887895.0
2,0,0,0,1,1,2,3439876.0
4,0,0,0,1,1,3,10302553.0
6,0,0,0,1,1,4,28870035.0
8,0,0,0,1,1,5,28449054.0
...,...,...,...,...,...,...,...
1658850,16,30,14,0,2,1,1184064.0
1658852,16,30,14,0,2,2,1438688.0
1658854,16,30,14,0,2,3,1479997.0
1658856,16,30,14,0,2,4,1557311.0


In [24]:
temp2

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,AMT
0,0,0,0,1,1,1,3887895.0
2,0,0,0,1,1,2,3439876.0
4,0,0,0,1,1,3,10302553.0
6,0,0,0,1,1,4,28870035.0
8,0,0,0,1,1,5,28449054.0
...,...,...,...,...,...,...,...
1658850,16,30,14,0,2,1,1184064.0
1658852,16,30,14,0,2,2,1438688.0
1658854,16,30,14,0,2,3,1479997.0
1658856,16,30,14,0,2,4,1557311.0


In [118]:
# 예측
# temp2['AMT'] = np.round(pred, 0)
temp2['REG_YYMM'] = 2019*100 + 4
temp2 = temp2[['REG_YYMM', 'CARD_SIDO_NM', 'STD_CLSS_NM', 'AMT']]
temp2 = temp2.groupby(['REG_YYMM', 'CARD_SIDO_NM', 'STD_CLSS_NM']).sum().reset_index(drop=False)

In [119]:
X_test = X_test[['CARD_SIDO_NM', 'STD_CLSS_NM', 'AMT']]

In [120]:
X_test['REG_YYMM'] = 202004

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [121]:
X_test

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,AMT,REG_YYMM
0,0,0,3887895.0,202004
2,0,0,3439876.0,202004
4,0,0,10302553.0,202004
6,0,0,28870035.0,202004
8,0,0,28449054.0,202004
...,...,...,...,...
1658850,16,30,1184064.0,202004
1658852,16,30,1438688.0,202004
1658854,16,30,1479997.0,202004
1658856,16,30,1557311.0,202004


In [122]:
X_test = X_test.groupby(['REG_YYMM', 'CARD_SIDO_NM', 'STD_CLSS_NM']).sum().reset_index(drop=False)

In [123]:
X_test

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,202004,0,0,1.296890e+10
1,202004,0,1,1.381215e+10
2,202004,0,2,4.304794e+09
3,202004,0,3,3.434855e+09
4,202004,0,4,3.435337e+09
...,...,...,...,...
692,202004,16,36,1.013828e+10
693,202004,16,37,6.376354e+10
694,202004,16,38,4.285731e+09
695,202004,16,39,9.503209e+09


In [31]:
temp2

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,201904,0,0,1.296890e+10
1,201904,0,1,1.381215e+10
2,201904,0,2,4.304794e+09
3,201904,0,3,3.434855e+09
4,201904,0,4,3.435337e+09
...,...,...,...,...
692,201904,16,36,1.013828e+10
693,201904,16,37,6.376354e+10
694,201904,16,38,4.285731e+09
695,201904,16,39,9.503209e+09


In [109]:
# 디코딩 
X_test['CARD_SIDO_NM'] = encoders['CARD_SIDO_NM'].inverse_transform(X_test['CARD_SIDO_NM'])
X_test['STD_CLSS_NM'] = encoders['STD_CLSS_NM'].inverse_transform(X_test['STD_CLSS_NM'])

ValueError: y contains previously unseen labels: ['강원' '경기' '경남' '경북' '광주' '대구' '대전' '부산' '서울' '세종' '울산' '인천' '전남' '전북'
 '제주' '충남' '충북']

In [172]:
X_test['REG_YYMM']=202004

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,HOM_SIDO_NM,AGE,SEX_CTGO_CD,FLC,AMT
0,강원,건강보조식품 소매업,0,1,1,1,756808.0
2,강원,건강보조식품 소매업,0,1,1,2,613041.0
4,강원,건강보조식품 소매업,0,1,1,3,1338790.0
6,강원,건강보조식품 소매업,0,1,1,4,2212818.0
8,강원,건강보조식품 소매업,0,1,1,5,2208922.0
...,...,...,...,...,...,...,...
1658850,충북,정기 항공 운송업,14,0,2,1,159079.0
1658852,충북,정기 항공 운송업,14,0,2,2,343743.0
1658854,충북,정기 항공 운송업,14,0,2,3,421386.0
1658856,충북,정기 항공 운송업,14,0,2,4,487581.0


In [341]:
submission = pd.read_csv('submission.csv', index_col=0)

In [322]:
submission

Unnamed: 0_level_0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,202004,강원,건강보조식품 소매업,6.545358e+08
1,202004,강원,골프장 운영업,6.946645e+09
2,202004,강원,과실 및 채소 소매업,1.852209e+09
3,202004,강원,관광 민예품 및 선물용품 소매업,9.881226e+07
4,202004,강원,그외 기타 분류안된 오락관련 서비스업,9.524266e+07
...,...,...,...,...
1389,202007,충북,피자 햄버거 샌드위치 및 유사 음식점업,1.863351e+09
1390,202007,충북,한식 음식점업,2.809644e+10
1391,202007,충북,호텔업,2.451290e+08
1392,202007,충북,화장품 및 방향제 소매업,1.320792e+09


In [342]:
AMT = list(X_test['AMT'].values) * 2

In [343]:
len(AMT)

1394

In [344]:
submission['AMT'] = AMT

In [345]:
submission

Unnamed: 0_level_0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,202004,강원,건강보조식품 소매업,1.296890e+10
1,202004,강원,골프장 운영업,1.381215e+10
2,202004,강원,과실 및 채소 소매업,4.304794e+09
3,202004,강원,관광 민예품 및 선물용품 소매업,3.434855e+09
4,202004,강원,그외 기타 분류안된 오락관련 서비스업,3.435337e+09
...,...,...,...,...
1389,202007,충북,피자 햄버거 샌드위치 및 유사 음식점업,1.013828e+10
1390,202007,충북,한식 음식점업,6.376354e+10
1391,202007,충북,호텔업,4.285731e+09
1392,202007,충북,화장품 및 방향제 소매업,9.503209e+09


In [346]:
data['REG_YYMM'] = data['year'] * 100 + data['month']

In [347]:
data

Unnamed: 0,CARD_SIDO_NM,CARD_CCG_NM,STD_CLSS_NM,HOM_SIDO_NM,HOM_CCG_NM,AGE,SEX_CTGO_CD,FLC,CSTMR_CNT,AMT,CNT,year,month,REG_YYMM
0,강원,강릉시,건강보조식품 소매업,강원,강릉시,20s,1,1,4,311200,4,2019,1,201901
1,강원,강릉시,건강보조식품 소매업,강원,강릉시,30s,1,2,7,1374500,8,2019,1,201901
2,강원,강릉시,건강보조식품 소매업,강원,강릉시,30s,2,2,6,818700,6,2019,1,201901
3,강원,강릉시,건강보조식품 소매업,강원,강릉시,40s,1,3,4,1717000,5,2019,1,201901
4,강원,강릉시,건강보조식품 소매업,강원,강릉시,40s,1,4,3,1047300,3,2019,1,201901
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24697787,충북,충주시,휴양콘도 운영업,충북,충주시,30s,1,2,3,43300,4,2020,3,202003
24697788,충북,충주시,휴양콘도 운영업,충북,충주시,40s,1,3,3,35000,3,2020,3,202003
24697789,충북,충주시,휴양콘도 운영업,충북,충주시,50s,1,4,4,188000,6,2020,3,202003
24697790,충북,충주시,휴양콘도 운영업,충북,충주시,50s,2,4,4,99000,6,2020,3,202003


In [348]:
data2 = data[['REG_YYMM','CARD_SIDO_NM', 'STD_CLSS_NM', 'AMT']]

In [349]:
data2

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,201901,강원,건강보조식품 소매업,311200
1,201901,강원,건강보조식품 소매업,1374500
2,201901,강원,건강보조식품 소매업,818700
3,201901,강원,건강보조식품 소매업,1717000
4,201901,강원,건강보조식품 소매업,1047300
...,...,...,...,...
24697787,202003,충북,휴양콘도 운영업,43300
24697788,202003,충북,휴양콘도 운영업,35000
24697789,202003,충북,휴양콘도 운영업,188000
24697790,202003,충북,휴양콘도 운영업,99000


In [350]:
data2 = data2[data2['REG_YYMM'] == 202003]

In [351]:
data2

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
23437284,202003,강원,건강보조식품 소매업,2529000
23437285,202003,강원,건강보조식품 소매업,1133650
23437286,202003,강원,건강보조식품 소매업,570800
23437287,202003,강원,건강보조식품 소매업,4192828
23437288,202003,강원,건강보조식품 소매업,3358900
...,...,...,...,...
24697787,202003,충북,휴양콘도 운영업,43300
24697788,202003,충북,휴양콘도 운영업,35000
24697789,202003,충북,휴양콘도 운영업,188000
24697790,202003,충북,휴양콘도 운영업,99000


In [352]:
data2 = data2.groupby(['REG_YYMM', 'CARD_SIDO_NM', 'STD_CLSS_NM'])['AMT'].sum().reset_index()

In [353]:
data2.drop('REG_YYMM', axis=1, inplace=True)

In [354]:
data2

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,강원,건강보조식품 소매업,96059012
1,강원,골프장 운영업,2915797995
2,강원,과실 및 채소 소매업,994816943
3,강원,관광 민예품 및 선물용품 소매업,13317300
4,강원,그외 기타 스포츠시설 운영업,2075000
...,...,...,...
604,충북,피자 햄버거 샌드위치 및 유사 음식점업,1315245299
605,충북,한식 음식점업,16152482704
606,충북,호텔업,15248550
607,충북,화장품 및 방향제 소매업,428881434


In [355]:
data2

Unnamed: 0,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,강원,건강보조식품 소매업,96059012
1,강원,골프장 운영업,2915797995
2,강원,과실 및 채소 소매업,994816943
3,강원,관광 민예품 및 선물용품 소매업,13317300
4,강원,그외 기타 스포츠시설 운영업,2075000
...,...,...,...
604,충북,피자 햄버거 샌드위치 및 유사 음식점업,1315245299
605,충북,한식 음식점업,16152482704
606,충북,호텔업,15248550
607,충북,화장품 및 방향제 소매업,428881434


In [356]:
submission

Unnamed: 0_level_0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,202004,강원,건강보조식품 소매업,1.296890e+10
1,202004,강원,골프장 운영업,1.381215e+10
2,202004,강원,과실 및 채소 소매업,4.304794e+09
3,202004,강원,관광 민예품 및 선물용품 소매업,3.434855e+09
4,202004,강원,그외 기타 분류안된 오락관련 서비스업,3.435337e+09
...,...,...,...,...
1389,202007,충북,피자 햄버거 샌드위치 및 유사 음식점업,1.013828e+10
1390,202007,충북,한식 음식점업,6.376354e+10
1391,202007,충북,호텔업,4.285731e+09
1392,202007,충북,화장품 및 방향제 소매업,9.503209e+09


In [334]:
# submission = submission[submission['REG_YYMM'] == 202004]

In [357]:
submission.drop('AMT', axis=1, inplace=True)

In [358]:
submission = submission.merge(data2, left_on=['CARD_SIDO_NM','STD_CLSS_NM'], right_on=['CARD_SIDO_NM','STD_CLSS_NM'], how='left')

In [359]:
submission.fillna(0, inplace=True)

In [360]:
indexes = submission[submission['AMT'] == 0].index

In [361]:
len(AMT)

1394

In [362]:
submission

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,202004,강원,건강보조식품 소매업,9.605901e+07
1,202004,강원,골프장 운영업,2.915798e+09
2,202004,강원,과실 및 채소 소매업,9.948169e+08
3,202004,강원,관광 민예품 및 선물용품 소매업,1.331730e+07
4,202004,강원,그외 기타 분류안된 오락관련 서비스업,0.000000e+00
...,...,...,...,...
1389,202007,충북,피자 햄버거 샌드위치 및 유사 음식점업,1.315245e+09
1390,202007,충북,한식 음식점업,1.615248e+10
1391,202007,충북,호텔업,1.524855e+07
1392,202007,충북,화장품 및 방향제 소매업,4.288814e+08


In [267]:
submission['AMT'] 

0      9.605901e+07
1      2.915798e+09
2      9.948169e+08
3      1.331730e+07
4      0.000000e+00
           ...     
692    1.315245e+09
693    1.615248e+10
694    1.524855e+07
695    4.288814e+08
696    1.273349e+07
Name: AMT, Length: 697, dtype: float64

In [363]:
submission.loc[indexes, 'AMT'] = 1.15

In [364]:
submission

Unnamed: 0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
0,202004,강원,건강보조식품 소매업,9.605901e+07
1,202004,강원,골프장 운영업,2.915798e+09
2,202004,강원,과실 및 채소 소매업,9.948169e+08
3,202004,강원,관광 민예품 및 선물용품 소매업,1.331730e+07
4,202004,강원,그외 기타 분류안된 오락관련 서비스업,1.150000e+00
...,...,...,...,...
1389,202007,충북,피자 햄버거 샌드위치 및 유사 음식점업,1.315245e+09
1390,202007,충북,한식 음식점업,1.615248e+10
1391,202007,충북,호텔업,1.524855e+07
1392,202007,충북,화장품 및 방향제 소매업,4.288814e+08


In [365]:
submission.index.name = 'id'
submission.to_csv('submission1.csv', encoding='utf-8-sig')
submission.head()

Unnamed: 0_level_0,REG_YYMM,CARD_SIDO_NM,STD_CLSS_NM,AMT
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,202004,강원,건강보조식품 소매업,96059010.0
1,202004,강원,골프장 운영업,2915798000.0
2,202004,강원,과실 및 채소 소매업,994816900.0
3,202004,강원,관광 민예품 및 선물용품 소매업,13317300.0
4,202004,강원,그외 기타 분류안된 오락관련 서비스업,1.15
