In [1]:
import pandas as pd

# 데이터 로드
train_df = pd.read_csv("train.csv")  # 파일 경로가 올바른지 확인하세요

# 수치형 및 범주형 칼럼 구분
numerical_cols = train_df.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = train_df.select_dtypes(include=['object']).columns

# 수치형 및 범주형 칼럼 개수 계산
num_numerical = len(numerical_cols)
num_categorical = len(categorical_cols)

# 결과 출력
print(f"Number of Numerical Columns: {num_numerical}")
print(f"Number of Categorical Columns: {num_categorical}")


Number of Numerical Columns: 38
Number of Categorical Columns: 43


In [2]:
# 확인
train_df.columns
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [3]:
# MSSubClass는 단순 명목형 feature이므로 dtype을 object로 변경
train_df['MSSubClass'] = train_df['MSSubClass'].astype('object')
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   object 
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [4]:
# 결측치 찾고 결측치 소팅
missing = train_df.isnull().sum() / len(train_df) * 100
missing = missing[missing>0].sort_values(ascending=False)

# 결측치 비율 80% 넘는 칼럼 삭제위해 missing_drop 만듦
missing_drop = missing[missing>80]
missing_drop

PoolQC         99.520548
MiscFeature    96.301370
Alley          93.767123
Fence          80.753425
dtype: float64

In [5]:
# missing_drop 삭제
train_df = train_df.drop(missing_drop.index, axis=1)

In [6]:
# 확인
train_df.columns
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 77 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   object 
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   LotShape       1460 non-null   object 
 7   LandContour    1460 non-null   object 
 8   Utilities      1460 non-null   object 
 9   LotConfig      1460 non-null   object 
 10  LandSlope      1460 non-null   object 
 11  Neighborhood   1460 non-null   object 
 12  Condition1     1460 non-null   object 
 13  Condition2     1460 non-null   object 
 14  BldgType       1460 non-null   object 
 15  HouseStyle     1460 non-null   object 
 16  OverallQual    1460 non-null   int64  
 17  OverallCond    1460 non-null   int64  
 18  YearBuil

In [7]:
# 나머지 결측치 자료는 최빈값 대체하기.
# 최빈값 대체 이전에 missing 데이터프레임에도 drop 함께 해주기

missing = missing.drop(missing_drop.index, errors='ignore')
missing

# ex = train_df[missing]

ex = missing.index
numeric_feature = train_df[ex].select_dtypes(['int64', 'float64'])
categorical_feature = train_df[ex].select_dtypes('object')

numeric_feature

Unnamed: 0,LotFrontage,GarageYrBlt,MasVnrArea
0,65.0,2003.0,196.0
1,80.0,1976.0,0.0
2,68.0,2001.0,162.0
3,60.0,1998.0,0.0
4,84.0,2000.0,350.0
...,...,...,...
1455,62.0,1999.0,0.0
1456,85.0,1978.0,119.0
1457,66.0,1941.0,0.0
1458,68.0,1950.0,0.0


In [8]:
# Electrical BsmtFinType1 GarageCond GarageQual GarageYrBlt가 없습니다.

In [23]:
# 수치형 및 범주형 칼럼 구분 갱신
numerical_cols = train_df.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = train_df.select_dtypes(include=['object']).columns

# 수치형 및 범주형 칼럼 개수 갱신
num_numerical = len(numerical_cols)
num_categorical = len(categorical_cols)

pd.set_option('display.max_columns', None)
train_df[categorical_cols].describe(include='all')

Unnamed: 0,MSSubClass,MSZoning,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition
count,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,1460,588,1460,1460,1460,1423,1423,1422,1423,1422,1460,1460,1460,1459,1460,1460,770,1379,1379,1379,1379,1460,1460,1460
unique,15,5,2,4,4,2,5,3,25,9,8,5,8,6,8,15,16,3,4,5,6,4,4,4,6,6,6,5,2,5,4,7,5,6,3,5,5,3,9,6
top,20,RL,Pave,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,TA,TA,PConc,TA,TA,No,Unf,Unf,GasA,Ex,Y,SBrkr,TA,Typ,Gd,Attchd,Unf,TA,TA,Y,WD,Normal
freq,536,1151,1454,925,1311,1459,1052,1382,225,1260,1445,1220,726,1141,1434,515,504,445,906,1282,647,649,1311,953,430,1256,1428,741,1365,1334,735,1360,380,870,605,1311,1326,1340,1267,1198


In [25]:
# 범주형 값을 최빈값으로 처리하기.
for col in categorical_feature.columns:
    mode_value = train_df[col].mode(dropna=True)[0]  # 최빈값 계산
    train_df[col].fillna(mode_value, inplace=True)   # 결측치 대체

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df[col].fillna(mode_value, inplace=True)   # 결측치 대체


In [27]:
# 검증
train_df[categorical_feature.columns].isnull().sum()

MasVnrType      0
FireplaceQu     0
GarageType      0
GarageFinish    0
GarageQual      0
GarageCond      0
BsmtFinType2    0
BsmtExposure    0
BsmtFinType1    0
BsmtCond        0
BsmtQual        0
Electrical      0
dtype: int64

In [29]:
# 수치형 값을 평균값으로 처리하기.
for col in numeric_feature.columns:
    mean_value = train_df[col].mean()
    train_df[col].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df[col].fillna(mean_value, inplace=True)


In [31]:
train_df[numeric_feature.columns].isnull().sum()

LotFrontage    0
GarageYrBlt    0
MasVnrArea     0
dtype: int64

In [33]:
# GrLivArea, LotArea, GarageArea 이상치 있는 레코드 삭제

train_df = train_df[train_df['Id'] != 692][train_df['Id'] != 1183]
train_df = train_df.drop(train_df[(train_df['GrLivArea']>4000) & (train_df['SalePrice']<12.5)].index)
train_df = train_df.drop(train_df[(train_df['LotArea']>150000)].index)
train_df = train_df.drop(train_df[(train_df['GarageArea']>1200) & (train_df['SalePrice']<12.5)].index)

  train_df = train_df[train_df['Id'] != 692][train_df['Id'] != 1183]


In [35]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1455 entries, 0 to 1459
Data columns (total 77 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1455 non-null   int64  
 1   MSSubClass     1455 non-null   object 
 2   MSZoning       1455 non-null   object 
 3   LotFrontage    1455 non-null   float64
 4   LotArea        1455 non-null   int64  
 5   Street         1455 non-null   object 
 6   LotShape       1455 non-null   object 
 7   LandContour    1455 non-null   object 
 8   Utilities      1455 non-null   object 
 9   LotConfig      1455 non-null   object 
 10  LandSlope      1455 non-null   object 
 11  Neighborhood   1455 non-null   object 
 12  Condition1     1455 non-null   object 
 13  Condition2     1455 non-null   object 
 14  BldgType       1455 non-null   object 
 15  HouseStyle     1455 non-null   object 
 16  OverallQual    1455 non-null   int64  
 17  OverallCond    1455 non-null   int64  
 18  YearBuilt    

In [37]:
# LotFrontage, LotArea 이상치 중 test 데이터 상의 최대값을 넘어가는 극단치들만 평균치로 대체
test_df = pd.read_csv('test.csv')

train_df['LotFrontage'].where(
    train_df['LotFrontage'] < test_df['LotFrontage'].agg('max'), 
    other=test_df['LotFrontage'].agg('mean'), 
    inplace=True)

train_df['LotArea'].where(
    train_df['LotArea'] < test_df['LotArea'].agg('max'), 
    other=test_df['LotArea'].agg('mean'), 
    inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['LotFrontage'].where(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['LotArea'].where(
  train_df['LotArea'].where(


In [55]:
# 서수형 피처를 라벨링하기 전 unique 값 확인

ordinal_feature = ['ExterQual','ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC', 'KitchenQual', 'GarageQual', 'GarageCond', 'FireplaceQu', 'OverallQual','OverallCond' ]

for i in ordinal_feature:
    print(f"{i}: ", train_df[i].unique())

ExterQual:  ['Gd' 'TA' 'Ex' 'Fa']
ExterCond:  ['TA' 'Gd' 'Fa' 'Po' 'Ex']
BsmtQual:  ['Gd' 'TA' 'Ex' 'Fa']
BsmtCond:  ['TA' 'Gd' 'Fa' 'Po']
HeatingQC:  ['Ex' 'Gd' 'TA' 'Fa' 'Po']
KitchenQual:  ['Gd' 'TA' 'Ex' 'Fa']
GarageQual:  ['TA' 'Fa' 'Gd' 'Ex' 'Po']
GarageCond:  ['TA' 'Fa' 'Gd' 'Po' 'Ex']
FireplaceQu:  ['Gd' 'TA' 'Fa' 'Ex' 'Po']
OverallQual:  [ 7  6  8  5  9  4 10  3  1  2]
OverallCond:  [5 8 6 7 4 2 3 9 1]


In [57]:
# 서수형 피처를 라벨링 하기.
# 서수형 데이터에 대한 상관관계 확인을 위해 labelEncoder를 활용.

# 5점 척도형 변수만 라벨링 진행
ordinal_feature_map = ['ExterQual','ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC', 'KitchenQual', 'GarageQual', 'GarageCond', 'FireplaceQu']

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

# 사용자 지정 매핑 사전
mapping = {
    'Ex': 5,  # Excellent
    'Gd': 4,  # Good
    'TA': 3,  # Typical/Average
    'Fa': 2,  # Fair
    'Po': 1   # Poor
}

for col in ordinal_feature_map:
    train_df[col] = train_df[col].map(mapping)

In [59]:
train_df[ordinal_feature].info()

<class 'pandas.core.frame.DataFrame'>
Index: 1455 entries, 0 to 1459
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   ExterQual    1455 non-null   int64
 1   ExterCond    1455 non-null   int64
 2   BsmtQual     1455 non-null   int64
 3   BsmtCond     1455 non-null   int64
 4   HeatingQC    1455 non-null   int64
 5   KitchenQual  1455 non-null   int64
 6   GarageQual   1455 non-null   int64
 7   GarageCond   1455 non-null   int64
 8   FireplaceQu  1455 non-null   int64
 9   OverallQual  1455 non-null   int64
 10  OverallCond  1455 non-null   int64
dtypes: int64(11)
memory usage: 136.4 KB


In [85]:
# 명목형 변수 확인
nominal_cols = list(x for x in categorical_cols if x not in ordinal_feature)
train_df[nominal_cols].info()

<class 'pandas.core.frame.DataFrame'>
Index: 1455 entries, 0 to 1459
Data columns (total 31 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   MSSubClass     1455 non-null   object
 1   MSZoning       1455 non-null   object
 2   Street         1455 non-null   object
 3   LotShape       1455 non-null   object
 4   LandContour    1455 non-null   object
 5   Utilities      1455 non-null   object
 6   LotConfig      1455 non-null   object
 7   LandSlope      1455 non-null   object
 8   Neighborhood   1455 non-null   object
 9   Condition1     1455 non-null   object
 10  Condition2     1455 non-null   object
 11  BldgType       1455 non-null   object
 12  HouseStyle     1455 non-null   object
 13  RoofStyle      1455 non-null   object
 14  RoofMatl       1455 non-null   object
 15  Exterior1st    1455 non-null   object
 16  Exterior2nd    1455 non-null   object
 17  MasVnrType     1455 non-null   object
 18  Foundation     1455 non-null   ob

In [87]:
# 명목형 변수 원핫인코딩 진행
train_df = pd.get_dummies(train_df, drop_first=True)
train_df.head()

Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Fireplaces,FireplaceQu,GarageYrBlt,GarageCars,GarageArea,GarageQual,GarageCond,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,MSSubClass_30,MSSubClass_40,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,Utilities_NoSeWa,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_WdShake,RoofMatl_WdShngl,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,MasVnrType_BrkFace,MasVnrType_Stone,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_Rec,BsmtFinType1_Unf,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_Rec,BsmtFinType2_Unf,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,CentralAir_Y,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_RFn,GarageFinish_Unf,PavedDrive_P,PavedDrive_Y,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,1,65.0,8450.0,7,5,2003,2003,196.0,4,3,4,3,706,0,150,856,5,856,854,0,1710,1,0,2,1,3,1,4,8,0,4,2003.0,2,548,3,3,0,61,0,0,0,0,0,2,2008,208500,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False
1,2,80.0,9600.0,6,8,1976,1976,0.0,3,3,4,3,978,0,284,1262,5,1262,0,0,1262,0,1,2,0,3,1,3,6,1,3,1976.0,2,460,3,3,298,0,0,0,0,0,0,5,2007,181500,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False
2,3,68.0,11250.0,7,5,2001,2002,162.0,4,3,4,3,486,0,434,920,5,920,866,0,1786,1,0,2,1,3,1,4,6,1,3,2001.0,2,608,3,3,0,42,0,0,0,0,0,9,2008,223500,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False
3,4,60.0,9550.0,7,5,1915,1970,0.0,3,3,3,4,216,0,540,756,4,961,756,0,1717,1,0,1,0,3,1,4,7,1,4,1998.0,3,642,3,3,0,35,272,0,0,0,0,2,2006,140000,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False
4,5,84.0,14260.0,8,5,2000,2000,350.0,4,3,4,3,655,0,490,1145,5,1145,1053,0,2198,1,0,2,1,4,1,4,9,1,3,2000.0,3,836,3,3,192,84,0,0,0,0,0,12,2008,250000,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False


In [91]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1455 entries, 0 to 1459
Columns: 227 entries, Id to SaleCondition_Partial
dtypes: bool(181), float64(4), int64(42)
memory usage: 791.4 KB


In [101]:
# Yeo-Johnson 변환 전 수치형 피쳐의 정규성 확인
from scipy.stats import shapiro

# 수치형 변수 갱신(라벨 인코딩한 피쳐 포함)
numerical_cols = train_df.select_dtypes(include=['int64', 'float64']).columns

not_rejected_cols = []

for col in numerical_cols:
    stat, p = shapiro(train_df[numerical_cols])
    alpha = 0.05
    print(col , 'shapiro test result:')
    if p > alpha:
        print('Fail to reject H0')
        not_rejected_cols.append(col)
    else:
        print('reject H0') # 귀무가설 기각 - 정규성을 따르지 않음
    print('------------------')
    
print("정규성을 따르는 칼럼:", not_rejected_cols)

Id shapiro test result:
reject H0
------------------
LotFrontage shapiro test result:
reject H0
------------------
LotArea shapiro test result:
reject H0
------------------
OverallQual shapiro test result:
reject H0
------------------
OverallCond shapiro test result:
reject H0
------------------
YearBuilt shapiro test result:
reject H0
------------------
YearRemodAdd shapiro test result:
reject H0
------------------
MasVnrArea shapiro test result:
reject H0
------------------
ExterQual shapiro test result:
reject H0
------------------
ExterCond shapiro test result:
reject H0
------------------
BsmtQual shapiro test result:
reject H0
------------------
BsmtCond shapiro test result:
reject H0
------------------
BsmtFinSF1 shapiro test result:
reject H0
------------------
BsmtFinSF2 shapiro test result:
reject H0
------------------
BsmtUnfSF shapiro test result:
reject H0
------------------
TotalBsmtSF shapiro test result:
reject H0
------------------
HeatingQC shapiro test result:
reject

  res = hypotest_fun_out(*samples, **kwds)


수치형 피쳐 중 정규성을 따르는 피쳐는 없으므로 모든 수치형 피쳐에 대해 Yeo-Johnson 변환을 적용

In [103]:
# Yeo-Johnson 변환으로 수치형 피쳐 스케일링(표준화 및 정규화)
from sklearn.preprocessing import power_transform

train_df[numerical_cols]=power_transform(train_df[numerical_cols], standardize=True)

train_df.head()

  x = um.multiply(x, x, out=x)


Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Fireplaces,FireplaceQu,GarageYrBlt,GarageCars,GarageArea,GarageQual,GarageCond,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,MSSubClass_30,MSSubClass_40,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,Utilities_NoSeWa,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_WdShake,RoofMatl_WdShngl,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,MasVnrType_BrkFace,MasVnrType_Stone,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_Rec,BsmtFinType1_Unf,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_Rec,BsmtFinType2_Unf,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,CentralAir_Y,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_RFn,GarageFinish_Unf,PavedDrive_P,PavedDrive_Y,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,-2.158602,-0.188358,-0.187376,0.672471,-0.476076,1.150705,0.907574,1.218824,1.109464,-0.194812,0.687954,-0.047002,0.802374,-0.357641,-0.888273,-0.400805,0.930703,-0.797452,1.173129,-0.134887,0.535149,1.178944,-0.241209,0.804453,1.284347,0.174182,-0.172982,0.778969,0.951344,-1.024886,0.432357,1.116847,0.277096,0.386197,0.071807,0.070238,-0.949614,0.842402,-0.408411,-0.129505,-0.294333,-0.064349,-0.188646,-1.696652,0.139416,0.579983,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False
1,-2.147155,0.556509,0.082782,-0.040779,2.010117,0.006958,-0.621583,-0.827538,-0.65448,-0.194812,0.687954,-0.047002,1.015855,-0.357641,-0.435267,0.517554,0.930703,0.436048,-0.869272,-0.134887,-0.383646,-0.837895,4.145781,0.804453,-0.773547,0.174182,-0.172982,-0.747771,-0.230933,0.792547,-1.395311,-0.278279,0.277096,-0.002228,0.071807,0.070238,1.21537,-1.070925,-0.408411,-0.129505,-0.294333,-0.064349,-0.188646,-0.445187,-0.613562,0.231388,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False
2,-2.136777,-0.035561,0.43261,0.672471,-0.476076,1.054325,0.835133,1.180719,1.109464,-0.194812,0.687954,-0.047002,0.576547,-0.357641,-0.049056,-0.24964,0.930703,-0.565757,1.175764,-0.134887,0.667576,1.178944,-0.241209,0.804453,1.284347,0.174182,-0.172982,0.778969,-0.230933,0.792547,-1.395311,0.994513,0.277096,0.644221,0.071807,0.070238,-0.949614,0.669211,-0.408411,-0.129505,-0.294333,-0.064349,-0.188646,0.986517,0.139416,0.753809,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False
3,-2.127127,-0.447832,0.071532,0.672471,-0.476076,-1.627818,-0.863268,-0.827538,-0.65448,-0.194812,-0.817283,3.567251,0.147925,-0.357641,0.182331,-0.642977,-0.328798,-0.426224,1.149909,-0.134887,0.54758,1.178944,-0.241209,-1.02351,-0.773547,0.174182,-0.172982,0.778969,0.394346,0.792547,0.432357,0.81728,1.763191,0.788281,0.071807,0.070238,-0.949614,0.585369,2.451794,-0.129505,-0.294333,-0.064349,-0.188646,-1.696652,-1.36793,-0.426926,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,True,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False
4,-2.118021,0.747777,0.985997,1.361051,-0.476076,1.006892,0.694478,1.325141,1.109464,-0.194812,0.687954,-0.047002,0.755467,-0.357641,0.076492,0.261927,0.930703,0.130075,1.212194,-0.134887,1.302861,1.178944,-0.241209,0.804453,1.284347,1.386026,-0.172982,0.778969,1.453974,0.792547,-1.395311,0.934611,1.763191,1.585161,0.071807,0.070238,1.081581,0.992294,-0.408411,-0.129505,-0.294333,-0.064349,-0.188646,1.960678,0.139416,1.03305,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False


In [105]:
train_df.describe()

Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Fireplaces,FireplaceQu,GarageYrBlt,GarageCars,GarageArea,GarageQual,GarageCond,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
count,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0,1455.0
mean,-2.344059e-16,-5.4938870000000005e-17,1.013317e-16,-7.325183e-16,-1.75316e-15,1.110986e-16,-3.540505e-16,-1.025526e-16,7.305344e-15,5.945607e-15,7.813528000000001e-17,2.002217e-16,-1.52608e-16,-1.587123e-17,1.208655e-16,5.274132e-16,-1.623749e-16,1.082906e-15,2.0754680000000003e-17,3.4184190000000005e-17,-5.567139e-16,6.104319000000001e-17,1.062152e-16,-1.709209e-17,-6.592665e-17,-9.766910000000001e-17,2.585484e-15,1.032851e-15,-4.468362e-16,-4.6392820000000004e-17,-5.860146e-17,3.05216e-18,-2.1975550000000002e-17,2.14872e-16,-6.922298e-16,1.960707e-15,-3.9067640000000005e-17,5.615974000000001e-17,-1.709209e-17,2.1975550000000002e-17,-1.709209e-17,7.325183e-18,-1.892339e-17,-2.930073e-16,8.9e-05,-5.524409e-15
std,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344,1.000344
min,-2.158602,-2.789207,-3.201546,-4.24262,-6.125109,-2.273141,-1.48715,-0.8275383,-4.086289,-9.495216,-2.712485,-6.608245,-1.350639,-0.3576407,-2.063627,-3.131904,-2.100252,-3.931963,-0.8692719,-0.1348871,-4.274103,-0.8378948,-0.2412091,-3.074094,-0.7735473,-3.635939,-20.01825,-2.718817,-4.049491,-1.024886,-2.435069,-2.250638,-2.151497,-2.537014,-6.543212,-6.185503,-0.9496145,-1.070925,-0.408411,-0.1295048,-0.2943328,-0.06434895,-0.1886457,-2.181034,-1.36793,-4.080256
25%,-0.7879233,-0.4478317,-0.4211146,-0.7833332,-0.4760758,-0.7098303,-0.973619,-0.8275383,-0.6544803,-0.1948115,-0.8172829,-0.04700219,-1.350639,-0.3576407,-0.6215198,-0.5476075,-1.214138,-0.7011587,-0.8692719,-0.1348871,-0.7203273,-0.8378948,-0.2412091,-1.02351,-0.7735473,-1.059673,-0.1729824,-0.7477711,-0.9446681,-1.024886,-0.4814771,-0.8181892,-1.04541,-0.6082683,0.07180706,0.07023751,-0.9496145,-1.070925,-0.408411,-0.1295048,-0.2943328,-0.06434895,-0.1886457,-0.4451874,-0.613562,-0.6181993
50%,0.1001929,0.06768133,0.05208206,-0.04077851,-0.4760758,-0.1010213,0.3044493,-0.8275383,-0.6544803,-0.1948115,0.6879536,-0.04700219,0.4394037,-0.3576407,0.05245337,-0.08735977,0.9307026,-0.03719001,-0.8692719,-0.1348871,0.05159024,-0.8378948,-0.2412091,0.8044529,-0.7735473,0.1741817,-0.1729824,-0.7477711,-0.2309331,0.7925466,0.4323573,-0.1698554,0.2770956,0.08274381,0.07180706,0.07023751,-0.9496145,0.4322396,-0.408411,-0.1295048,-0.2943328,-0.06434895,-0.1886457,-0.06851241,0.139416,-0.0418604
75%,0.8621346,0.5082447,0.4823494,0.6724711,0.4410559,1.030609,0.9814606,1.183223,1.109464,-0.1948115,0.6879536,-0.04700219,0.8032669,-0.3576407,0.6770584,0.5821016,0.9307026,0.7287904,1.142646,-0.1348871,0.6478893,1.178944,-0.2412091,0.8044529,1.284347,0.1741817,-0.1729824,0.7789691,0.3943461,0.7925466,0.4323573,0.9945133,0.2770956,0.5072386,0.07180706,0.07023751,1.04007,0.8931627,-0.408411,-0.1295048,-0.2943328,-0.06434895,-0.1886457,0.6454491,0.890924,0.639278
max,1.555158,4.834042,4.864027,2.677305,2.696282,1.50447,1.456756,1.54499,2.150175,4.415027,1.941146,3.567251,2.48056,2.797267,2.535183,8.687042,0.9307026,4.364175,1.309225,7.413605,4.25463,1.865844,4.145781,2.501362,1.614241,6.087534,5.313877,2.025032,3.427494,2.175042,4.035255,1.572797,3.384134,3.796773,10.47536,13.51154,1.519106,1.888376,2.45244,7.721723,3.397538,15.54027,5.300943,1.960678,1.641124,3.265572


In [107]:
# 전처리 결과 csv로 저장
train_df.to_csv('preprocessing_DataFrame_fin.csv', index=False)