## Cross Validationを取り入れる

In [2]:
import xgboost as xgb
from xgboost import XGBRegressor
# 事前準備処理
# x_train, y_train, x_eval, y_evalを作成する
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import time

# Fixed XGBoost hyperparameters shared by every model trained below.
params = dict(
    n_estimators=700,
    max_depth=6,
    min_child_weight=9,
    gamma=0,
    subsample=1.0,
    colsample_bytree=0.6,
    learning_rate=0.1,
)

def mean_absolute_percentage_error(y_train: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the mean absolute percentage error (MAPE) in percent.

    Computes ``100 * mean(|actual - predicted| / |actual|)``.

    Args:
        y_train: Actual target values (array-like; a list, NumPy array or
            pandas Series/DataFrame column). Flattened to 1-D before use.
        y_pred: Predicted values, same number of elements as ``y_train``.

    Returns:
        The MAPE as a Python float. If any actual value is zero the result
        is ``inf`` or ``nan`` (NumPy division semantics).

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    # Convert to flat float arrays so that (a) a pandas Series with a
    # non-default index is handled positionally rather than by label and
    # (b) an (n, 1) column never broadcasts against an (n,) prediction.
    actual = np.asarray(y_train, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size != predicted.size:
        raise ValueError(
            "y_train and y_pred must have the same number of elements "
            "(got {} and {})".format(actual.size, predicted.size)
        )
    if actual.size == 0:
        raise ValueError("MAPE is undefined for empty input")

    # Divide by |actual| so that negative targets contribute a positive error.
    return float(np.mean(np.abs(actual - predicted) / np.abs(actual)) * 100)

# Load the preprocessed v11 training features and targets.
train_x, train_y = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/processed_train_goto_x_v11.csv",
        "data/processed_train_goto_y_v11.csv",
    )
)



In [9]:
# Repeated random hold-out evaluation: 20 independent 80/20 splits of the
# training data, with one model fitted per split.
error = []   # (split seed, MAPE on the held-out rows) for each repetition
models = []  # fitted model for each repetition, in the same order as `error`
for i in range(20):
    # The split seed is drawn at random and printed below so that an
    # individual split can be recreated later.
    s = np.random.randint(2143486417,high=None)
    X_train, X_eval, Y_train, Y_eval = train_test_split( train_x, train_y, train_size=0.8, test_size=0.2, random_state = s)

    # 'id' and 'pj_no' are removed so they are not fed to the model.
    x_train = X_train.drop(['id','pj_no'],axis=1)
    y_train = Y_train.drop(['id'],axis=1)
    x_eval = X_eval.drop(['id','pj_no'],axis=1)

    # `random_state` is the scikit-learn style name of the XGBoost seed; the
    # former `seed=` keyword is deprecated in the sklearn wrapper.
    model = XGBRegressor(**params, random_state=19711022, n_jobs=-1)
    model.fit(x_train, y_train )
    pred = model.predict(x_eval)

    e = mean_absolute_percentage_error(Y_eval['keiyaku_pr'].values, pred)

    d = (s, e)
    print(d)
    error.append(d)
    models.append(model)

(1332828703, 7.8379210715338585)
(1454746898, 8.4867126590780888)
(377794708, 7.5356499399057357)
(366749575, 8.3039104291201742)
(18169468, 8.0925289301890526)
(2076689137, 8.4200145919153151)
(1054352731, 8.5484627665613395)
(809219189, 8.4967029694349581)
(433468903, 7.782745530754541)
(1165507357, 7.7012528782913527)
(338066410, 7.935656178425881)
(1839154262, 7.8670657751646376)
(1938919049, 8.8372092777622537)
(1142919811, 7.4537588103230465)
(506513843, 8.2040426078493009)
(276199332, 8.6986267106055077)
(1501742848, 8.8935179936857267)
(1158979114, 8.5717552137171182)
(1618916078, 7.7488989461460234)
(560138133, 8.854754522179034)


In [12]:
# Build a submission frame from the test features.
# NOTE(review): only models[0] is used here; the other fitted models are ignored.
test_x = pd.read_csv("data/processed_test_goto_x_v11.csv")
x_test = test_x.drop(columns=['id','pj_no'])

model = models[0]
ans = model.predict(x_test)

# Predictions are cast to int64 and attached next to the test ids.
submit = pd.DataFrame(test_x[['id']]).assign(
    keiyaku_pr=pd.Series(ans).astype(np.int64)
)

In [13]:
# Preview the first rows of the submission frame.
submit.head()

Unnamed: 0,id,keiyaku_pr
0,test_0000,20697090
1,test_0001,28098872
2,test_0002,26138188
3,test_0003,28254930
4,test_0004,26468168


In [14]:
# Write the submission as a tab-separated file with no header row and no
# index column. `header=False` is the documented way to omit the header
# (the previous `header=None` only worked because None is falsy).
submit.to_csv('data/submit_v11.tsv', sep='\t', header=False, index=False)

# V9での実施事項
- 土地売りと建て売りとで分けて訓練、予測を行う

#### ⇒効果ないことが判明した
#### それよりも、効果のないカラムを削除した方が良いのかもしれない

# V8での提出結果
MAPE ... 10.46

### 気づき事項
- 路線ごとにerrorが異なるのではないか？
- 上記の結果では、路線ごとにモデルを作るべきなのかもしれない
- 訓練データでのMAPEが3.56に対して、土地売りだと4.87。土地売りは別モデルとして学習すべき？
- 異常値は除去すべきかもしれない。

## 7/20 実施事項
- V9データと、XGBoostの固定のハイパーパラメタを使い、XGBoostに与える乱数を変更して複数のモデルを作ることで、精度が向上するかを試す

In [2]:
import xgboost as xgb
from xgboost import XGBRegressor
# 事前準備処理
# x_train, y_train, x_eval, y_evalを作成する
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import time


def mean_absolute_percentage_error(y_train: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the mean absolute percentage error (MAPE) in percent.

    Computes ``100 * mean(|actual - predicted| / |actual|)``.

    Args:
        y_train: Actual target values (array-like; a list, NumPy array or
            pandas Series/DataFrame column). Flattened to 1-D before use.
        y_pred: Predicted values, same number of elements as ``y_train``.

    Returns:
        The MAPE as a Python float. If any actual value is zero the result
        is ``inf`` or ``nan`` (NumPy division semantics).

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    # Convert to flat float arrays so that (a) a pandas Series with a
    # non-default index is handled positionally rather than by label and
    # (b) an (n, 1) column never broadcasts against an (n,) prediction.
    actual = np.asarray(y_train, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size != predicted.size:
        raise ValueError(
            "y_train and y_pred must have the same number of elements "
            "(got {} and {})".format(actual.size, predicted.size)
        )
    if actual.size == 0:
        raise ValueError("MAPE is undefined for empty input")

    # Divide by |actual| so that negative targets contribute a positive error.
    return float(np.mean(np.abs(actual - predicted) / np.abs(actual)) * 100)

def learn( train_x, train_y, params, s ):
    """Fit an XGBRegressor on the given data and return the fitted model.

    Args:
        train_x: Feature table passed to ``fit``.
        train_y: Target values passed to ``fit``.
        params: Keyword arguments forwarded to the ``XGBRegressor`` constructor.
        s: Random seed for the model.

    Returns:
        The fitted ``XGBRegressor`` (trained with ``n_jobs=-1``).
    """
    # `random_state` is the scikit-learn style seed parameter; the former
    # `seed=` keyword is deprecated in the XGBoost sklearn wrapper.
    model = XGBRegressor(**params, random_state=s, n_jobs=-1)
    model.fit(train_x, train_y)

    return model

# Fixed XGBoost hyperparameters shared by every model trained below.
params = dict(
    n_estimators=700,
    max_depth=6,
    min_child_weight=9,
    gamma=0,
    subsample=1.0,
    colsample_bytree=0.6,
    learning_rate=0.1,
)

# Load the preprocessed v11 training features and targets, then make one
# fixed (seeded) split with 80% of the rows used for training.
train_x = pd.read_csv("data/processed_train_goto_x_v11.csv")
train_y = pd.read_csv("data/processed_train_goto_y_v11.csv")
X_train, X_eval, Y_train, Y_eval = train_test_split(
    train_x,
    train_y,
    train_size=0.8,
    random_state=19711022,
)

# 'id' and 'pj_no' are removed so they are not fed to the model.
x_train = X_train.drop(columns=['id','pj_no'])
y_train = Y_train.drop(columns=['id'])
x_eval = X_eval.drop(columns=['id','pj_no'])





In [3]:
import time
# Train 30 models on the same train/eval split; only the XGBoost seed differs.
models= []
preds = []
model_seeds = []  # seed of each model, kept so the ensemble can be rebuilt

for i in range(30):
    print('iter : ', i,' starting...', end=' ')
    start = time.perf_counter()
    # Draw the seed explicitly (instead of inline in the call) and record it;
    # otherwise the trained models cannot be reproduced after a kernel restart.
    seed = np.random.randint(2143486417,high=None)
    model_seeds.append(seed)
    model = learn(x_train, y_train, params, seed)
    pred = model.predict(x_eval)
    models.append(model)
    preds.append(pred)
    end = time.perf_counter()
    print('finished ', 'elapsed time : ', end-start)
    

iter :  0  starting... finished  elapsed time :  16.804664932999998
iter :  1  starting... finished  elapsed time :  16.15021919200001
iter :  2  starting... finished  elapsed time :  15.564320976000005
iter :  3  starting... finished  elapsed time :  15.769527966999988
iter :  4  starting... finished  elapsed time :  15.51662281099999
iter :  5  starting... finished  elapsed time :  16.111430721999966
iter :  6  starting... finished  elapsed time :  15.502486129999966
iter :  7  starting... finished  elapsed time :  15.959529698999972
iter :  8  starting... finished  elapsed time :  15.556111141999963
iter :  9  starting... finished  elapsed time :  15.618003938000015
iter :  10  starting... finished  elapsed time :  15.64753819599997
iter :  11  starting... finished  elapsed time :  16.059642924000002
iter :  12  starting... finished  elapsed time :  16.594651253999984
iter :  13  starting... finished  elapsed time :  16.98959251399998
iter :  14  starting... finished  elapsed time :

In [4]:
# Stack the per-model prediction arrays and transpose, giving one column per model.
df = pd.DataFrame(preds).T

In [5]:
# Row-wise mean of the per-model prediction columns, truncated to int.
# An existing 'mean' column is excluded first so that re-running this cell
# does not average the previous mean in with the model predictions.
df['mean'] = df.drop(columns=['mean'], errors='ignore').apply(lambda row: int(row.mean()), axis=1)

In [6]:
# Put the hold-out targets next to the predictions (both positionally indexed
# after reset_index) and compute the per-row absolute percentage error.
Y_eval_pred = pd.concat([Y_eval.reset_index(), df], axis=1)
Y_eval_pred['mean_error'] = (
    (Y_eval_pred['keiyaku_pr'] - Y_eval_pred['mean']).abs()
    / Y_eval_pred['keiyaku_pr']
    * 100
)
# MAPE of the ensemble mean on the hold-out rows.
Y_eval_pred['mean_error'].mean()

8.491300804703242

In [38]:
# Preview two hold-out feature rows (still including 'id' and 'pj_no').
X_eval.head(2)

Unnamed: 0,id,pj_no,tc_mseki,tt_mseki,fukuin,road_st,garage,chiseki_js_hb,chiseki_kb_hb,yoseki1,...,行き止まり,行き止まり途中,街道沿い,裏道,角地,計画道路,踏切付近,genba_su,min_tc_mseki_goto,max_tc_mseki_goto
1255,train_1255,521,99.56,100.44,4.0,1,1,199.06,199.11,200,...,0,0,0,0,0,0,0,2.0,0,0
2319,train_2319,959,150.54,101.85,4.0,1,2,309.61,309.0,80,...,0,0,0,0,0,0,0,2.0,0,0


In [37]:
# Keep only the id, the actual price, the ensemble mean and its percentage error.
eval_out = pd.DataFrame(Y_eval_pred[['id','keiyaku_pr','mean','mean_error']])

In [40]:
# Attach the hold-out features to each error row via a left join on 'id'.
# NOTE(review): this rebinds `eval_out`, so running the cell twice merges twice.
eval_out = eval_out.merge(X_eval, on='id', how='left')

In [42]:
# Save the hold-out error table (the index column is written too, by default).
eval_out.to_csv("data/submit_v11_error.csv")

#### これで次の提出データを作る

In [280]:
# Load the preprocessed test features; 'id' and 'pj_no' are removed from the
# copy that is passed to the models.
test_x = pd.read_csv("data/processed_test_goto_x_v11.csv")
x_test = test_x.drop(columns=['id','pj_no'])

In [281]:
# Predict `x_test` with every trained model.
# Iterating over `models` directly (instead of a fixed range(30)) avoids an
# IndexError when the list holds fewer models, and the unused timing
# variables have been removed.
anss = []
for model in models:
    ans = model.predict(x_test)
    anss.append(ans)

In [282]:
# Stack the per-model prediction arrays and transpose, giving one column per model.
df = pd.DataFrame(anss).T

In [283]:
# Row-wise mean of the per-model prediction columns, truncated to int.
# An existing 'mean' column is excluded first so that re-running this cell
# does not average the previous mean in with the model predictions.
df['mean'] = df.drop(columns=['mean'], errors='ignore').apply(lambda row: int(row.mean()), axis=1)

In [284]:
# Build the submission from the ensemble mean and write it as a
# tab-separated file with no header row and no index column.
submit = pd.DataFrame(test_x[['id']])
submit['keiyaku_pr']=df['mean']
# `header=False` is the documented way to omit the header (the previous
# `header=None` only worked because None is falsy).
submit.to_csv('data/submit_v11.tsv', sep='\t', header=False, index=False)

### X_evalで再現テストする

In [271]:
# Re-use the name `x_test` for the hold-out features.
# NOTE(review): this rebinds `x_test`, which the submission cells set from the real test file.
x_test = X_eval.drop(columns=['id','pj_no'])

In [272]:
# Predict `x_test` with every trained model.
# Iterating over `models` directly (instead of a fixed range(30)) avoids an
# IndexError when the list holds fewer models, and the unused timing
# variables have been removed.
anss = []
for model in models:
    ans = model.predict(x_test)
    anss.append(ans)

In [273]:
# One column per model, plus the row-wise ensemble mean truncated to int.
df = pd.DataFrame(anss).transpose()
df['mean'] = df.apply(lambda row: int(row.mean()), axis=1)

In [274]:
# Put the hold-out targets next to the predictions; reset_index makes both positionally indexed.
Y_eval_pred2=pd.concat([Y_eval.reset_index(), df], axis=1)

In [275]:
# Per-row absolute percentage error of the ensemble mean.
Y_eval_pred2['error'] = (
    (Y_eval_pred2['keiyaku_pr'] - Y_eval_pred2['mean']).abs()
    / Y_eval_pred2['keiyaku_pr']
    * 100
)

In [276]:
# MAPE of the ensemble mean on the hold-out rows.
Y_eval_pred2['error'].mean()

8.42161858338207

### X_trainで再現テスト

In [277]:
# Re-use the name `x_test` for the training-split features (in-sample check).
# NOTE(review): this rebinds `x_test`, which the submission cells set from the real test file.
x_test = X_train.drop(columns=['id','pj_no'])

In [278]:
# Predict `x_test` with every trained model.
# Iterating over `models` directly (instead of a fixed range(30)) avoids an
# IndexError when the list holds fewer models, and the unused timing
# variables have been removed.
anss = []
for model in models:
    ans = model.predict(x_test)
    anss.append(ans)

In [279]:
# One column per model, plus the row-wise ensemble mean truncated to int.
df = pd.DataFrame(anss).transpose()
df['mean'] = df.apply(lambda row: int(row.mean()), axis=1)
# Align the training-split targets with the predictions positionally.
Y_eval_pred2 = pd.concat([Y_train.reset_index(), df], axis=1)
Y_eval_pred2['error'] = (
    (Y_eval_pred2['keiyaku_pr'] - Y_eval_pred2['mean']).abs()
    / Y_eval_pred2['keiyaku_pr']
    * 100
)
# In-sample MAPE of the ensemble mean.
Y_eval_pred2['error'].mean()

2.088302589736845

## 訓練データ全体で予測し、訓練データと結合

In [7]:
# Predict the complete training table with every trained model.
# NOTE: the models were fitted on the 80% training split of this same table,
# so most of these rows are in-sample.
test = train_x.drop(['id','pj_no'],axis=1)
anss = []
# Iterate over `models` directly (instead of a fixed range(30)) so a shorter
# list does not raise IndexError; the unused timing variables were removed.
for model in models:
    ans = model.predict(test)
    anss.append(ans)
# One column per model, plus the row-wise ensemble mean truncated to int.
df = pd.DataFrame(anss).T
df['mean']=df.apply( lambda x: int(x.mean()),axis=1)
# Align targets with predictions positionally and compute the per-row
# absolute percentage error; the last expression is the overall MAPE.
df2=pd.concat([train_y.reset_index(), df], axis=1)
df2['error']=abs(df2['keiyaku_pr']-df2['mean'])/df2['keiyaku_pr']*100
df2['error'].mean()

3.354674558539814

In [10]:
# Targets plus the ensemble mean and its percentage error, one row per training id.
df3 = pd.concat([train_y.reset_index(), df2[['mean', 'error']]], axis=1)
# Raw (unprocessed) tab-separated tables, joined on for error analysis.
genba = pd.read_csv("data/train_genba.tsv", sep='\t')
goto = pd.read_csv("data/train_goto.tsv", sep='\t')
# Left joins: first on 'id', then on 'pj_no'.
df4 = df3.merge(goto, on='id', how='left')
df5 = df4.merge(genba, on='pj_no', how='left')

In [16]:
# Save the merged error table (the index column is written too, by default).
# NOTE(review): same path as the earlier `eval_out` export, so this overwrites that file.
df5.to_csv("data/submit_v11_error.csv")

In [17]:
# Render an exploratory profile report of the merged error table.
# NOTE(review): pandas_profiling has since been renamed ydata-profiling — confirm which one the environment provides.
import pandas_profiling as pdp
pdp.ProfileReport(df5)

0,1
Number of variables,162
Number of observations,6461
Total Missing (%),40.3%
Total size in memory,8.0 MiB
Average record size in memory,1.3 KiB

0,1
Numeric,48
Categorical,102
Boolean,1
Date,0
Text (Unique),1
Rejected,10
Unsupported,0

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
徒歩,5002
バス,1391
車,68

Value,Count,Frequency (%),Unnamed: 3
徒歩,5002,77.4%,
バス,1391,21.5%,
車,68,1.1%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),90.9%
Missing (n),5875

0,1
徒歩,366
バス,191
車,29
(Missing),5875

Value,Count,Frequency (%),Unnamed: 3
徒歩,366,5.7%,
バス,191,3.0%,
車,29,0.4%,
(Missing),5875,90.9%,

0,1
Distinct count,96
Unique (%),1.5%
Missing (%),95.1%
Missing (n),6147

0,1
浦和パークハイツ,23
山崎,20
松伏町役場前,15
Other values (92),256
(Missing),6147

Value,Count,Frequency (%),Unnamed: 3
浦和パークハイツ,23,0.4%,
山崎,20,0.3%,
松伏町役場前,15,0.2%,
原山3丁目,13,0.2%,
ぶしニュータウン南,13,0.2%,
西新宿,12,0.2%,
東明寺橋,10,0.2%,
鴨田,10,0.2%,
村国入口,7,0.1%,
笹目,7,0.1%,

0,1
Distinct count,36
Unique (%),0.6%
Missing (%),98.6%
Missing (n),6371

0,1
レイクタウン北,7
大門小学校入口,7
美笹支所入口,6
Other values (32),70
(Missing),6371

Value,Count,Frequency (%),Unnamed: 3
レイクタウン北,7,0.1%,
大門小学校入口,7,0.1%,
美笹支所入口,6,0.1%,
宿上,6,0.1%,
下片山,6,0.1%,
水天宮,5,0.1%,
柳島,5,0.1%,
南小学校入口,5,0.1%,
手代中央公園入口,4,0.1%,
弥平町,4,0.1%,

0,1
Distinct count,6
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
防火指定無,3631
22条区域,2332
準防火地域,463
Other values (3),35

Value,Count,Frequency (%),Unnamed: 3
防火指定無,3631,56.2%,
22条区域,2332,36.1%,
準防火地域,463,7.2%,
その他,14,0.2%,
防火地域,12,0.2%,
無指定,9,0.1%,

0,1
Distinct count,126
Unique (%),2.0%
Missing (%),54.2%
Missing (n),3505
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,29.765
Minimum,0
Maximum,665
Zeros (%),21.0%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,10
Q3,45
95-th percentile,102
Maximum,665
Range,665
Interquartile range,45

0,1
Standard deviation,62.591
Coef of variation,2.1029
Kurtosis,69.358
Mean,29.765
MAD,32.528
Skewness,7.2725
Sum,87984
Variance,3917.7
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,1354,21.0%,
20.0,122,1.9%,
35.0,87,1.3%,
25.0,66,1.0%,
10.0,49,0.8%,
40.0,43,0.7%,
15.0,42,0.7%,
50.0,42,0.7%,
30.0,39,0.6%,
60.0,32,0.5%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1354,21.0%,
1.0,10,0.2%,
2.0,16,0.2%,
3.0,19,0.3%,
4.0,17,0.3%,

Value,Count,Frequency (%),Unnamed: 3
170.0,1,0.0%,
178.0,1,0.0%,
184.0,1,0.0%,
200.0,1,0.0%,
665.0,20,0.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,4856
（要）,1605

Value,Count,Frequency (%),Unnamed: 3
（不要）,4856,75.2%,
（要）,1605,24.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（無）,5616
（有）,845

Value,Count,Frequency (%),Unnamed: 3
（無）,5616,86.9%,
（有）,845,13.1%,

0,1
Distinct count,2617
Unique (%),40.5%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,736.29
Minimum,39.16
Maximum,5343.7
Zeros (%),0.0%

0,1
Minimum,39.16
5-th percentile,101.24
Q1,216.51
Median,441.79
Q3,919.52
95-th percentile,2447.3
Maximum,5343.7
Range,5304.6
Interquartile range,703.01

0,1
Standard deviation,807.76
Coef of variation,1.0971
Kurtosis,6.8094
Mean,736.29
MAD,565.44
Skewness,2.3466
Sum,4757200
Variance,652480
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
4017.4,31,0.5%,
5343.73,28,0.4%,
3732.69,24,0.4%,
3098.89,22,0.3%,
2780.39,22,0.3%,
4131.27,20,0.3%,
2964.51,20,0.3%,
2346.35,20,0.3%,
3122.66,20,0.3%,
2703.24,19,0.3%,

Value,Count,Frequency (%),Unnamed: 3
39.16,1,0.0%,
52.28,1,0.0%,
54.4,1,0.0%,
58.49,1,0.0%,
58.61,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
3317.91,9,0.1%,
3732.69,24,0.4%,
4017.4,31,0.5%,
4131.27,20,0.3%,
5343.73,28,0.4%,

0,1
Correlation,0.96218

0,1
Distinct count,49
Unique (%),0.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,15.258
Minimum,1
Maximum,90
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,5
Q1,10
Median,14
Q3,20
95-th percentile,28
Maximum,90
Range,89
Interquartile range,10

0,1
Standard deviation,7.1299
Coef of variation,0.46729
Kurtosis,3.2443
Mean,15.258
MAD,5.5741
Skewness,1.0302
Sum,98582
Variance,50.836
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
10,577,8.9%,
15,558,8.6%,
13,411,6.4%,
18,337,5.2%,
20,323,5.0%,
12,323,5.0%,
14,323,5.0%,
9,303,4.7%,
8,297,4.6%,
11,293,4.5%,

Value,Count,Frequency (%),Unnamed: 3
1,7,0.1%,
2,29,0.4%,
3,65,1.0%,
4,60,0.9%,
5,200,3.1%,

Value,Count,Frequency (%),Unnamed: 3
51,7,0.1%,
53,1,0.0%,
56,1,0.0%,
58,1,0.0%,
90,1,0.0%,

0,1
Distinct count,26
Unique (%),0.4%
Missing (%),97.0%
Missing (n),6269
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,12.698
Minimum,0
Maximum,34
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,3.0
Q1,5.0
Median,13.5
Q3,17.0
95-th percentile,25.0
Maximum,34.0
Range,34.0
Interquartile range,12.0

0,1
Standard deviation,6.6326
Coef of variation,0.52234
Kurtosis,-0.38399
Mean,12.698
MAD,5.4387
Skewness,0.37168
Sum,2438
Variance,43.992
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
5.0,32,0.5%,
14.0,24,0.4%,
9.0,13,0.2%,
20.0,12,0.2%,
15.0,12,0.2%,
16.0,10,0.2%,
3.0,10,0.2%,
25.0,9,0.1%,
11.0,9,0.1%,
18.0,8,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1,0.0%,
3.0,10,0.2%,
4.0,6,0.1%,
5.0,32,0.5%,
7.0,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
24.0,2,0.0%,
25.0,9,0.1%,
26.0,3,0.0%,
27.0,2,0.0%,
34.0,1,0.0%,

0,1
Distinct count,204
Unique (%),3.2%
Missing (%),0.0%
Missing (n),0

0,1
八潮,203
上尾,157
戸塚安行,119
Other values (201),5982

Value,Count,Frequency (%),Unnamed: 3
八潮,203,3.1%,
上尾,157,2.4%,
戸塚安行,119,1.8%,
新河岸,118,1.8%,
桶川,116,1.8%,
蓮田,108,1.7%,
東川口,105,1.6%,
大宮,103,1.6%,
北浦和,102,1.6%,
三郷中央,101,1.6%,

0,1
Distinct count,57
Unique (%),0.9%
Missing (%),97.0%
Missing (n),6269

0,1
草加,15
東川口,13
浦和,12
Other values (53),152
(Missing),6269

Value,Count,Frequency (%),Unnamed: 3
草加,15,0.2%,
東川口,13,0.2%,
浦和,12,0.2%,
せんげん台,10,0.2%,
蓮田,10,0.2%,
朝霞台,7,0.1%,
越谷レイクタウン,7,0.1%,
南浦和,7,0.1%,
武蔵浦和,6,0.1%,
浦和美園,5,0.1%,

0,1
Distinct count,6448
Unique (%),99.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.3547
Minimum,0.00037453
Maximum,903.32
Zeros (%),0.0%

0,1
Minimum,0.00037453
5-th percentile,0.13018
Q1,0.72533
Median,1.7625
Q3,3.7689
95-th percentile,10.862
Maximum,903.32
Range,903.32
Interquartile range,3.0436

0,1
Standard deviation,13.34
Coef of variation,3.9766
Kurtosis,3334.6
Mean,3.3547
MAD,2.9674
Skewness,52.556
Sum,21675
Variance,177.96
Memory size,421.0 KiB

Value,Count,Frequency (%),Unnamed: 3
9.796152941176471,3,0.0%,
0.6416,2,0.0%,
0.33416883116883117,2,0.0%,
0.4047442550037065,2,0.0%,
0.8413036463375283,2,0.0%,
4.892508771929824,2,0.0%,
4.004880829015544,2,0.0%,
0.6512222222222223,2,0.0%,
0.07064166666666667,2,0.0%,
1.1303093525179857,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0003745318352059,1,0.0%,
0.0013569321533923,1,0.0%,
0.0016655172413793,1,0.0%,
0.0017771883289124,1,0.0%,
0.0018254545454545,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
74.8471081081081,1,0.0%,
100.93980555555557,1,0.0%,
269.8712333333333,1,0.0%,
384.0392,1,0.0%,
903.3216785714286,1,0.0%,

0,1
Distinct count,14
Unique (%),0.2%
Missing (%),5.6%
Missing (n),359
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.17704
Minimum,0
Maximum,150
Zeros (%),93.8%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,150
Range,150
Interquartile range,0

0,1
Standard deviation,3.0666
Coef of variation,17.321
Kurtosis,1175.8
Mean,0.17704
MAD,0.35182
Skewness,29.623
Sum,1080.3
Variance,9.4039
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,6063,93.8%,
29.0,16,0.2%,
7.0,4,0.1%,
5.0,4,0.1%,
50.0,3,0.0%,
30.0,3,0.0%,
18.72,2,0.0%,
12.0,2,0.0%,
2.9,1,0.0%,
100.0,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0,6063,93.8%,
2.9,1,0.0%,
4.0,1,0.0%,
5.0,4,0.1%,
7.0,4,0.1%,

Value,Count,Frequency (%),Unnamed: 3
29.0,16,0.2%,
30.0,3,0.0%,
50.0,3,0.0%,
100.0,1,0.0%,
150.0,1,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,6418
（要）,40
（無）,3

Value,Count,Frequency (%),Unnamed: 3
（不要）,6418,99.3%,
（要）,40,0.6%,
（無）,3,0.0%,

0,1
Distinct count,38
Unique (%),0.6%
Missing (%),5.1%
Missing (n),330
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.7429
Minimum,0
Maximum,290
Zeros (%),92.3%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,290
Range,290
Interquartile range,0

0,1
Standard deviation,17.212
Coef of variation,9.8755
Kurtosis,186.92
Mean,1.7429
MAD,3.3905
Skewness,12.996
Sum,10686
Variance,296.26
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5963,92.3%,
10.0,24,0.4%,
143.0,17,0.3%,
9.85,16,0.2%,
290.0,12,0.2%,
7.0,7,0.1%,
100.0,7,0.1%,
3.0,5,0.1%,
170.0,5,0.1%,
20.0,5,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5963,92.3%,
1.0,1,0.0%,
3.0,5,0.1%,
3.3,2,0.0%,
3.5,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
102.0,1,0.0%,
143.0,17,0.3%,
170.0,5,0.1%,
210.0,4,0.1%,
290.0,12,0.2%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,6293
（要）,168

Value,Count,Frequency (%),Unnamed: 3
（不要）,6293,97.4%,
（要）,168,2.6%,

0,1
Distinct count,150
Unique (%),2.3%
Missing (%),1.5%
Missing (n),94
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,5.557
Minimum,0
Maximum,25.1
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,4.0
Q1,4.0
Median,5.0
Q3,6.0
95-th percentile,10.0
Maximum,25.1
Range,25.1
Interquartile range,2.0

0,1
Standard deviation,2.2604
Coef of variation,0.40676
Kurtosis,15.234
Mean,5.557
MAD,1.4386
Skewness,3.2423
Sum,35382
Variance,5.1093
Memory size,421.0 KiB

Value,Count,Frequency (%),Unnamed: 3
4.0,1985,30.7%,
6.0,1834,28.4%,
4.2,391,6.1%,
5.0,368,5.7%,
4.5,247,3.8%,
8.0,135,2.1%,
12.0,78,1.2%,
10.0,58,0.9%,
4.3,57,0.9%,
7.0,56,0.9%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1,0.0%,
1.9,1,0.0%,
2.0,4,0.1%,
2.4,1,0.0%,
2.5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
21.5,2,0.0%,
22.0,2,0.0%,
23.0,7,0.1%,
25.0,2,0.0%,
25.1,1,0.0%,

0,1
Distinct count,9
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.7864
Minimum,0
Maximum,9
Zeros (%),5.6%

0,1
Minimum,0
5-th percentile,0
Q1,1
Median,2
Q3,2
95-th percentile,3
Maximum,9
Range,9
Interquartile range,1

0,1
Standard deviation,0.74705
Coef of variation,0.41819
Kurtosis,7.321
Mean,1.7864
MAD,0.52547
Skewness,0.45129
Sum,11542
Variance,0.55809
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
2,4204,65.1%,
1,1334,20.6%,
3,487,7.5%,
0,363,5.6%,
4,52,0.8%,
5,9,0.1%,
6,6,0.1%,
8,4,0.1%,
9,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,363,5.6%,
1,1334,20.6%,
2,4204,65.1%,
3,487,7.5%,
4,52,0.8%,

Value,Count,Frequency (%),Unnamed: 3
4,52,0.8%,
5,9,0.1%,
6,6,0.1%,
8,4,0.1%,
9,2,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
都市ガス,4585
個別プロパン,1728
集中プロパン,148

Value,Count,Frequency (%),Unnamed: 3
都市ガス,4585,71.0%,
個別プロパン,1728,26.7%,
集中プロパン,148,2.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
公共下水,5639
個別浄化槽,822

Value,Count,Frequency (%),Unnamed: 3
公共下水,5639,87.3%,
個別浄化槽,822,12.7%,

0,1
Distinct count,320
Unique (%),5.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1185.9
Minimum,10
Maximum,4800
Zeros (%),0.0%

0,1
Minimum,10
5-th percentile,300
Q1,700
Median,1100
Q3,1590
95-th percentile,2400
Maximum,4800
Range,4790
Interquartile range,890

0,1
Standard deviation,650.2
Coef of variation,0.54828
Kurtosis,1.9559
Mean,1185.9
MAD,506.64
Skewness,1.0308
Sum,7662144
Variance,422770
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
800,334,5.2%,
1200,300,4.6%,
1600,241,3.7%,
1100,223,3.5%,
1000,202,3.1%,
1300,183,2.8%,
1500,160,2.5%,
700,152,2.4%,
1400,135,2.1%,
600,125,1.9%,

Value,Count,Frequency (%),Unnamed: 3
10,1,0.0%,
16,2,0.0%,
19,2,0.0%,
20,3,0.0%,
44,3,0.0%,

Value,Count,Frequency (%),Unnamed: 3
4200,1,0.0%,
4300,1,0.0%,
4560,4,0.1%,
4600,4,0.1%,
4800,2,0.0%,

0,1
Correlation,0.99787

0,1
Distinct count,321
Unique (%),5.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,811.43
Minimum,6
Maximum,8800
Zeros (%),0.0%

0,1
Minimum,6
5-th percentile,200
Q1,480
Median,720
Q3,1080
95-th percentile,1680
Maximum,8800
Range,8794
Interquartile range,600

0,1
Standard deviation,504.48
Coef of variation,0.62171
Kurtosis,48.799
Mean,811.43
MAD,362.79
Skewness,3.7379
Sum,5242659
Variance,254500
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
400,260,4.0%,
800,252,3.9%,
1200,237,3.7%,
600,219,3.4%,
1100,208,3.2%,
1000,197,3.0%,
650,171,2.6%,
700,160,2.5%,
900,152,2.4%,
480,145,2.2%,

Value,Count,Frequency (%),Unnamed: 3
6,1,0.0%,
10,4,0.1%,
11,2,0.0%,
20,9,0.1%,
30,3,0.0%,

Value,Count,Frequency (%),Unnamed: 3
2600,2,0.0%,
3120,2,0.0%,
3200,1,0.0%,
3900,4,0.1%,
8800,5,0.1%,

0,1
Correlation,0.99858

0,1
Distinct count,55
Unique (%),0.9%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,11.811
Minimum,0
Maximum,91
Zeros (%),0.1%

0,1
Minimum,0
5-th percentile,3
Q1,7
Median,10
Q3,15
95-th percentile,27
Maximum,91
Range,91
Interquartile range,8

0,1
Standard deviation,7.581
Coef of variation,0.64185
Kurtosis,6.8608
Mean,11.811
MAD,5.5416
Skewness,1.8019
Sum,76312
Variance,57.471
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
10.0,489,7.6%,
5.0,480,7.4%,
8.0,416,6.4%,
7.0,410,6.3%,
9.0,410,6.3%,
12.0,394,6.1%,
6.0,343,5.3%,
15.0,342,5.3%,
11.0,339,5.2%,
14.0,323,5.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0,7,0.1%,
1.0,115,1.8%,
2.0,147,2.3%,
3.0,235,3.6%,
4.0,247,3.8%,

Value,Count,Frequency (%),Unnamed: 3
57.0,2,0.0%,
59.0,1,0.0%,
63.0,1,0.0%,
72.0,3,0.0%,
91.0,1,0.0%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),1.5%
Missing (n),95

0,1
良い,3470
普通,2578
悪い,318
(Missing),95

Value,Count,Frequency (%),Unnamed: 3
良い,3470,53.7%,
普通,2578,39.9%,
悪い,318,4.9%,
(Missing),95,1.5%,

0,1
Distinct count,21
Unique (%),0.3%
Missing (%),72.5%
Missing (n),4681

0,1
景観法,950
農地法,360
国土法,208
Other values (17),262
(Missing),4681

Value,Count,Frequency (%),Unnamed: 3
景観法,950,14.7%,
農地法,360,5.6%,
国土法,208,3.2%,
埋蔵文化財,85,1.3%,
農地法届出要,48,0.7%,
景観地区,38,0.6%,
文化財保護法（埋蔵文化財）,28,0.4%,
河川法,14,0.2%,
航空法,12,0.2%,
文化財保護法,9,0.1%,

0,1
Distinct count,20
Unique (%),0.3%
Missing (%),86.8%
Missing (n),5611

0,1
農地法,617
景観法,111
文化財保護法（埋蔵文化財）,48
Other values (16),74
(Missing),5611

Value,Count,Frequency (%),Unnamed: 3
農地法,617,9.5%,
景観法,111,1.7%,
文化財保護法（埋蔵文化財）,48,0.7%,
航空法,16,0.2%,
農地法届出要,12,0.2%,
東日本震災復興特,9,0.1%,
区画整理法,7,0.1%,
景観地区,6,0.1%,
公拡法,6,0.1%,
埋蔵文化財,5,0.1%,

0,1
Distinct count,8
Unique (%),0.1%
Missing (%),97.1%
Missing (n),6275

0,1
埋蔵文化財,108
文化財保護法（埋蔵文化財）,31
風致地区,28
Other values (4),19
(Missing),6275

Value,Count,Frequency (%),Unnamed: 3
埋蔵文化財,108,1.7%,
文化財保護法（埋蔵文化財）,31,0.5%,
風致地区,28,0.4%,
農地法,8,0.1%,
東日本震災復興特,5,0.1%,
自然公園法,3,0.0%,
景観法,3,0.0%,
(Missing),6275,97.1%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),99.4%
Missing (n),6425

0,1
公拡法,28
文化財保護法（埋蔵文化財）,6
風致地区,2
(Missing),6425

Value,Count,Frequency (%),Unnamed: 3
公拡法,28,0.4%,
文化財保護法（埋蔵文化財）,6,0.1%,
風致地区,2,0.0%,
(Missing),6425,99.4%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,5398
（要）,1063

Value,Count,Frequency (%),Unnamed: 3
（不要）,5398,83.5%,
（要）,1063,16.5%,

0,1
Distinct count,7
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
更地,3739
建物有,1395
造成上り,1071
Other values (4),256

Value,Count,Frequency (%),Unnamed: 3
更地,3739,57.9%,
建物有,1395,21.6%,
造成上り,1071,16.6%,
田・畑,177,2.7%,
その他,49,0.8%,
山林,26,0.4%,
雑木林・竹林,4,0.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),3.9%
Missing (n),249

0,1
Mean,0

0,1
0.0,6212
(Missing),249

Value,Count,Frequency (%),Unnamed: 3
0.0,6212,96.1%,
(Missing),249,3.9%,

0,1
Correlation,0.98845

0,1
Distinct count,9
Unique (%),0.1%
Missing (%),2.9%
Missing (n),187
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.097864
Minimum,0
Maximum,7
Zeros (%),92.3%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,7
Range,7
Interquartile range,0

0,1
Standard deviation,0.54743
Coef of variation,5.5938
Kurtosis,79.43
Mean,0.097864
MAD,0.186
Skewness,8.1145
Sum,614
Variance,0.29968
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5962,92.3%,
1.0,171,2.6%,
2.0,77,1.2%,
3.0,27,0.4%,
7.0,12,0.2%,
5.0,10,0.2%,
4.0,8,0.1%,
6.0,7,0.1%,
(Missing),187,2.9%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5962,92.3%,
1.0,171,2.6%,
2.0,77,1.2%,
3.0,27,0.4%,
4.0,8,0.1%,

Value,Count,Frequency (%),Unnamed: 3
3.0,27,0.4%,
4.0,8,0.1%,
5.0,10,0.2%,
6.0,7,0.1%,
7.0,12,0.2%,

First 3 values
train_2669
train_3759
train_4703

Last 3 values
train_0331
train_3761
train_5979

Value,Count,Frequency (%),Unnamed: 3
train_0000,1,0.0%,
train_0001,1,0.0%,
train_0002,1,0.0%,
train_0003,1,0.0%,
train_0004,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
train_6456,1,0.0%,
train_6457,1,0.0%,
train_6458,1,0.0%,
train_6459,1,0.0%,
train_6460,1,0.0%,

0,1
Distinct count,6461
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3230
Minimum,0
Maximum,6460
Zeros (%),0.0%

0,1
Minimum,0
5-th percentile,323
Q1,1615
Median,3230
Q3,4845
95-th percentile,6137
Maximum,6460
Range,6460
Interquartile range,3230

0,1
Standard deviation,1865.3
Coef of variation,0.57748
Kurtosis,-1.2
Mean,3230
MAD,1615.2
Skewness,0
Sum,20869030
Variance,3479200
Memory size,421.0 KiB

Value,Count,Frequency (%),Unnamed: 3
2047,1,0.0%,
3379,1,0.0%,
1306,1,0.0%,
3355,1,0.0%,
5408,1,0.0%,
1314,1,0.0%,
3363,1,0.0%,
5416,1,0.0%,
1322,1,0.0%,
3371,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,1,0.0%,
1,1,0.0%,
2,1,0.0%,
3,1,0.0%,
4,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
6456,1,0.0%,
6457,1,0.0%,
6458,1,0.0%,
6459,1,0.0%,
6460,1,0.0%,

0,1
Distinct count,5
Unique (%),0.1%
Missing (%),1.5%
Missing (n),95

0,1
整形地,4837
敷地延長,879
不整形地,527

Value,Count,Frequency (%),Unnamed: 3
整形地,4837,74.9%,
敷地延長,879,13.6%,
不整形地,527,8.2%,
間口狭・奥行長,123,1.9%,
(Missing),95,1.5%,

0,1
Distinct count,5
Unique (%),0.1%
Missing (%),3.5%
Missing (n),227
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.048604
Minimum,0
Maximum,6
Zeros (%),95.1%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,6
Range,6
Interquartile range,0

0,1
Standard deviation,0.48369
Coef of variation,9.9516
Kurtosis,134.51
Mean,0.048604
MAD,0.095837
Skewness,11.402
Sum,303
Variance,0.23396
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,6146,95.1%,
6.0,37,0.6%,
2.0,30,0.5%,
1.0,21,0.3%,
(Missing),227,3.5%,

Value,Count,Frequency (%),Unnamed: 3
0.0,6146,95.1%,
1.0,21,0.3%,
2.0,30,0.5%,
6.0,37,0.6%,

Value,Count,Frequency (%),Unnamed: 3
0.0,6146,95.1%,
1.0,21,0.3%,
2.0,30,0.5%,
6.0,37,0.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
公営,6416
私営,45

Value,Count,Frequency (%),Unnamed: 3
公営,6416,99.3%,
私営,45,0.7%,

0,1
Distinct count,1234
Unique (%),19.1%
Missing (%),0.0%
Missing (n),0

0,1
埼玉県川越市大字寺尾,53
埼玉県戸田市笹目,52
埼玉県さいたま市緑区大字三室,48
Other values (1231),6308

Value,Count,Frequency (%),Unnamed: 3
埼玉県川越市大字寺尾,53,0.8%,
埼玉県戸田市笹目,52,0.8%,
埼玉県さいたま市緑区大字三室,48,0.7%,
埼玉県三郷市泉,43,0.7%,
埼玉県さいたま市見沼区大字大谷,39,0.6%,
埼玉県八潮市大字大瀬,38,0.6%,
埼玉県所沢市上新井,38,0.6%,
埼玉県さいたま市西区大字指扇,32,0.5%,
埼玉県草加市瀬崎,32,0.5%,
埼玉県志木市下宗岡,30,0.5%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,4777
（要）,1684

Value,Count,Frequency (%),Unnamed: 3
（不要）,4777,73.9%,
（要）,1684,26.1%,

0,1
Distinct count,627
Unique (%),9.7%
Missing (%),3.0%
Missing (n),193
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,43.71
Minimum,0
Maximum,1846.8
Zeros (%),75.5%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.0
Median,0.0
Q3,0.0
95-th percentile,199.45
Maximum,1846.8
Range,1846.8
Interquartile range,0.0

0,1
Standard deviation,155.95
Coef of variation,3.5679
Kurtosis,79.119
Mean,43.71
MAD,68.168
Skewness,8.0215
Sum,273970
Variance,24321
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,4879,75.5%,
101.68,31,0.5%,
100.0,28,0.4%,
186.23,22,0.3%,
74.52,19,0.3%,
1765.5,19,0.3%,
230.78,18,0.3%,
425.47,17,0.3%,
178.45,13,0.2%,
49.47,13,0.2%,

Value,Count,Frequency (%),Unnamed: 3
0.0,4879,75.5%,
19.83,1,0.0%,
26.73,1,0.0%,
27.32,1,0.0%,
28.62,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1230.4,3,0.0%,
1240.55,7,0.1%,
1765.5,19,0.3%,
1774.17,5,0.1%,
1846.77,6,0.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（無）,5058
（有）,1403

Value,Count,Frequency (%),Unnamed: 3
（無）,5058,78.3%,
（有）,1403,21.7%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
実測,3317
公簿,3139
公募,5

Value,Count,Frequency (%),Unnamed: 3
実測,3317,51.3%,
公簿,3139,48.6%,
公募,5,0.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（無）,6228
（有）,233

Value,Count,Frequency (%),Unnamed: 3
（無）,6228,96.4%,
（有）,233,3.6%,

0,1
Distinct count,593
Unique (%),9.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,27822000
Minimum,2800000
Maximum,62800000
Zeros (%),0.0%

0,1
Minimum,2800000
5-th percentile,17900000
Q1,23300000
Median,27000000
Q3,31800000
95-th percentile,39900000
Maximum,62800000
Range,60000000
Interquartile range,8500000

0,1
Standard deviation,6853500
Coef of variation,0.24634
Kurtosis,1.4087
Mean,27822000
MAD,5290200
Skewness,0.66662
Sum,179755931851
Variance,46970000000000
Memory size,421.0 KiB

Value,Count,Frequency (%),Unnamed: 3
24800000,133,2.1%,
27800000,131,2.0%,
26800000,127,2.0%,
25800000,127,2.0%,
23800000,120,1.9%,
29800000,113,1.7%,
25000000,109,1.7%,
26000000,108,1.7%,
28800000,106,1.6%,
29000000,102,1.6%,

Value,Count,Frequency (%),Unnamed: 3
2800000,1,0.0%,
3000000,1,0.0%,
3500000,1,0.0%,
3600000,1,0.0%,
5500000,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
59980000,1,0.0%,
60000000,1,0.0%,
60980000,1,0.0%,
61800000,1,0.0%,
62800000,2,0.0%,

0,1
Correlation,0.97687

0,1
Distinct count,5
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,57.785
Minimum,40
Maximum,80
Zeros (%),0.0%

0,1
Minimum,40
5-th percentile,50
Q1,60
Median,60
Q3,60
95-th percentile,60
Maximum,80
Range,40
Interquartile range,0

0,1
Standard deviation,4.8953
Coef of variation,0.084715
Kurtosis,3.3299
Mean,57.785
MAD,3.7876
Skewness,-0.010822
Sum,373350
Variance,23.964
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
60,4836,74.8%,
50,1542,23.9%,
80,67,1.0%,
40,13,0.2%,
70,3,0.0%,

Value,Count,Frequency (%),Unnamed: 3
40,13,0.2%,
50,1542,23.9%,
60,4836,74.8%,
70,3,0.0%,
80,67,1.0%,

Value,Count,Frequency (%),Unnamed: 3
40,13,0.2%,
50,1542,23.9%,
60,4836,74.8%,
70,3,0.0%,
80,67,1.0%,

0,1
Distinct count,5
Unique (%),0.1%
Missing (%),8.4%
Missing (n),544
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,5.0989
Minimum,0
Maximum,80
Zeros (%),83.7%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,60
Maximum,80
Range,80
Interquartile range,0

0,1
Standard deviation,16.621
Coef of variation,3.2597
Kurtosis,6.9545
Mean,5.0989
MAD,9.3153
Skewness,2.9768
Sum,30170
Variance,276.25
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5405,83.7%,
60.0,430,6.7%,
50.0,73,1.1%,
80.0,9,0.1%,
(Missing),544,8.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5405,83.7%,
50.0,73,1.1%,
60.0,430,6.7%,
80.0,9,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5405,83.7%,
50.0,73,1.1%,
60.0,430,6.7%,
80.0,9,0.1%,

0,1
Distinct count,536
Unique (%),8.3%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,119640
Minimum,0
Maximum,1050000
Zeros (%),2.3%

0,1
Minimum,0
5-th percentile,38800
Q1,85300
Median,115000
Q3,147000
95-th percentile,223000
Maximum,1050000
Range,1050000
Interquartile range,61700

0,1
Standard deviation,61268
Coef of variation,0.51213
Kurtosis,28.482
Mean,119640
MAD,41595
Skewness,2.7303
Sum,772961793
Variance,3753800000
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
121000,157,2.4%,
117000,153,2.4%,
0,146,2.3%,
101000,119,1.8%,
118000,117,1.8%,
113000,108,1.7%,
116000,106,1.6%,
119000,92,1.4%,
111000,91,1.4%,
108000,84,1.3%,

Value,Count,Frequency (%),Unnamed: 3
0,146,2.3%,
1000,3,0.0%,
10600,1,0.0%,
10700,5,0.1%,
11400,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
476033,1,0.0%,
522000,1,0.0%,
542000,2,0.0%,
864000,3,0.0%,
1050000,2,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（無）,6353
（有）,108

Value,Count,Frequency (%),Unnamed: 3
（無）,6353,98.3%,
（有）,108,1.7%,

0,1
Distinct count,26
Unique (%),0.4%
Missing (%),59.0%
Missing (n),3814

0,1
眺望良,547
角地,510
裏道,296
Other values (22),1294
(Missing),3814

Value,Count,Frequency (%),Unnamed: 3
眺望良,547,8.5%,
角地,510,7.9%,
裏道,296,4.6%,
宅内高低差あり,232,3.6%,
行き止まり途中,143,2.2%,
交通量多い,117,1.8%,
街道沿い,106,1.6%,
ごみ置き場前,92,1.4%,
二方路,92,1.4%,
行き止まり,91,1.4%,

0,1
Distinct count,26
Unique (%),0.4%
Missing (%),93.3%
Missing (n),6028

0,1
角地,93
二方路,52
交通量多い,51
Other values (22),237
(Missing),6028

Value,Count,Frequency (%),Unnamed: 3
角地,93,1.4%,
二方路,52,0.8%,
交通量多い,51,0.8%,
眺望良,33,0.5%,
床暖房付,32,0.5%,
ごみ置き場前,25,0.4%,
裏道,22,0.3%,
街道沿い,18,0.3%,
エネファーム付,15,0.2%,
行き止まり途中,14,0.2%,

0,1
Distinct count,25
Unique (%),0.4%
Missing (%),95.9%
Missing (n),6195

0,1
行き止まり,34
ごみ置き場前,29
交通量多い,27
Other values (21),176
(Missing),6195

Value,Count,Frequency (%),Unnamed: 3
行き止まり,34,0.5%,
ごみ置き場前,29,0.4%,
交通量多い,27,0.4%,
角地,21,0.3%,
街道沿い,19,0.3%,
裏道,18,0.3%,
高圧線下,15,0.2%,
行き止まり途中,14,0.2%,
車進入困難,14,0.2%,
嫌悪施設隣接,13,0.2%,

0,1
Distinct count,17
Unique (%),0.3%
Missing (%),99.2%
Missing (n),6412

0,1
裏道,10
交通量多い,8
車進入困難,5
Other values (13),26
(Missing),6412

Value,Count,Frequency (%),Unnamed: 3
裏道,10,0.2%,
交通量多い,8,0.1%,
車進入困難,5,0.1%,
行き止まり途中,5,0.1%,
角地,4,0.1%,
宅内高低差あり,3,0.0%,
二方路,3,0.0%,
床暖房付,2,0.0%,
行き止まり,2,0.0%,
前面道が坂途中,1,0.0%,

0,1
Distinct count,6
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
指定無,5837
第2種,240
その他,225
Other values (3),159

Value,Count,Frequency (%),Unnamed: 3
指定無,5837,90.3%,
第2種,240,3.7%,
その他,225,3.5%,
第1種,155,2.4%,
第4種,3,0.0%,
第3種,1,0.0%,

0,1
Distinct count,547
Unique (%),8.5%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,120170
Minimum,12000
Maximum,486000
Zeros (%),0.0%

0,1
Minimum,12000
5-th percentile,47200
Q1,88200
Median,114000
Q3,144000
95-th percentile,213000
Maximum,486000
Range,474000
Interquartile range,55800

0,1
Standard deviation,51723
Coef of variation,0.43043
Kurtosis,3.7258
Mean,120170
MAD,37498
Skewness,1.2348
Sum,776399400
Variance,2675300000
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
106000,123,1.9%,
123000,116,1.8%,
119000,116,1.8%,
146000,114,1.8%,
129000,105,1.6%,
130000,104,1.6%,
110000,104,1.6%,
103000,103,1.6%,
137000,100,1.5%,
108000,99,1.5%,

Value,Count,Frequency (%),Unnamed: 3
12000,1,0.0%,
13300,1,0.0%,
13600,1,0.0%,
15000,5,0.1%,
15300,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
355000,1,0.0%,
359000,1,0.0%,
366000,3,0.0%,
400000,12,0.2%,
486000,3,0.0%,

0,1
Distinct count,28
Unique (%),0.4%
Missing (%),5.5%
Missing (n),355

0,1
2F/4LDK,5003
2F/3LDK,316
3F/4LDK,141
Other values (24),646
(Missing),355

Value,Count,Frequency (%),Unnamed: 3
2F/4LDK,5003,77.4%,
2F/3LDK,316,4.9%,
3F/4LDK,141,2.2%,
2F/5LDK,133,2.1%,
2F/4LDK+S,102,1.6%,
土地売り,80,1.2%,
2F/3LDK+S,79,1.2%,
3F/3LDK+S,57,0.9%,
2F/4DK,47,0.7%,
2F/2LDK,37,0.6%,

0,1
Distinct count,543
Unique (%),8.4%
Missing (%),1.5%
Missing (n),94
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,8.8298
Minimum,0
Maximum,48.3
Zeros (%),0.1%

0,1
Minimum,0.0
5-th percentile,2.5
Q1,6.2
Median,8.5
Q3,10.76
95-th percentile,17.2
Maximum,48.3
Range,48.3
Interquartile range,4.56

0,1
Standard deviation,4.5483
Coef of variation,0.51511
Kurtosis,4.4407
Mean,8.8298
MAD,3.2464
Skewness,1.3494
Sum,56219
Variance,20.687
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
10.0,198,3.1%,
3.0,186,2.9%,
8.0,166,2.6%,
2.7,160,2.5%,
2.5,135,2.1%,
9.0,134,2.1%,
2.0,132,2.0%,
7.0,108,1.7%,
6.0,93,1.4%,
8.6,88,1.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5,0.1%,
1.4,1,0.0%,
2.0,132,2.0%,
2.01,2,0.0%,
2.04,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
34.8,1,0.0%,
34.9,1,0.0%,
35.8,1,0.0%,
43.0,1,0.0%,
48.3,1,0.0%,

0,1
Correlation,0.97687

0,1
Distinct count,19
Unique (%),0.3%
Missing (%),43.1%
Missing (n),2782
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,68.519
Minimum,0
Maximum,300
Zeros (%),25.9%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,100
Q3,100
95-th percentile,165
Maximum,300
Range,300
Interquartile range,100

0,1
Standard deviation,72.182
Coef of variation,1.0535
Kurtosis,1.0229
Mean,68.519
MAD,62.317
Skewness,0.91812
Sum,252080
Variance,5210.3
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,1673,25.9%,
100.0,1103,17.1%,
120.0,293,4.5%,
150.0,171,2.6%,
300.0,115,1.8%,
130.0,92,1.4%,
200.0,54,0.8%,
110.0,50,0.8%,
165.0,45,0.7%,
135.0,44,0.7%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1673,25.9%,
70.0,7,0.1%,
80.0,1,0.0%,
100.0,1103,17.1%,
110.0,50,0.8%,

Value,Count,Frequency (%),Unnamed: 3
150.0,171,2.6%,
165.0,45,0.7%,
180.0,5,0.1%,
200.0,54,0.8%,
300.0,115,1.8%,

0,1
Distinct count,153
Unique (%),2.4%
Missing (%),0.5%
Missing (n),31
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.1019
Minimum,0
Maximum,604.47
Zeros (%),74.0%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.0
Median,0.0
Q3,1.0
95-th percentile,5.55
Maximum,604.47
Range,604.47
Interquartile range,1.0

0,1
Standard deviation,24.062
Coef of variation,7.7574
Kurtosis,264.71
Mean,3.1019
MAD,4.83
Skewness,14.638
Sum,19945
Variance,579
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,4784,74.0%,
3.0,156,2.4%,
2.0,93,1.4%,
4.0,60,0.9%,
2.5,33,0.5%,
3.93,33,0.5%,
157.89,31,0.5%,
6.45,28,0.4%,
2.52,26,0.4%,
2.7,25,0.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,4784,74.0%,
0.32,1,0.0%,
0.33,3,0.0%,
0.35,7,0.1%,
0.4,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
110.12,9,0.1%,
157.89,31,0.5%,
291.35,6,0.1%,
321.73,12,0.2%,
604.47,3,0.0%,

0,1
Distinct count,443
Unique (%),6.9%
Missing (%),0.4%
Missing (n),29
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,55.936
Minimum,0
Maximum,835.26
Zeros (%),72.0%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.0
Median,0.0
Q3,4.32
95-th percentile,359.46
Maximum,835.26
Range,835.26
Interquartile range,4.32

0,1
Standard deviation,138.15
Coef of variation,2.4697
Kurtosis,9.9271
Mean,55.936
MAD,87.995
Skewness,3.0494
Sum,359780
Variance,19085
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,4654,72.0%,
635.41,31,0.5%,
310.9,28,0.4%,
798.88,24,0.4%,
353.2,22,0.3%,
402.59,22,0.3%,
740.94,20,0.3%,
478.21,20,0.3%,
209.0,20,0.3%,
304.7,19,0.3%,

Value,Count,Frequency (%),Unnamed: 3
0.0,4654,72.0%,
0.24,1,0.0%,
0.27,1,0.0%,
0.35,1,0.0%,
0.5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
635.41,31,0.5%,
703.29,9,0.1%,
740.94,20,0.3%,
798.88,24,0.4%,
835.26,16,0.2%,

0,1
Correlation,0.96273

0,1
Distinct count,726
Unique (%),11.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.841
Minimum,0
Maximum,967
Zeros (%),6.4%

0,1
Minimum,0
5-th percentile,0
Q1,2
Median,3
Q3,5
95-th percentile,9
Maximum,967
Range,967
Interquartile range,3

0,1
Standard deviation,12.315
Coef of variation,3.2062
Kurtosis,5794.2
Mean,3.841
MAD,2.3152
Skewness,74.096
Sum,24817
Variance,151.66
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
1.0,670,10.4%,
2.0,665,10.3%,
3.0,573,8.9%,
0.0,414,6.4%,
5.0,368,5.7%,
6.0,247,3.8%,
4.0,238,3.7%,
7.0,111,1.7%,
2.5,103,1.6%,
2.7,82,1.3%,

Value,Count,Frequency (%),Unnamed: 3
0.0,414,6.4%,
0.1,11,0.2%,
0.2,9,0.1%,
0.4,1,0.0%,
0.5,19,0.3%,

Value,Count,Frequency (%),Unnamed: 3
25.03,1,0.0%,
27.75,1,0.0%,
30.0,1,0.0%,
30.53,1,0.0%,
967.0,1,0.0%,

0,1
Correlation,0.99986

0,1
Distinct count,124
Unique (%),1.9%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,5.6936
Minimum,0.9
Maximum,57
Zeros (%),0.0%

0,1
Minimum,0.9
5-th percentile,3.1
Q1,4.0
Median,5.0
Q3,6.0
95-th percentile,10.7
Maximum,57.0
Range,56.1
Interquartile range,2.0

0,1
Standard deviation,3.0078
Coef of variation,0.52828
Kurtosis,49.831
Mean,5.6936
MAD,1.7692
Skewness,4.8233
Sum,36787
Variance,9.0471
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
4.0,1673,25.9%,
6.0,1609,24.9%,
5.0,325,5.0%,
4.2,205,3.2%,
8.0,190,2.9%,
4.5,187,2.9%,
3.6,134,2.1%,
9.0,93,1.4%,
12.0,90,1.4%,
5.5,87,1.3%,

Value,Count,Frequency (%),Unnamed: 3
0.9,2,0.0%,
1.5,20,0.3%,
1.8,54,0.8%,
1.9,2,0.0%,
2.0,12,0.2%,

Value,Count,Frequency (%),Unnamed: 3
25.3,4,0.1%,
25.4,5,0.1%,
27.0,1,0.0%,
32.0,3,0.0%,
57.0,3,0.0%,

0,1
Distinct count,9
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
東,1311
南,1182
北,1180
Other values (6),2788

Value,Count,Frequency (%),Unnamed: 3
東,1311,20.3%,
南,1182,18.3%,
北,1180,18.3%,
西,1061,16.4%,
北西,494,7.6%,
南西,468,7.2%,
北東,466,7.2%,
南東,298,4.6%,
東南,1,0.0%,

0,1
Distinct count,420
Unique (%),6.5%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,20.442
Minimum,0
Maximum,148
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,6.0
Q1,11.0
Median,16.8
Q3,25.1
95-th percentile,45.7
Maximum,148.0
Range,148.0
Interquartile range,14.1

0,1
Standard deviation,14.185
Coef of variation,0.69393
Kurtosis,13.85
Mean,20.442
MAD,10.042
Skewness,2.5672
Sum,132080
Variance,201.23
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
20.0,103,1.6%,
4.0,79,1.2%,
10.0,75,1.2%,
12.0,73,1.1%,
14.0,71,1.1%,
9.0,62,1.0%,
19.0,60,0.9%,
14.6,59,0.9%,
11.0,56,0.9%,
15.0,56,0.9%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1,0.0%,
1.4,1,0.0%,
2.0,19,0.3%,
2.1,3,0.0%,
2.2,4,0.1%,

Value,Count,Frequency (%),Unnamed: 3
80.4,5,0.1%,
84.7,5,0.1%,
97.4,6,0.1%,
100.5,2,0.0%,
148.0,12,0.2%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
公道,5907
私道,531
法定外,23

Value,Count,Frequency (%),Unnamed: 3
公道,5907,91.4%,
私道,531,8.2%,
法定外,23,0.4%,

0,1
Distinct count,102
Unique (%),1.6%
Missing (%),3.9%
Missing (n),251
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2.8749
Minimum,0
Maximum,71
Zeros (%),45.9%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.0
Median,1.9
Q3,5.0
95-th percentile,8.0
Maximum,71.0
Range,71.0
Interquartile range,5.0

0,1
Standard deviation,4.1434
Coef of variation,1.4412
Kurtosis,103.68
Mean,2.8749
MAD,2.8123
Skewness,7.0065
Sum,17853
Variance,17.168
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,2964,45.9%,
6.0,976,15.1%,
4.0,852,13.2%,
4.2,135,2.1%,
5.0,134,2.1%,
1.8,113,1.7%,
4.5,88,1.4%,
8.0,78,1.2%,
9.0,53,0.8%,
3.3,52,0.8%,

Value,Count,Frequency (%),Unnamed: 3
0.0,2964,45.9%,
0.9,13,0.2%,
1.1,1,0.0%,
1.2,2,0.0%,
1.5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
20.0,4,0.1%,
21.2,1,0.0%,
25.4,5,0.1%,
70.0,3,0.0%,
71.0,6,0.1%,

0,1
Distinct count,9
Unique (%),0.1%
Missing (%),49.8%
Missing (n),3215

0,1
南,771
西,665
東,529
Other values (5),1281
(Missing),3215

Value,Count,Frequency (%),Unnamed: 3
南,771,11.9%,
西,665,10.3%,
東,529,8.2%,
北,475,7.4%,
南西,267,4.1%,
北西,187,2.9%,
北東,179,2.8%,
南東,173,2.7%,
(Missing),3215,49.8%,

0,1
Distinct count,358
Unique (%),5.5%
Missing (%),3.9%
Missing (n),251
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,12.96
Minimum,0
Maximum,110.5
Zeros (%),46.2%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.0
Median,5.4
Q3,21.5
95-th percentile,49.0
Maximum,110.5
Range,110.5
Interquartile range,21.5

0,1
Standard deviation,17.373
Coef of variation,1.3406
Kurtosis,2.8723
Mean,12.96
MAD,13.642
Skewness,1.6129
Sum,80481
Variance,301.83
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,2982,46.2%,
20.0,57,0.9%,
10.6,44,0.7%,
23.4,38,0.6%,
9.1,35,0.5%,
57.7,34,0.5%,
10.0,32,0.5%,
61.6,31,0.5%,
4.0,30,0.5%,
33.0,26,0.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,2982,46.2%,
0.9,3,0.0%,
1.0,1,0.0%,
1.6,1,0.0%,
1.8,18,0.3%,

Value,Count,Frequency (%),Unnamed: 3
74.0,9,0.1%,
80.4,18,0.3%,
94.5,8,0.1%,
100.4,2,0.0%,
110.5,8,0.1%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),49.8%
Missing (n),3215

0,1
公道,2893
私道,327
法定外,26
(Missing),3215

Value,Count,Frequency (%),Unnamed: 3
公道,2893,44.8%,
私道,327,5.1%,
法定外,26,0.4%,
(Missing),3215,49.8%,

0,1
Distinct count,42
Unique (%),0.7%
Missing (%),7.7%
Missing (n),496
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.81721
Minimum,0
Maximum,26
Zeros (%),78.6%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,6
Maximum,26
Range,26
Interquartile range,0

0,1
Standard deviation,2.3384
Coef of variation,2.8615
Kurtosis,37.574
Mean,0.81721
MAD,1.3914
Skewness,4.8008
Sum,4874.6
Variance,5.4682
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5078,78.6%,
6.0,276,4.3%,
4.0,150,2.3%,
5.0,74,1.1%,
4.2,63,1.0%,
6.2,38,0.6%,
7.5,28,0.4%,
4.5,27,0.4%,
2.4,22,0.3%,
7.9,21,0.3%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5078,78.6%,
0.9,10,0.2%,
1.0,1,0.0%,
1.3,8,0.1%,
1.8,11,0.2%,

Value,Count,Frequency (%),Unnamed: 3
10.0,8,0.1%,
11.5,7,0.1%,
16.0,5,0.1%,
25.0,1,0.0%,
26.0,15,0.2%,

0,1
Distinct count,9
Unique (%),0.1%
Missing (%),86.3%
Missing (n),5574

0,1
北,223
東,187
南,164
Other values (5),313
(Missing),5574

Value,Count,Frequency (%),Unnamed: 3
北,223,3.5%,
東,187,2.9%,
南,164,2.5%,
西,93,1.4%,
南東,87,1.3%,
南西,63,1.0%,
北東,43,0.7%,
北西,27,0.4%,
(Missing),5574,86.3%,

0,1
Distinct count,132
Unique (%),2.0%
Missing (%),7.7%
Missing (n),496
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,5.0659
Minimum,0
Maximum,157.4
Zeros (%),78.6%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.0
Median,0.0
Q3,0.0
95-th percentile,37.5
Maximum,157.4
Range,157.4
Interquartile range,0.0

0,1
Standard deviation,15.978
Coef of variation,3.1541
Kurtosis,25.037
Mean,5.0659
MAD,8.639
Skewness,4.4517
Sum,30218
Variance,255.31
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5078,78.6%,
63.3,31,0.5%,
103.2,28,0.4%,
21.1,23,0.4%,
58.0,22,0.3%,
41.4,22,0.3%,
6.2,18,0.3%,
51.4,18,0.3%,
26.9,18,0.3%,
10.8,16,0.2%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5078,78.6%,
1.8,3,0.0%,
2.1,6,0.1%,
4.2,16,0.2%,
6.0,4,0.1%,

Value,Count,Frequency (%),Unnamed: 3
87.6,7,0.1%,
87.8,2,0.0%,
96.8,9,0.1%,
103.2,28,0.4%,
157.4,10,0.2%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),86.3%
Missing (n),5574

0,1
公道,750
私道,113
法定外,24
(Missing),5574

Value,Count,Frequency (%),Unnamed: 3
公道,750,11.6%,
私道,113,1.7%,
法定外,24,0.4%,
(Missing),5574,86.3%,

0,1
Distinct count,12
Unique (%),0.2%
Missing (%),8.4%
Missing (n),541
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.1461
Minimum,0
Maximum,12
Zeros (%),89.0%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,12
Range,12
Interquartile range,0

0,1
Standard deviation,0.90715
Coef of variation,6.2092
Kurtosis,63.371
Mean,0.1461
MAD,0.28376
Skewness,7.2895
Sum,864.9
Variance,0.82292
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5749,89.0%,
4.2,36,0.6%,
6.0,35,0.5%,
2.7,18,0.3%,
4.3,15,0.2%,
4.7,15,0.2%,
4.5,15,0.2%,
5.0,15,0.2%,
12.0,9,0.1%,
6.2,8,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5749,89.0%,
2.7,18,0.3%,
4.0,5,0.1%,
4.2,36,0.6%,
4.3,15,0.2%,

Value,Count,Frequency (%),Unnamed: 3
4.7,15,0.2%,
5.0,15,0.2%,
6.0,35,0.5%,
6.2,8,0.1%,
12.0,9,0.1%,

0,1
Distinct count,8
Unique (%),0.1%
Missing (%),97.4%
Missing (n),6290

0,1
東,71
南,25
北,19
Other values (4),56
(Missing),6290

Value,Count,Frequency (%),Unnamed: 3
東,71,1.1%,
南,25,0.4%,
北,19,0.3%,
北西,15,0.2%,
西,15,0.2%,
南西,15,0.2%,
南東,11,0.2%,
(Missing),6290,97.4%,

0,1
Distinct count,17
Unique (%),0.3%
Missing (%),8.4%
Missing (n),541
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.4292
Minimum,0
Maximum,86
Zeros (%),89.0%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,86
Range,86
Interquartile range,0

0,1
Standard deviation,9.1732
Coef of variation,6.4185
Kurtosis,47.294
Mean,1.4292
MAD,2.7758
Skewness,6.8323
Sum,8460.7
Variance,84.148
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5749,89.0%,
46.3,28,0.4%,
77.1,18,0.3%,
66.8,15,0.2%,
59.4,15,0.2%,
62.4,15,0.2%,
70.0,14,0.2%,
27.8,11,0.2%,
18.0,9,0.1%,
86.0,8,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5749,89.0%,
3.2,6,0.1%,
6.0,7,0.1%,
8.9,5,0.1%,
18.0,9,0.1%,

Value,Count,Frequency (%),Unnamed: 3
62.4,15,0.2%,
66.8,15,0.2%,
70.0,14,0.2%,
77.1,18,0.3%,
86.0,8,0.1%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),97.4%
Missing (n),6290

0,1
公道,152
私道,19
(Missing),6290

Value,Count,Frequency (%),Unnamed: 3
公道,152,2.4%,
私道,19,0.3%,
(Missing),6290,97.4%,

0,1
Distinct count,5
Unique (%),0.1%
Missing (%),1.5%
Missing (n),94

0,1
問題なし,6054
歩道あり,189
未舗装,79
(Missing),94

Value,Count,Frequency (%),Unnamed: 3
問題なし,6054,93.7%,
歩道あり,189,2.9%,
未舗装,79,1.2%,
歩道+緑地帯あり,45,0.7%,
(Missing),94,1.5%,

0,1
Distinct count,27
Unique (%),0.4%
Missing (%),0.0%
Missing (n),0

0,1
東武東上線,879
東武伊勢崎線,766
JR高崎線,697
Other values (24),4119

Value,Count,Frequency (%),Unnamed: 3
東武東上線,879,13.6%,
東武伊勢崎線,766,11.9%,
JR高崎線,697,10.8%,
西武池袋線,574,8.9%,
JR武蔵野線,489,7.6%,
JR東北本線【宇都宮線】,431,6.7%,
東武野田線,405,6.3%,
埼玉高速鉄道線,333,5.2%,
JR京浜東北線,323,5.0%,
西武新宿線,314,4.9%,

0,1
Distinct count,17
Unique (%),0.3%
Missing (%),97.0%
Missing (n),6269

0,1
東武伊勢崎線,33
東武東上線,29
JR東北本線【宇都宮線】,25
Other values (13),105
(Missing),6269

Value,Count,Frequency (%),Unnamed: 3
東武伊勢崎線,33,0.5%,
東武東上線,29,0.4%,
JR東北本線【宇都宮線】,25,0.4%,
JR武蔵野線,23,0.4%,
JR高崎線,15,0.2%,
西武池袋線,14,0.2%,
JR京浜東北線,14,0.2%,
埼玉高速鉄道線,11,0.2%,
東武野田線,10,0.2%,
東武日光線,7,0.1%,

0,1
Distinct count,248
Unique (%),3.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,83048
Minimum,0
Maximum,320000
Zeros (%),11.0%

0,1
Minimum,0
5-th percentile,0
Q1,60000
Median,85500
Q3,105000
95-th percentile,155000
Maximum,320000
Range,320000
Interquartile range,45000

0,1
Standard deviation,45109
Coef of variation,0.54317
Kurtosis,1.2326
Mean,83048
MAD,33025
Skewness,0.26522
Sum,536570032
Variance,2034800000
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0,708,11.0%,
105000,241,3.7%,
120000,232,3.6%,
110000,222,3.4%,
115000,202,3.1%,
100000,179,2.8%,
86000,132,2.0%,
155000,121,1.9%,
85000,120,1.9%,
94000,114,1.8%,

Value,Count,Frequency (%),Unnamed: 3
0,708,11.0%,
10000,3,0.0%,
10800,2,0.0%,
10900,1,0.0%,
11700,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
255000,3,0.0%,
261000,9,0.1%,
284500,1,0.0%,
292500,3,0.0%,
320000,3,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),42.3%
Missing (n),2731

0,1
○,3730
(Missing),2731

Value,Count,Frequency (%),Unnamed: 3
○,3730,57.7%,
(Missing),2731,42.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.7%
Missing (n),6377

0,1
○,84
(Missing),6377

Value,Count,Frequency (%),Unnamed: 3
○,84,1.3%,
(Missing),6377,98.7%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.1%
Missing (n),6402

0,1
○,59
(Missing),6402

Value,Count,Frequency (%),Unnamed: 3
○,59,0.9%,
(Missing),6402,99.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.6%
Missing (n),6307

0,1
○,154
(Missing),6307

Value,Count,Frequency (%),Unnamed: 3
○,154,2.4%,
(Missing),6307,97.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),95.4%
Missing (n),6166

0,1
○,295
(Missing),6166

Value,Count,Frequency (%),Unnamed: 3
○,295,4.6%,
(Missing),6166,95.4%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),94.3%
Missing (n),6092

0,1
○,369
(Missing),6092

Value,Count,Frequency (%),Unnamed: 3
○,369,5.7%,
(Missing),6092,94.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.8%
Missing (n),6385

0,1
○,76
(Missing),6385

Value,Count,Frequency (%),Unnamed: 3
○,76,1.2%,
(Missing),6385,98.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),44.1%
Missing (n),2852

0,1
○,3609
(Missing),2852

Value,Count,Frequency (%),Unnamed: 3
○,3609,55.9%,
(Missing),2852,44.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.8%
Missing (n),6385

0,1
○,76
(Missing),6385

Value,Count,Frequency (%),Unnamed: 3
○,76,1.2%,
(Missing),6385,98.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.6%
Missing (n),6438

0,1
○,23
(Missing),6438

Value,Count,Frequency (%),Unnamed: 3
○,23,0.4%,
(Missing),6438,99.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.8%
Missing (n),6322

0,1
○,139
(Missing),6322

Value,Count,Frequency (%),Unnamed: 3
○,139,2.2%,
(Missing),6322,97.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),94.9%
Missing (n),6130

0,1
○,331
(Missing),6130

Value,Count,Frequency (%),Unnamed: 3
○,331,5.1%,
(Missing),6130,94.9%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),94.1%
Missing (n),6079

0,1
○,382
(Missing),6079

Value,Count,Frequency (%),Unnamed: 3
○,382,5.9%,
(Missing),6079,94.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.1%
Missing (n),6403

0,1
○,58
(Missing),6403

Value,Count,Frequency (%),Unnamed: 3
○,58,0.9%,
(Missing),6403,99.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),43.8%
Missing (n),2831

0,1
○,3630
(Missing),2831

Value,Count,Frequency (%),Unnamed: 3
○,3630,56.2%,
(Missing),2831,43.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.1%
Missing (n),6336

0,1
○,125
(Missing),6336

Value,Count,Frequency (%),Unnamed: 3
○,125,1.9%,
(Missing),6336,98.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.2%
Missing (n),6408

0,1
○,53
(Missing),6408

Value,Count,Frequency (%),Unnamed: 3
○,53,0.8%,
(Missing),6408,99.2%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.8%
Missing (n),6320

0,1
○,141
(Missing),6320

Value,Count,Frequency (%),Unnamed: 3
○,141,2.2%,
(Missing),6320,97.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),96.1%
Missing (n),6209

0,1
○,252
(Missing),6209

Value,Count,Frequency (%),Unnamed: 3
○,252,3.9%,
(Missing),6209,96.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),95.8%
Missing (n),6190

0,1
○,271
(Missing),6190

Value,Count,Frequency (%),Unnamed: 3
○,271,4.2%,
(Missing),6190,95.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.5%
Missing (n),6361

0,1
○,100
(Missing),6361

Value,Count,Frequency (%),Unnamed: 3
○,100,1.5%,
(Missing),6361,98.5%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),40.7%
Missing (n),2628

0,1
○,3833
(Missing),2628

Value,Count,Frequency (%),Unnamed: 3
○,3833,59.3%,
(Missing),2628,40.7%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.1%
Missing (n),6402

0,1
○,59
(Missing),6402

Value,Count,Frequency (%),Unnamed: 3
○,59,0.9%,
(Missing),6402,99.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.5%
Missing (n),6428

0,1
○,33
(Missing),6428

Value,Count,Frequency (%),Unnamed: 3
○,33,0.5%,
(Missing),6428,99.5%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.8%
Missing (n),6320

0,1
○,141
(Missing),6320

Value,Count,Frequency (%),Unnamed: 3
○,141,2.2%,
(Missing),6320,97.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),96.0%
Missing (n),6203

0,1
○,258
(Missing),6203

Value,Count,Frequency (%),Unnamed: 3
○,258,4.0%,
(Missing),6203,96.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),94.7%
Missing (n),6119

0,1
○,342
(Missing),6119

Value,Count,Frequency (%),Unnamed: 3
○,342,5.3%,
(Missing),6119,94.7%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.2%
Missing (n),6411

0,1
○,50
(Missing),6411

Value,Count,Frequency (%),Unnamed: 3
○,50,0.8%,
(Missing),6411,99.2%,

0,1
Distinct count,56
Unique (%),0.9%
Missing (%),1.5%
Missing (n),95

0,1
南,1399
北,1038
東,1025
Other values (52),2904

Value,Count,Frequency (%),Unnamed: 3
南,1399,21.7%,
北,1038,16.1%,
東,1025,15.9%,
西,987,15.3%,
南西,419,6.5%,
南東,376,5.8%,
北西,350,5.4%,
北東,337,5.2%,
東＋北,56,0.9%,
東＋南,48,0.7%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),1.5%
Missing (n),96

0,1
良い,3467
普通,2588
悪い,310
(Missing),96

Value,Count,Frequency (%),Unnamed: 3
良い,3467,53.7%,
普通,2588,40.1%,
悪い,310,4.8%,
(Missing),96,1.5%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),56.1%
Missing (n),3623

0,1
○,2838
(Missing),3623

Value,Count,Frequency (%),Unnamed: 3
○,2838,43.9%,
(Missing),3623,56.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),96.9%
Missing (n),6261

0,1
○,200
(Missing),6261

Value,Count,Frequency (%),Unnamed: 3
○,200,3.1%,
(Missing),6261,96.9%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.5%
Missing (n),6297

0,1
○,164
(Missing),6297

Value,Count,Frequency (%),Unnamed: 3
○,164,2.5%,
(Missing),6297,97.5%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),75.1%
Missing (n),4855

0,1
○,1606
(Missing),4855

Value,Count,Frequency (%),Unnamed: 3
○,1606,24.9%,
(Missing),4855,75.1%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.3%
Missing (n),6419

0,1
○,42
(Missing),6419

Value,Count,Frequency (%),Unnamed: 3
○,42,0.7%,
(Missing),6419,99.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.8%
Missing (n),6322

0,1
○,139
(Missing),6322

Value,Count,Frequency (%),Unnamed: 3
○,139,2.2%,
(Missing),6322,97.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.6%
Missing (n),6436

0,1
○,25
(Missing),6436

Value,Count,Frequency (%),Unnamed: 3
○,25,0.4%,
(Missing),6436,99.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.0%
Missing (n),6394

0,1
○,67
(Missing),6394

Value,Count,Frequency (%),Unnamed: 3
○,67,1.0%,
(Missing),6394,99.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),11.7%
Missing (n),755

0,1
○,5706
(Missing),755

Value,Count,Frequency (%),Unnamed: 3
○,5706,88.3%,
(Missing),755,11.7%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),97.3%
Missing (n),6288

0,1
○,173
(Missing),6288

Value,Count,Frequency (%),Unnamed: 3
○,173,2.7%,
(Missing),6288,97.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.2%
Missing (n),6410

0,1
○,51
(Missing),6410

Value,Count,Frequency (%),Unnamed: 3
○,51,0.8%,
(Missing),6410,99.2%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.7%
Missing (n),6374

0,1
○,87
(Missing),6374

Value,Count,Frequency (%),Unnamed: 3
○,87,1.3%,
(Missing),6374,98.7%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.6%
Missing (n),6373

0,1
○,88
(Missing),6373

Value,Count,Frequency (%),Unnamed: 3
○,88,1.4%,
(Missing),6373,98.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),96.6%
Missing (n),6242

0,1
○,219
(Missing),6242

Value,Count,Frequency (%),Unnamed: 3
○,219,3.4%,
(Missing),6242,96.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.9%
Missing (n),6392

0,1
○,69
(Missing),6392

Value,Count,Frequency (%),Unnamed: 3
○,69,1.1%,
(Missing),6392,98.9%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),100.0%
Missing (n),6458

0,1
○,3
(Missing),6458

Value,Count,Frequency (%),Unnamed: 3
○,3,0.0%,
(Missing),6458,100.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.3%
Missing (n),6413

0,1
○,48
(Missing),6413

Value,Count,Frequency (%),Unnamed: 3
○,48,0.7%,
(Missing),6413,99.3%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),99.6%
Missing (n),6436

0,1
○,25
(Missing),6436

Value,Count,Frequency (%),Unnamed: 3
○,25,0.4%,
(Missing),6436,99.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),98.7%
Missing (n),6375

0,1
○,86
(Missing),6375

Value,Count,Frequency (%),Unnamed: 3
○,86,1.3%,
(Missing),6375,98.7%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,6275
（要）,186

Value,Count,Frequency (%),Unnamed: 3
（不要）,6275,97.1%,
（要）,186,2.9%,

0,1
Distinct count,25
Unique (%),0.4%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.5393
Minimum,0
Maximum,31
Zeros (%),0.9%

0,1
Minimum,0
5-th percentile,1
Q1,2
Median,3
Q3,6
95-th percentile,16
Maximum,31
Range,31
Interquartile range,4

0,1
Standard deviation,4.6516
Coef of variation,1.0247
Kurtosis,7.1192
Mean,4.5393
MAD,3.2113
Skewness,2.3996
Sum,29324
Variance,21.638
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
1.0,1454,22.5%,
2.0,1283,19.9%,
3.0,871,13.5%,
4.0,659,10.2%,
5.0,467,7.2%,
6.0,356,5.5%,
7.0,278,4.3%,
8.0,207,3.2%,
9.0,131,2.0%,
10.0,122,1.9%,

Value,Count,Frequency (%),Unnamed: 3
0.0,60,0.9%,
1.0,1454,22.5%,
2.0,1283,19.9%,
3.0,871,13.5%,
4.0,659,10.2%,

Value,Count,Frequency (%),Unnamed: 3
19.0,20,0.3%,
20.0,40,0.6%,
22.0,22,0.3%,
24.0,24,0.4%,
31.0,31,0.5%,

0,1
Distinct count,4145
Unique (%),64.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,140.29
Minimum,28.2
Maximum,643.99
Zeros (%),0.0%

0,1
Minimum,28.2
5-th percentile,89.37
Q1,110.08
Median,129.0
Q3,154.66
95-th percentile,225.82
Maximum,643.99
Range,615.79
Interquartile range,44.58

0,1
Standard deviation,50.866
Coef of variation,0.36258
Kurtosis,9.277
Mean,140.29
MAD,34.113
Skewness,2.4342
Sum,906400
Variance,2587.3
Memory size,421.0 KiB

Value,Count,Frequency (%),Unnamed: 3
100.0,65,1.0%,
120.0,40,0.6%,
100.1,23,0.4%,
100.09,20,0.3%,
100.01,19,0.3%,
150.0,19,0.3%,
165.0,19,0.3%,
100.05,16,0.2%,
130.0,15,0.2%,
120.1,15,0.2%,

Value,Count,Frequency (%),Unnamed: 3
28.2,1,0.0%,
37.27,1,0.0%,
39.16,1,0.0%,
43.49,1,0.0%,
52.28,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
472.88,1,0.0%,
476.53,1,0.0%,
513.13,1,0.0%,
551.38,1,0.0%,
643.99,1,0.0%,

0,1
Distinct count,2376
Unique (%),36.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,145.05
Minimum,39.16
Maximum,1945.5
Zeros (%),0.0%

0,1
Minimum,39.16
5-th percentile,89.67
Q1,112.55
Median,132.0
Q3,158.4
95-th percentile,242.27
Maximum,1945.5
Range,1906.3
Interquartile range,45.85

0,1
Standard deviation,65.144
Coef of variation,0.44912
Kurtosis,194.49
Mean,145.05
MAD,36.886
Skewness,8.8854
Sum,937160
Variance,4243.8
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
112.19,31,0.5%,
179.51,28,0.4%,
113.73,24,0.4%,
140.85,22,0.3%,
124.14,22,0.3%,
110.02,22,0.3%,
138.68,21,0.3%,
156.13,20,0.3%,
206.56,20,0.3%,
130.76,20,0.3%,

Value,Count,Frequency (%),Unnamed: 3
39.16,1,0.0%,
51.62,1,0.0%,
54.03,1,0.0%,
54.4,1,0.0%,
57.59,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
591.63,3,0.0%,
643.99,1,0.0%,
720.06,4,0.1%,
947.81,2,0.0%,
1945.5,2,0.0%,

0,1
Distinct count,2314
Unique (%),35.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,180.8
Minimum,0
Maximum,209320
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,92.84
Q1,115.82
Median,136.0
Q3,165.23
95-th percentile,244.52
Maximum,209320.0
Range,209320.0
Interquartile range,49.41

0,1
Standard deviation,2602.8
Coef of variation,14.396
Kurtosis,6455.3
Mean,180.8
MAD,84.993
Skewness,80.327
Sum,1168200
Variance,6774500
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
130.0,43,0.7%,
114.0,39,0.6%,
100.0,34,0.5%,
145.0,33,0.5%,
244.52,28,0.4%,
120.0,28,0.4%,
147.0,26,0.4%,
116.0,26,0.4%,
135.0,26,0.4%,
125.0,25,0.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1,0.0%,
52.28,1,0.0%,
54.03,1,0.0%,
54.4,1,0.0%,
57.59,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
476.53,1,0.0%,
514.9,1,0.0%,
551.38,2,0.0%,
643.99,1,0.0%,
209315.0,1,0.0%,

0,1
Correlation,0.96362

0,1
Distinct count,18
Unique (%),0.3%
Missing (%),78.5%
Missing (n),5070
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.7735
Minimum,1
Maximum,28
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,3
Median,4
Q3,7
95-th percentile,11
Maximum,28
Range,27
Interquartile range,4

0,1
Standard deviation,3.0319
Coef of variation,0.63515
Kurtosis,3.1551
Mean,4.7735
MAD,2.4003
Skewness,1.2996
Sum,6640
Variance,9.1926
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
3.0,269,4.2%,
2.0,225,3.5%,
4.0,159,2.5%,
7.0,143,2.2%,
5.0,142,2.2%,
1.0,118,1.8%,
6.0,116,1.8%,
8.0,57,0.9%,
9.0,52,0.8%,
10.0,36,0.6%,

Value,Count,Frequency (%),Unnamed: 3
1.0,118,1.8%,
2.0,225,3.5%,
3.0,269,4.2%,
4.0,159,2.5%,
5.0,142,2.2%,

Value,Count,Frequency (%),Unnamed: 3
13.0,24,0.4%,
14.0,8,0.1%,
15.0,1,0.0%,
19.0,2,0.0%,
28.0,1,0.0%,

0,1
Distinct count,10
Unique (%),0.2%
Missing (%),98.6%
Missing (n),6371
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.6222
Minimum,1
Maximum,9
Zeros (%),0.0%

0,1
Minimum,1.0
5-th percentile,1.45
Q1,3.0
Median,3.0
Q3,4.0
95-th percentile,7.0
Maximum,9.0
Range,8.0
Interquartile range,1.0

0,1
Standard deviation,1.6324
Coef of variation,0.45066
Kurtosis,1.1792
Mean,3.6222
MAD,1.241
Skewness,1.0927
Sum,326
Variance,2.6647
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
3.0,40,0.6%,
4.0,14,0.2%,
2.0,11,0.2%,
5.0,9,0.1%,
7.0,7,0.1%,
1.0,5,0.1%,
6.0,2,0.0%,
8.0,1,0.0%,
9.0,1,0.0%,
(Missing),6371,98.6%,

Value,Count,Frequency (%),Unnamed: 3
1.0,5,0.1%,
2.0,11,0.2%,
3.0,40,0.6%,
4.0,14,0.2%,
5.0,9,0.1%,

Value,Count,Frequency (%),Unnamed: 3
5.0,9,0.1%,
6.0,2,0.0%,
7.0,7,0.1%,
8.0,1,0.0%,
9.0,1,0.0%,

0,1
Distinct count,12
Unique (%),0.2%
Missing (%),3.8%
Missing (n),243
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.35092
Minimum,0
Maximum,18
Zeros (%),86.7%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,2
Maximum,18
Range,18
Interquartile range,0

0,1
Standard deviation,1.6729
Coef of variation,4.7672
Kurtosis,57.538
Mean,0.35092
MAD,0.63208
Skewness,7.1052
Sum,2182
Variance,2.7985
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,5600,86.7%,
1.0,275,4.3%,
2.0,136,2.1%,
5.0,37,0.6%,
3.0,33,0.5%,
4.0,31,0.5%,
10.0,28,0.4%,
7.0,25,0.4%,
13.0,22,0.3%,
18.0,19,0.3%,

Value,Count,Frequency (%),Unnamed: 3
0.0,5600,86.7%,
1.0,275,4.3%,
2.0,136,2.1%,
3.0,33,0.5%,
4.0,31,0.5%,

Value,Count,Frequency (%),Unnamed: 3
7.0,25,0.4%,
10.0,28,0.4%,
12.0,12,0.2%,
13.0,22,0.3%,
18.0,19,0.3%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
市街化区域,5921
市街化調整区域,475
非線引き区域,64

Value,Count,Frequency (%),Unnamed: 3
市街化区域,5921,91.6%,
市街化調整区域,475,7.4%,
非線引き区域,64,1.0%,
都市計画区域外,1,0.0%,

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),99.0%
Missing (n),6394

0,1
市街化区域,64
準都市計画区域,2
市街化調整区域,1
(Missing),6394

Value,Count,Frequency (%),Unnamed: 3
市街化区域,64,1.0%,
準都市計画区域,2,0.0%,
市街化調整区域,1,0.0%,
(Missing),6394,99.0%,

0,1
Distinct count,692
Unique (%),10.7%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,96.303
Minimum,0
Maximum,143.25
Zeros (%),5.6%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,96.38
Median,102.26
Q3,105.99
95-th percentile,111.78
Maximum,143.25
Range,143.25
Interquartile range,9.61

0,1
Standard deviation,24.355
Coef of variation,0.2529
Kurtosis,10.688
Mean,96.303
MAD,12.307
Skewness,-3.3875
Sum,622220
Variance,593.19
Memory size,421.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,360,5.6%,
105.99,347,5.4%,
93.0,299,4.6%,
105.16,187,2.9%,
105.98,169,2.6%,
108.47,123,1.9%,
104.33,115,1.8%,
103.5,90,1.4%,
99.36,87,1.3%,
105.57,82,1.3%,

Value,Count,Frequency (%),Unnamed: 3
0.0,360,5.6%,
54.77,1,0.0%,
62.46,1,0.0%,
65.51,1,0.0%,
65.61,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
126.69,12,0.2%,
127.1,1,0.0%,
128.47,4,0.1%,
132.49,2,0.0%,
143.25,1,0.0%,

0,1
Distinct count,797
Unique (%),12.3%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,100.24
Minimum,0
Maximum,320.45
Zeros (%),1.4%

0,1
Minimum,0.0
5-th percentile,90.0
Q1,96.46
Median,102.46
Q3,105.99
95-th percentile,110.54
Maximum,320.45
Range,320.45
Interquartile range,9.53

0,1
Standard deviation,14.832
Coef of variation,0.14796
Kurtosis,51.689
Mean,100.24
MAD,7.011
Skewness,-2.3242
Sum,647580
Variance,219.98
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
105.99,461,7.1%,
93.0,302,4.7%,
96.0,128,2.0%,
95.87,97,1.5%,
0.0,88,1.4%,
97.0,85,1.3%,
105.16,75,1.2%,
99.18,73,1.1%,
95.0,70,1.1%,
100.0,60,0.9%,

Value,Count,Frequency (%),Unnamed: 3
0.0,88,1.4%,
35.17,3,0.0%,
55.89,2,0.0%,
59.2,5,0.1%,
64.96,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
156.81,3,0.0%,
192.46,2,0.0%,
217.56,2,0.0%,
223.16,2,0.0%,
320.45,3,0.0%,

0,1
Distinct count,476
Unique (%),7.4%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,101.67
Minimum,0
Maximum,180.89
Zeros (%),1.3%

0,1
Minimum,0.0
5-th percentile,92.0
Q1,98.01
Median,104.49
Q3,106.6
95-th percentile,112.61
Maximum,180.89
Range,180.89
Interquartile range,8.59

0,1
Standard deviation,13.861
Coef of variation,0.13634
Kurtosis,36.166
Mean,101.67
MAD,7.0976
Skewness,-5.1084
Sum,656870
Variance,192.13
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
105.99,892,13.8%,
93.0,299,4.6%,
105.16,153,2.4%,
99.18,131,2.0%,
96.0,130,2.0%,
108.89,105,1.6%,
104.33,94,1.5%,
108.47,93,1.4%,
95.87,89,1.4%,
0.0,84,1.3%,

Value,Count,Frequency (%),Unnamed: 3
0.0,84,1.3%,
11.13,2,0.0%,
30.61,1,0.0%,
62.46,1,0.0%,
64.96,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
127.1,6,0.1%,
127.98,2,0.0%,
129.18,8,0.1%,
131.65,4,0.1%,
180.89,3,0.0%,

0,1
Distinct count,460
Unique (%),7.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,96.516
Minimum,0
Maximum,126.69
Zeros (%),3.7%

0,1
Minimum,0.0
5-th percentile,83.0
Q1,95.22
Median,100.0
Q3,105.5
95-th percentile,108.88
Maximum,126.69
Range,126.69
Interquartile range,10.28

0,1
Standard deviation,19.993
Coef of variation,0.20715
Kurtosis,17.164
Mean,96.516
MAD,9.4899
Skewness,-4.1162
Sum,623590
Variance,399.73
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
105.99,460,7.1%,
93.0,333,5.2%,
0.0,239,3.7%,
105.16,196,3.0%,
96.0,154,2.4%,
99.18,109,1.7%,
108.47,106,1.6%,
104.33,94,1.5%,
105.0,93,1.4%,
97.0,93,1.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,239,3.7%,
64.96,1,0.0%,
67.0,2,0.0%,
71.0,1,0.0%,
71.08,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
120.07,2,0.0%,
122.55,1,0.0%,
123.37,1,0.0%,
125.86,1,0.0%,
126.69,1,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
宅内処理,6027
公共下水,329
側溝,105

Value,Count,Frequency (%),Unnamed: 3
宅内処理,6027,93.3%,
公共下水,329,5.1%,
側溝,105,1.6%,

0,1
Distinct count,70
Unique (%),1.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.3816
Minimum,0
Maximum,6
Zeros (%),21.0%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,0.1
Median,0.3
Q3,0.5
95-th percentile,1.2
Maximum,6.0
Range,6.0
Interquartile range,0.4

0,1
Standard deviation,0.47153
Coef of variation,1.2357
Kurtosis,21.517
Mean,0.3816
MAD,0.29054
Skewness,3.6521
Sum,2465.5
Variance,0.22234
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,1354,21.0%,
0.2,1132,17.5%,
0.3,953,14.8%,
0.5,694,10.7%,
0.4,611,9.5%,
0.1,471,7.3%,
0.6,299,4.6%,
0.8,182,2.8%,
1.0,137,2.1%,
0.7,88,1.4%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1354,21.0%,
0.07,1,0.0%,
0.1,471,7.3%,
0.11,1,0.0%,
0.12,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
4.0,8,0.1%,
4.4,2,0.0%,
4.7,1,0.0%,
4.9,4,0.1%,
6.0,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（無）,6151
（有）,310

Value,Count,Frequency (%),Unnamed: 3
（無）,6151,95.2%,
（有）,310,4.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
（不要）,5968
（要）,493

Value,Count,Frequency (%),Unnamed: 3
（不要）,5968,92.4%,
（要）,493,7.6%,

0,1
Distinct count,10
Unique (%),0.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,165.55
Minimum,60
Maximum,400
Zeros (%),0.0%

0,1
Minimum,60
5-th percentile,80
Q1,100
Median,200
Q3,200
95-th percentile,200
Maximum,400
Range,340
Interquartile range,100

0,1
Standard deviation,51.551
Coef of variation,0.31139
Kurtosis,-0.17948
Mean,165.55
MAD,46.4
Skewness,-0.48595
Sum,1069630
Variance,2657.5
Memory size,101.0 KiB

Value,Count,Frequency (%),Unnamed: 3
200,4205,65.1%,
100,1133,17.5%,
80,824,12.8%,
150,253,3.9%,
400,18,0.3%,
160,10,0.2%,
60,9,0.1%,
300,6,0.1%,
70,2,0.0%,
180,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
60,9,0.1%,
70,2,0.0%,
80,824,12.8%,
100,1133,17.5%,
150,253,3.9%,

Value,Count,Frequency (%),Unnamed: 3
160,10,0.2%,
180,1,0.0%,
200,4205,65.1%,
300,6,0.1%,
400,18,0.3%,

0,1
Correlation,0.98142

0,1
Distinct count,12
Unique (%),0.2%
Missing (%),0.0%
Missing (n),0

0,1
第一種低層住居専用地域,2186
第一種住居地域,1420
第一種中高層住居専用地域,1401
Other values (9),1454

Value,Count,Frequency (%),Unnamed: 3
第一種低層住居専用地域,2186,33.8%,
第一種住居地域,1420,22.0%,
第一種中高層住居専用地域,1401,21.7%,
指定のない区域,388,6.0%,
第二種中高層住居専用地域,313,4.8%,
準工業地域,275,4.3%,
第二種住居地域,207,3.2%,
準住居地域,74,1.1%,
工業地域,69,1.1%,
第二種低層住居専用地域,61,0.9%,

0,1
Distinct count,10
Unique (%),0.2%
Missing (%),92.1%
Missing (n),5949

0,1
第一種住居地域,192
第一種低層住居専用地域,80
第一種中高層住居専用地域,72
Other values (6),168
(Missing),5949

Value,Count,Frequency (%),Unnamed: 3
第一種住居地域,192,3.0%,
第一種低層住居専用地域,80,1.2%,
第一種中高層住居専用地域,72,1.1%,
準住居地域,62,1.0%,
第二種住居地域,41,0.6%,
第二種中高層住居専用地域,38,0.6%,
第二種低層住居専用地域,9,0.1%,
準工業地域,9,0.1%,
近隣商業地域,9,0.1%,
(Missing),5949,92.1%,

Unnamed: 0,index,id,keiyaku_pr_x,mean,error,pj_no,keiyaku_pr_y,tc_mseki,tt_mseki,levelplan,fukuin,road_st,magutchi,setsudo_hi,setsudo_kj,jigata,hiatari,niwasaki,garage,kobetsu1,kobetsu2,kobetsu3,kobetsu4,jukyo,chiseki_js_hb,chiseki_kb_hb,yoto1,yoto2,kempei1,kempei2,yoseki1,yoseki2,josui,gesui,gas,usui,tateuri_su,tochiuri_su,joken_su,hy1f_date_su,hy2f_date_su,hy3f_date_su,road1_hk,road1_sb,road1_fi,road1_mg,road2_hk,road2_sb,road2_fi,road2_mg,road3_hk,road3_sb,road3_fi,road3_mg,road4_hk,road4_sb,road4_fi,road4_mg,kaoku_um,kaoku_hb,yheki_kotei,yheki_umu,yheki_yohi,gk_sho_kyori,gk_chu_kyori,kborjs,hw_status,toshikuiki1,toshikuiki2,kodochiku,chikukeikaku,keikakuroad,kaihatsukyoka,t53kyoka,hokakyoka,bokachiiki,minmenseki,hokakisei1,hokakisei2,hokakisei3,hokakisei4,kinshijiko,rosenka_hb,koji_hb,kijun_hb,mseki_yt_hb,mseki_rd_hb,mseki_dp_hb,tc_mseki_min_hb,tc_mseki_max_hb,tt_mseki_min_hb,tt_mseki_max_hb,tc_mseki_avg_hb,tt_mseki_avg_hb,fi4m_yohi,fi3m_yohi,fi4m_kyori,fi3m_kyori,bus_yohi,bus_hon,sho_conv,sho_super,sho_shoten,sho_market,shu_jutaku,shu_park,shu_shop,shu_factory,shu_hvline,shu_tower,shu_bochi,shu_sogi,shu_zoki,shu_kokyo,shu_highway,shu_kaido,shu_line_ari,shu_line_nashi,shu_soon,gk_yoc_tm,gk_sho_tm,gk_chu_tm,rs_e_kdate2,rs_e_kdate3,rs_e_parking,rs_e_zoki,rs_e_m_ari,rs_e_m_nashi,rs_e_tahata,rs_w_kdate2,rs_w_kdate3,rs_w_parking,rs_w_zoki,rs_w_m_ari,rs_w_m_nashi,rs_w_tahata,rs_s_kdate2,rs_s_kdate3,rs_s_parking,rs_s_zoki,rs_s_m_ari,rs_s_m_nashi,rs_s_tahata,rs_n_kdate2,rs_n_kdate3,rs_n_parking,rs_n_zoki,rs_n_m_ari,rs_n_m_nashi,rs_n_tahata,rosen_nm1,eki_nm1,bas_toho1,eki_kyori1,bastei_nm1,teiho1,rosen_nm2,eki_nm2,bas_toho2,eki_kyori2,bastei_nm2,teiho2
0,0,train_0000,39800000,40068180,0.673819,0,39800000,109.26,104.43,2F/4LDK,4.0,問題なし,9.9,東,良い,整形地,普通,4.0,1,床暖房付,,,,埼玉県朝霞市泉水,109.26,109.1,工業地域,,60,0.0,200,0.0,公営,公共下水,個別プロパン,宅内処理,1.0,0.0,0.0,0.0,1.0,0.0,北,私道,4.0,7.0,東,私道,4.0,10.0,,,0.0,0.0,,,0.0,0.0,（無）,0.0,0.6,（無）,（不要）,800,1000,実測,更地,市街化区域,,第2種,（無）,（無）,（不要）,（不要）,（不要）,防火指定無,0.0,,,,,（無）,135000,188000,197000,109.26,0.0,0.0,109.26,109.26,104.43,104.43,109.26,104.43,（不要）,（不要）,0.0,0.0,（不要）,,,,,,○,,,,,,,,,,,,,,,19.0,10,13,,,,,,,,○,,,,,,,,○,,,,,,,,,,,,,東武東上線,朝霞台,徒歩,17,,,,,,,,
1,1,train_0001,22300000,22339068,0.175193,1,22300000,136.11,105.16,2F/4LDK,4.2,問題なし,9.3,北西,普通,不整形地,普通,3.0,1,,,,,埼玉県川越市大字的場,136.15,136.0,第一種低層住居専用地域,,60,0.0,200,0.0,公営,公共下水,個別プロパン,宅内処理,1.0,0.0,0.0,0.0,1.0,0.0,北西,公道,4.2,9.3,,,0.0,0.0,,,0.0,0.0,,,0.0,0.0,（無）,0.0,0.0,（無）,（不要）,1680,1840,実測,更地,市街化区域,,指定無,（無）,（無）,（不要）,（不要）,（不要）,22条区域,,,農地法,,,（無）,68000,84900,48600,136.15,0.0,0.0,136.15,136.15,109.1,109.1,136.15,105.16,（不要）,（不要）,0.0,0.0,（不要）,,,,,,○,,,,,,,,,,,,,,,15.0,21,23,○,,,,,,,○,,,,,,,,,,,,,,○,,,,,,,JR川越線,的場,徒歩,13,,,,,,,,
2,2,train_0002,19800000,20213863,2.090217,2,19800000,87.07,85.94,2F/4LDK,4.0,問題なし,11.1,東,普通,整形地,普通,2.8,1,,,,,埼玉県川口市安行原,86.92,86.52,第一種低層住居専用地域,,50,0.0,100,0.0,私営,個別浄化槽,個別プロパン,宅内処理,1.0,0.0,0.0,0.0,1.0,0.0,東,私道,4.0,11.1,北,公道,3.8,5.2,,,0.0,0.0,,,0.0,0.0,（有）,62.1,0.2,（無）,（不要）,600,550,実測,建物有,市街化区域,,指定無,（無）,（無）,（不要）,（不要）,（不要）,防火指定無,,,,,,（無）,83000,116000,124000,86.92,0.0,0.0,86.92,86.92,85.96,85.96,86.92,85.96,（不要）,（不要）,0.0,0.0,（要）,35.0,○,,,,○,,,,,,,,,,,,,,,19.0,8,7,,,,,,,,,,○,,,,,○,,,,,,,,,,,,,,埼玉高速鉄道線,新井宿,徒歩,27,,,,,,,,
3,3,train_0003,33990000,34116959,0.373519,3,33990000,163.75,105.98,2F/4LDK,4.0,問題なし,10.5,南,普通,整形地,良い,4.0,2,,,,,埼玉県草加市松江,164.91,164.07,工業地域,,60,0.0,200,0.0,公営,公共下水,都市ガス,宅内処理,1.0,0.0,0.0,0.0,1.0,0.0,南,公道,4.0,13.3,,,0.0,0.0,,,0.0,0.0,,,0.0,0.0,（無）,0.0,0.3,（無）,（不要）,1220,111,公簿,更地,市街化区域,,指定無,（無）,（無）,（不要）,（不要）,（要）,防火指定無,,,,,,（無）,91000,122000,113000,164.07,0.0,0.84,164.07,164.07,105.99,105.99,164.07,105.99,（不要）,（不要）,0.0,0.0,（不要）,,,,,,○,,,,,,,,,,,,,,,12.0,16,2,○,,,,,,,○,,,,,,,,,,,,,,,,○,,,,,東武伊勢崎線,獨協大学前駅〈草加松原〉,徒歩,18,,,,,,,,
4,4,train_0004,30800000,31771868,3.155416,4,30800000,111.51,89.01,2F/4LDK,4.0,問題なし,13.5,北,良い,整形地,普通,2.0,2,床暖房付,,,,埼玉県新座市野火止,403.58,403.58,第一種低層住居専用地域,,50,0.0,80,0.0,公営,公共下水,都市ガス,宅内処理,3.0,0.0,0.0,0.0,3.0,0.0,南西,私道,4.0,13.9,北東,私道,4.0,13.9,,,0.0,0.0,,,0.0,0.0,（無）,0.0,0.1,（無）,（不要）,480,2800,公簿,更地,市街化区域,,指定無,（無）,（無）,（不要）,（不要）,（不要）,22条区域,,,,埋蔵文化財,,（無）,160000,179000,160000,334.58,0.0,0.0,111.52,111.53,89.27,89.27,111.52,88.73,（不要）,（不要）,0.0,0.0,（不要）,,○,,,,○,,,,,,,,,,,,,,,11.0,6,35,○,,,,,,,○,,,,,,,,,,,,,,,,,,,,,JR武蔵野線,新座,徒歩,7,,,,,,,,


## シリアライズしておく

In [3]:
import pickle

# Persist the list of fitted models so a later session can reuse them without retraining.
# The context manager closes the handle as soon as the dump finishes, which guarantees the
# pickle is fully flushed to disk (a bare open() keeps the file open for the kernel's lifetime).
with open("model_V11.pkl", "wb") as f:
    pickle.dump(models, f)

## デシリアライズ

In [1]:
import pickle

# Reload the list of fitted models from model_V11.pkl.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load files
# that were produced by this notebook or another trusted source.
# The context manager closes the handle once the load finishes.
with open("model_V11.pkl", "rb") as f:
    models = pickle.load(f)

### 複数モデルから重要度を取得する

In [5]:
import pandas as pd

# Only the column names are needed here; 'id' and 'pj_no' are dropped so that the names
# line up with the columns that remain after the same drop is applied before fitting.
train_x = pd.read_csv("data/processed_train_goto_x_v11.csv").drop(['id','pj_no'],axis=1)

# Sum the per-model importances into a fresh array. Building the total with sum() avoids
# accumulating in place on the array object returned by models[0], and the previously
# computed-but-unused `index` value has been removed.
importances = sum(fitted.feature_importances_ for fitted in models)

# Average over the models and save one row per feature name.
mean = importances / len(models)
df = pd.DataFrame(mean, index=train_x.columns)
df.to_csv("data/importance_V11.csv")

In [28]:
# Running total of feature importances: start from the first model, then add every later one.
importances = models[0].feature_importances_
for model_idx in range(1, len(models)):
    importances += models[model_idx].feature_importances_

In [29]:
# Average the summed importances over the number of models.
mean = importances / len(models)
# The same values twice: df is labelled with the x_train column names,
# df2 keeps the default integer index.
df = pd.DataFrame(mean, index=x_train.columns)
df2 = pd.DataFrame(mean)

In [32]:
# Write the name-indexed averaged importances to data/tmp.csv.
df.to_csv("data/tmp.csv")

In [31]:
# Preview the first rows of the integer-indexed averaged importances.
df2.head()

Unnamed: 0,0
0,0.070948
1,0.062627
2,0.022726
3,0.000766
4,0.005862


#### 7/20 使わなくなったコード

In [None]:
def calc_values(x):
    """Summarise ``x`` as a Series of [min, max, mean, median, std], in that order.

    ``x`` must provide the pandas reduction methods used below (e.g. a Series).
    The result carries the default 0..4 integer index.
    """
    summary = [x.min(), x.max(), x.mean(), x.median(), x.std()]
    return pd.Series(summary)

# Row-wise summary statistics of df, stored as five extra columns.
stat_columns = ['min', 'max', 'mean', 'median', 'std']
df[stat_columns] = df.apply(calc_values, axis=1)

# Put the evaluation targets (index reset to positional) side by side with df.
Y_eval_pred = pd.concat([Y_eval.reset_index(), df], axis=1)

# Absolute percentage error of the row mean / row median against the 'keiyaku_pr' column.
for stat in ('mean', 'median'):
    Y_eval_pred[stat + '_error'] = (Y_eval_pred['keiyaku_pr'] - Y_eval_pred[stat]).abs() / Y_eval_pred['keiyaku_pr'] * 100

# Cell output: the average percentage error of the row mean.
Y_eval_pred['mean_error'].mean()

In [26]:
import xgboost as xgb
from xgboost import XGBRegressor
# 事前準備処理
# x_train. y_train, x_eval, y_evalを作成する
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def mean_absolute_percentage_error(y_train: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_train: Actual (ground-truth) values. Despite the parameter name, any target
            array can be passed; a single-column 2-D array such as ``DataFrame.values``
            is flattened first.
        y_pred: Predicted values, with the same number of elements as ``y_train``.

    Returns:
        ``100 * mean(|y_train - y_pred| / |y_train|)`` as a Python float.

    Raises:
        ValueError: If the inputs are empty or hold different numbers of elements.

    Note:
        Zero actual values are not special-cased; they produce ``inf``/``nan`` from
        the element-wise division.
    """
    # Flatten both inputs so an (n, 1) column and an (n,) vector are compared element by
    # element instead of being broadcast against each other.
    actual = np.asarray(y_train, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("mean_absolute_percentage_error requires at least one sample")
    if actual.size != predicted.size:
        raise ValueError(
            f"y_train has {actual.size} elements but y_pred has {predicted.size}"
        )

    # Divide by |actual| so that negative targets still contribute a positive error.
    return float(100 * np.mean(np.abs(actual - predicted) / np.abs(actual)))


# Load the v9 feature and target tables.
train_x = pd.read_csv("data/processed_train_goto_x_v9.csv")
train_y = pd.read_csv("data/processed_train_goto_y_v9.csv")
# 80% of the rows go to training and the remainder to evaluation;
# the fixed random_state makes the split repeatable.
X_train, X_eval, Y_train, Y_eval = train_test_split( train_x, train_y, train_size=0.8, random_state = 19711022)





In [3]:
def learn(train_x, train_y, params, s):
    """Fit an XGBRegressor on the given data and return the fitted model.

    ``params`` is expanded into constructor keyword arguments, ``s`` is passed as
    ``seed`` and ``n_jobs`` is always -1.
    """
    regressor = XGBRegressor(n_jobs=-1, seed=s, **params)
    regressor.fit(train_x, train_y)
    return regressor

def mean_absolute_percentage_error(y_train: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_train: Actual (ground-truth) values. Despite the parameter name, any target
            array can be passed; a single-column 2-D array such as ``DataFrame.values``
            is flattened first.
        y_pred: Predicted values, with the same number of elements as ``y_train``.

    Returns:
        ``100 * mean(|y_train - y_pred| / |y_train|)`` as a Python float.

    Raises:
        ValueError: If the inputs are empty or hold different numbers of elements.

    Note:
        Zero actual values are not special-cased; they produce ``inf``/``nan`` from
        the element-wise division.
    """
    # Flatten both inputs so an (n, 1) column and an (n,) vector are compared element by
    # element instead of being broadcast against each other.
    actual = np.asarray(y_train, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("mean_absolute_percentage_error requires at least one sample")
    if actual.size != predicted.size:
        raise ValueError(
            f"y_train has {actual.size} elements but y_pred has {predicted.size}"
        )

    # Divide by |actual| so that negative targets still contribute a positive error.
    return float(100 * np.mean(np.abs(actual - predicted) / np.abs(actual)))


In [39]:
# Reload the v9 tables and split them 80/20 with a fixed random_state.
train_x = pd.read_csv("data/processed_train_goto_x_v9.csv")
train_y = pd.read_csv("data/processed_train_goto_y_v9.csv")
X_train, X_eval, Y_train, Y_eval = train_test_split( train_x, train_y, train_size=0.8, random_state = 19711022)

## Do the same thing without splitting the data into land-only sales and built-for-sale houses
params = {
    'n_estimators':700,
    'max_depth':6,
    'min_child_weight':9,
    'gamma':0,
    'subsample':1.0,
    'colsample_bytree':0.6,
    'learning_rate':0.1
}
# Train one model on all training rows (identifier columns removed) with seed 42,
# then predict the held-out rows.
model = learn(X_train.drop(['id','pj_no'],axis=1), Y_train.drop(['id'],axis=1), params, 42)
pred_y = model.predict(X_eval.drop(['id','pj_no'],axis=1))



In [43]:
# Take the id and 'levelplan_土地売り' columns of the evaluation rows; the index is reset
# so the rows line up positionally with the freshly built prediction Series below.
Y_pred_all = pd.DataFrame(X_eval[['id','levelplan_土地売り']].copy().reset_index(drop=True))
# Predictions are cast to int64 (fractional part discarded).
Y_pred_all['pred_keiyaku_pr'] = pd.Series(pred_y).astype(np.int64)
# Left-join the predictions onto the actual values by id.
Y_eval_pred = pd.merge(Y_eval, Y_pred_all, on='id', how='left')

In [44]:
# Percentage error of the predictions against the actual 'keiyaku_pr' values on the hold-out rows.
print(mean_absolute_percentage_error(Y_eval_pred['keiyaku_pr'].values, Y_eval_pred['pred_keiyaku_pr'].values))

8.58929204425


In [42]:
# Preview the first rows of the merged actual/predicted table.
Y_eval_pred.head()

Unnamed: 0,id,keiyaku_pr,levelplan_土地売り,pred_keiyaku_pr
0,train_1255,21700000,0,24113422.0
1,train_2319,26300000,0,28799550.0
2,train_4409,30000000,0,32914426.0
3,train_5358,36800000,0,32592012.0
4,train_4592,21000000,0,26503988.0


In [46]:
# NOTE: `out` is an alias, not a copy, so the 'error' column added below also appears on Y_eval_pred.
out = Y_eval_pred
# Per-row absolute percentage error of the prediction.
out['error']=abs((out['keiyaku_pr']-out['pred_keiyaku_pr'])/out['keiyaku_pr'])*100
# Attach the v9 feature rows (re-read from disk) to each evaluated id.
output = pd.merge(out, pd.read_csv("data/processed_train_goto_x_v9.csv"),on='id')

In [48]:
# Write the per-row error table joined with its features to data/tmp.csv.
output.to_csv("data/tmp.csv")

### 以降はむだだったコード。建て売りか土地売りかで別モデルを作ったが、結局意味はなかった。

In [None]:
# Split the data into built-for-sale rows ('levelplan_土地売り' == 0, "tateuri")
# and land-only rows ('levelplan_土地売り' == 1, "tochiuri").
# Each mask is built from the feature frame and reused on the matching target frame,
# which relies on both frames sharing the same row index.
train_tateuri_mask = X_train['levelplan_土地売り']==0
train_tochiuri_mask = X_train['levelplan_土地売り']==1
X_train_tateuri, Y_train_tateuri = X_train[train_tateuri_mask], Y_train[train_tateuri_mask]
X_train_tochiuri, Y_train_tochiuri = X_train[train_tochiuri_mask], Y_train[train_tochiuri_mask]

eval_tateuri_mask = X_eval['levelplan_土地売り']==0
eval_tochiuri_mask = X_eval['levelplan_土地売り']==1
X_eval_tateuri, Y_eval_tateuri = X_eval[eval_tateuri_mask], Y_eval[eval_tateuri_mask]
X_eval_tochiuri, Y_eval_tochiuri = X_eval[eval_tochiuri_mask], Y_eval[eval_tochiuri_mask]

In [None]:
## Try removing every levelplan column except the land-only one (土地売り)
# The column lists are grouped by floor count (1F / 2F / 3F) and shared by the
# training and evaluation frames, so both always lose exactly the same columns.
levelplan_1f_cols = ['levelplan_1F/2LDK','levelplan_1F/3LDK','levelplan_1F/4LDK','levelplan_1F/4LDK+S','levelplan_1F/5LDK']
levelplan_2f_cols = ['levelplan_2F/2LDK','levelplan_2F/2LDK+S','levelplan_2F/3DK','levelplan_2F/3LDK','levelplan_2F/3LDK+2S','levelplan_2F/3LDK+S','levelplan_2F/4DK','levelplan_2F/4LDK','levelplan_2F/4LDK+S','levelplan_2F/5DK','levelplan_2F/5LDK']
levelplan_3f_cols = ['levelplan_3F/2LDK','levelplan_3F/2LDK+2S','levelplan_3F/2LDK+S','levelplan_3F/3DK','levelplan_3F/3LDK','levelplan_3F/3LDK+2S','levelplan_3F/3LDK+S','levelplan_3F/4DK','levelplan_3F/4LDK','levelplan_3F/4LDK+S','levelplan_3F/5LDK']

# Training features: drop the 1F, then 2F, then 3F columns.
X_train_1 = X_train.drop(levelplan_1f_cols, axis=1)
X_train_2 = X_train_1.drop(levelplan_2f_cols, axis=1)
X_train_3 = X_train_2.drop(levelplan_3f_cols, axis=1)

# Evaluation features: same three steps.
X_eval_1 = X_eval.drop(levelplan_1f_cols, axis=1)
X_eval_2 = X_eval_1.drop(levelplan_2f_cols, axis=1)
X_eval_3 = X_eval_2.drop(levelplan_3f_cols, axis=1)


In [None]:
# Call the training routine.
params = {
    'n_estimators':700,
    'max_depth':6,
    'min_child_weight':9,
    'gamma':0,
    'subsample':1.0,
    'colsample_bytree':0.6,
    'learning_rate':0.1
}

# One model per subset, both with the same hyperparameters and seed 42. The
# 'levelplan_土地売り' column is dropped along with the identifier columns because it
# is constant within each subset.
tateuri_model = learn(X_train_tateuri.drop(['id','pj_no','levelplan_土地売り'],axis=1), Y_train_tateuri.drop(['id'],axis=1), params, 42)
tochiuri_model = learn(X_train_tochiuri.drop(['id','pj_no','levelplan_土地売り'],axis=1), Y_train_tochiuri.drop(['id'],axis=1), params, 42)

In [None]:
# Predict: each model scores only its own subset of the evaluation rows,
# with the same columns dropped as at training time.
pred_y_tateuri = tateuri_model.predict(X_eval_tateuri.drop(['id','pj_no','levelplan_土地売り'],axis=1))
pred_y_tochiuri = tochiuri_model.predict(X_eval_tochiuri.drop(['id','pj_no','levelplan_土地売り'],axis=1))

In [None]:
# Build the predictions as DataFrames.
# For each subset: take the ids with a reset index so they line up positionally
# with the prediction Series, then add the predictions as a column.
Y_pred_tateuri = pd.DataFrame(X_eval_tateuri['id'].copy().reset_index(drop=True))
Y_pred_tateuri['pred_keiyaku_pr'] = pd.Series(pred_y_tateuri)
Y_pred_tochiuri = pd.DataFrame(X_eval_tochiuri['id'].copy().reset_index(drop=True))
Y_pred_tochiuri['pred_keiyaku_pr'] = pd.Series(pred_y_tochiuri)
# Stack both subsets and left-join them onto the actual values by id.
Y_pred_all = pd.concat([Y_pred_tateuri, Y_pred_tochiuri])
Y_eval_pred = pd.merge(Y_eval, Y_pred_all, on='id', how='left')

In [135]:
# Train and predict with the reduced feature frames (X_train_3 / X_eval_3), seed 42.
model = learn(X_train_3.drop(['id','pj_no'],axis=1), Y_train.drop(['id'],axis=1), params, 42)
pred_y = model.predict(X_eval_3.drop(['id','pj_no'],axis=1))

In [136]:
# Pair the evaluation ids (index reset for positional alignment) with the predictions,
# then left-join them onto the actual values by id. Predictions are stored as returned
# by the model, without an integer cast.
Y_pred_all = pd.DataFrame(X_eval_3[['id','levelplan_土地売り']].copy().reset_index(drop=True))
Y_pred_all['pred_keiyaku_pr'] = pd.Series(pred_y)
Y_eval_pred = pd.merge(Y_eval, Y_pred_all, on='id', how='left')

In [137]:
# Percentage error of the reduced-feature model on the hold-out rows.
print(mean_absolute_percentage_error(Y_eval_pred['keiyaku_pr'].values, Y_eval_pred['pred_keiyaku_pr'].values))

8.56302858604


In [1]:
# 共通処理
# x_train. y_train, x_eval, y_evalを作成する
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def mean_absolute_percentage_error(y_train: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_train: Actual (ground-truth) values. Despite the parameter name, any target
            array can be passed; a single-column 2-D array such as ``DataFrame.values``
            is flattened first.
        y_pred: Predicted values, with the same number of elements as ``y_train``.

    Returns:
        ``100 * mean(|y_train - y_pred| / |y_train|)`` as a Python float.

    Raises:
        ValueError: If the inputs are empty or hold different numbers of elements.

    Note:
        Zero actual values are not special-cased; they produce ``inf``/``nan`` from
        the element-wise division.
    """
    # Flatten both inputs so an (n, 1) column and an (n,) vector are compared element by
    # element instead of being broadcast against each other.
    actual = np.asarray(y_train, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("mean_absolute_percentage_error requires at least one sample")
    if actual.size != predicted.size:
        raise ValueError(
            f"y_train has {actual.size} elements but y_pred has {predicted.size}"
        )

    # Divide by |actual| so that negative targets still contribute a positive error.
    return float(100 * np.mean(np.abs(actual - predicted) / np.abs(actual)))


# Load the v8 tables and split them 80/20 with a fixed random_state.
train_x = pd.read_csv("data/processed_train_goto_x_v8.csv")
train_y = pd.read_csv("data/processed_train_goto_y_v8.csv")
X_train, X_eval, Y_train, Y_eval = train_test_split( train_x, train_y, train_size=0.8, random_state = 19711022)

# Persist the four pieces of the split so other cells can read them back from disk.
X_train.to_csv("data/X_train.csv", index=False)
X_eval.to_csv("data/X_eval.csv", index=False)
Y_train.to_csv("data/Y_train.csv", index=False)
Y_eval.to_csv("data/Y_eval.csv", index=False)

# NOTE: train_x / train_y are rebound here. From this point they hold only the
# training split, with the identifier columns removed.
train_x = pd.read_csv('data/X_train.csv').drop(['id','pj_no'],axis=1)
train_y = pd.read_csv('data/Y_train.csv').drop(['id'],axis=1)

import xgboost as xgb
from xgboost import XGBRegressor
import time

# Hyperparameters passed to XGBRegressor below.
params = {
    'n_estimators':700,
    'max_depth':6,
    'min_child_weight':9,
    'gamma':0,
    'subsample':1.0,
    'colsample_bytree':0.6,
    'learning_rate':0.1
}

print(f"start learning...")
xgboost_opt = XGBRegressor(**params, seed=42, n_jobs=-1)
# Time only the fit call; the elapsed seconds are printed afterwards.
start = time.perf_counter()
xgboost_opt.fit(train_x, train_y)
end = time.perf_counter()
print(end-start)

print(f"start estimating...")
# NOTE: this scores the complete v8 table, which includes the 80% of rows the model was
# fitted on, so the printed error is not a hold-out estimate.
eval_x = pd.read_csv('data/processed_train_goto_x_v8.csv').drop(['id','pj_no'],axis=1)
ans_y = pd.read_csv('data/processed_train_goto_y_v8.csv').drop(['id'],axis=1)
pred_y = xgboost_opt.predict(eval_x)
print( mean_absolute_percentage_error(ans_y.values,pred_y))

# Per-row report: actual value, prediction cast to int64, absolute percentage error,
# joined with the feature rows by id.
out = pd.read_csv('data/processed_train_goto_y_v8.csv')
out['pred_keiyaku_pr'] = pd.Series(pred_y).astype(np.int64)
out['error']=abs((out['keiyaku_pr']-out['pred_keiyaku_pr'])/out['keiyaku_pr'])*100
output = pd.merge(out, pd.read_csv("data/processed_train_goto_x_v8.csv"),on='id')
output.to_csv("data/train_data_error.csv")



start learning...
26.849811518000024
start estimating...
[ 3.48094355]


In [2]:
print(f"start estimating...")
# Score the saved evaluation split (data/X_eval.csv / data/Y_eval.csv) with the fitted model.
eval_x = pd.read_csv('data/X_eval.csv').drop(['id','pj_no'],axis=1)
ans_y = pd.read_csv('data/Y_eval.csv').drop(['id'],axis=1)
pred_y = xgboost_opt.predict(eval_x)
print( mean_absolute_percentage_error(ans_y.values,pred_y))

# Per-row report for the evaluation split: prediction cast to int64, absolute percentage
# error, joined with the feature rows by id.
out = pd.read_csv('data/Y_eval.csv')
out['pred_keiyaku_pr'] = pd.Series(pred_y).astype(np.int64)
out['error']=abs((out['keiyaku_pr']-out['pred_keiyaku_pr'])/out['keiyaku_pr'])*100
output = pd.merge(out, pd.read_csv("data/X_eval.csv"),on='id')
output.to_csv("data/eval_data_error.csv")

start estimating...
[ 8.70952682]


In [3]:
# Save the fitted model's feature importances, labelled with the eval_x column names.
importance = pd.DataFrame(xgboost_opt.feature_importances_, index=eval_x.columns)
importance.to_csv("data/feature_importances_V8.csv")

In [4]:
# Predict the v8 test table and write the submission file.
test_x = pd.read_csv("data/processed_test_goto_x_v8.csv")
test_pred = xgboost_opt.predict(test_x.drop(['id','pj_no'],axis=1))
submit = pd.DataFrame(test_x[['id']])
# Predictions are cast to int64 (fractional part discarded).
submit['keiyaku_pr']=pd.Series(test_pred).astype(np.int64)
# Tab-separated, without a header row or index column.
submit.to_csv('data/submit_v8.tsv',sep='\t',header=None, index=False)

### n_estimatorsが700のケースでsubmitしてみることにする(7/7)

In [None]:
# Same submission steps, but reading the unversioned test table and writing data/submit4.tsv.
test_x = pd.read_csv("data/processed_test_goto_x.csv")
test_pred = xgboost_opt.predict(test_x.drop(['id','pj_no'],axis=1))
submit = pd.DataFrame(test_x[['id']])
# Predictions are cast to int64 (fractional part discarded).
submit['keiyaku_pr']=pd.Series(test_pred).astype(np.int64)
# Tab-separated, without a header row or index column.
submit.to_csv('data/submit4.tsv',sep='\t',header=None, index=False)

### ここからSageMaker用のデータを作る処理

In [None]:
# Reload the saved training split, this time keeping all columns.
train_x = pd.read_csv('data/X_train.csv')
train_y = pd.read_csv('data/Y_train.csv')

In [None]:
# Training input: whatever columns remain in train_y after dropping 'id', 'keiyaku_pr' and
# 'tc_mseki' come first, followed by the features without their identifier columns.
# NOTE(review): presumably the remaining target column is the label SageMaker expects in
# the first CSV column; confirm against the Y_train.csv schema.
train_input = pd.concat([train_y.drop(['id','keiyaku_pr','tc_mseki'],axis=1),train_x.drop(['id','pj_no'],axis=1)],axis=1)
# Written without a header row or index column.
train_input.to_csv('data/sagemaker_input.csv', header=None, index=False)
# Evaluation input: features only, same identifier columns removed, same CSV layout.
eval_x = pd.read_csv('data/X_eval.csv')
eval_x.drop(['id','pj_no'],axis=1).to_csv('data/sagemaker_eval_input.csv',header=None, index=False)


### SageMakerの出力から精度を計算する

In [None]:
# Predictions are read from the '.out' file next to the evaluation input (no header row).
pred2_y = pd.read_csv('data/sagemaker_eval_input.csv.out', header=None)
# Actual values: the columns of Y_eval.csv left after dropping 'id', 'keiyaku_pr' and 'tc_mseki'.
ans_y = pd.read_csv('data/Y_eval.csv').drop(['id','keiyaku_pr','tc_mseki'],axis=1)

In [None]:
# Percentage error of the SageMaker predictions against the actual values.
print( mean_absolute_percentage_error(ans_y.values,pred2_y.values))

### SageMaker用予測データを作成する

In [None]:
# Load the test feature table.
test_x = pd.read_csv("data/processed_test_goto_x.csv")

In [None]:
# Test input for SageMaker: features without the identifier columns,
# written without a header row or index column.
test_input = test_x.drop(['id','pj_no'],axis=1)
test_input.to_csv('data/sagemaker_test_input.csv', header=None, index=False)

### SageMaker出力からsubmit用データを作る

In [None]:
# Read the '.out' file that sits next to the SageMaker test input (no header row).
tanka = pd.read_csv("data/sagemaker_test_input.csv.out", header=None )

In [None]:
# Reload the test feature table for the id and 'tc_mseki' columns used below.
test_x = pd.read_csv("data/processed_test_goto_x.csv")

In [None]:
# Start the submission frame from the id and 'tc_mseki' columns of the test table.
submit = pd.DataFrame(test_x[['id', 'tc_mseki']])

In [None]:
# Attach the values read from the SageMaker output as 'tanka_pr'.
# NOTE(review): assumes `tanka` holds exactly one column, in the same row order as test_x; confirm.
submit['tanka_pr']=tanka

In [None]:
# Price = 'tc_mseki' times 'tanka_pr', cast to int64 (fractional part discarded).
submit['price']=(submit['tc_mseki']*submit['tanka_pr']).astype(np.int64)

In [None]:
# Write only id and price: tab-separated, without a header row or index column.
submit.loc[:,['id','price']].to_csv('data/submit3.tsv',sep='\t',header=None, index=False)

In [None]:
# Preview the first rows of the submission frame.
submit.head()