In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

In [2]:
import pandas as pd

train = pd.read_csv("train_df_errno.csv")
test = pd.read_csv("test_df.csv")
sub = pd.read_csv("sample_submission.csv")
age = pd.read_csv("age_gender_info.csv")
train.shape, test.shape, sub.shape, age.shape

((2896, 15), (1008, 14), (150, 2), (16, 23))

### 결측치 확인하기

In [3]:
train.isnull().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            0
임대보증금         569
임대료           569
10분내지하철수      211
10분내버스정류장수      4
단지내주차면수         0
등록차량수           0
dtype: int64

In [4]:
test.isnull().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            2
임대보증금         180
임대료           180
10분내지하철수       38
10분내버스정류장수      0
단지내주차면수         0
dtype: int64

#### 1.test데이터셋 자격유형 mapping

In [5]:
test[test['자격유형'].isna()]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
196,C2411,962,아파트,경상남도,국민임대,46.9,240,25.0,,71950000,37470,0.0,2.0,840.0
258,C2253,1161,아파트,강원도,영구임대,26.37,745,0.0,,2249000,44770,0.0,2.0,173.0


In [6]:
test.loc[196, "자격유형"] = "A"
test.loc[258, "자격유형"] = "C"
test.isnull().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            0
임대보증금         180
임대료           180
10분내지하철수       38
10분내버스정류장수      0
단지내주차면수         0
dtype: int64

In [7]:
train_df = train.copy(deep=True)
test_df = test.copy(deep=True)

#### 2.10분버스정류장수 결측치 처리

In [8]:
train_df.loc[train_df['10분내버스정류장수'].isnull(),:]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,등록차량수
2293,N2431,1047,아파트,경상남도,공공임대(10년),74.97,80,15.0,A,46000000,456000,,,1066.0,1214.0
2294,N2431,1047,아파트,경상남도,공공임대(10년),84.95,124,15.0,A,57000000,462000,,,1066.0,1214.0
2295,N2431,1047,아파트,경상남도,공공임대(10년),84.96,289,15.0,A,57000000,462000,,,1066.0,1214.0
2296,N2431,1047,아파트,경상남도,공공임대(10년),84.98,82,15.0,A,57000000,462000,,,1066.0,1214.0


In [9]:
train_df['10분내버스정류장수'].value_counts()

2.0     776
3.0     737
4.0     485
1.0     270
5.0     162
6.0     153
8.0      99
10.0     80
7.0      65
12.0     22
20.0     15
15.0     11
16.0      6
19.0      4
11.0      3
14.0      2
0.0       2
Name: 10분내버스정류장수, dtype: int64

In [10]:
### 최빈값인 2로 채운다.
train_df.loc[ train_df['10분내버스정류장수'].isnull(), "10분내버스정류장수"] = 2
train_df.loc[ train_df['10분내버스정류장수'].isnull(), :]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,등록차량수


#### 3.자격유형 mapping처리

In [11]:
print(train_df.자격유형.unique())
print(test_df.자격유형.unique())
mapping = { 'A':1, 'B':2, 'C':3, 'D':4, 'E':5,
            'F':6, 'G':7, 'H':8, 'I':9, 'J':10,
            'K':11, 'L':12, 'M':13, 'N':14, 'O':15  }
train_df['자격유형'] =train_df['자격유형'].map(mapping).astype(int)
test_df['자격유형'] =test_df['자격유형'].map(mapping).astype(int)
train_df.head()

['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O']
['H' 'A' 'E' 'C' 'D' 'G' 'I' 'J' 'K' 'L' 'M' 'N']


Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,등록차량수
0,C2515,545,아파트,경상남도,국민임대,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,205.0
1,C2515,545,아파트,경상남도,국민임대,39.6,60,17.0,1,12672000,107130,0.0,3.0,624.0,205.0
2,C2515,545,아파트,경상남도,국민임대,39.6,20,17.0,1,12672000,107130,0.0,3.0,624.0,205.0
3,C2515,545,아파트,경상남도,국민임대,46.9,38,17.0,1,18433000,149760,0.0,3.0,624.0,205.0
4,C2515,545,아파트,경상남도,국민임대,46.9,19,17.0,1,18433000,149760,0.0,3.0,624.0,205.0


#### 4.10분내지하철수결측치 처리

In [12]:
train_df['10분내지하철수'].unique()

array([ 0.,  1., nan,  2.,  3.])

In [13]:
print(train_df['10분내지하철수'].value_counts())

0.0    2246
1.0     400
2.0      33
3.0       6
Name: 10분내지하철수, dtype: int64


In [14]:
#결측치 처리
train_df['10분내지하철수'] = train_df['10분내지하철수'].fillna(0.0)
test_df['10분내지하철수'] = test_df['10분내지하철수'].fillna(0.0)

#### 5.임대보증금 결측치 처리하기

In [15]:
train_df.loc[ train_df['임대보증금'].isnull(), :]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,등록차량수
80,C1925,601,상가,강원도,임대상가,32.10,1,9.0,4,,,0.0,4.0,117.0,75.0
81,C1925,601,상가,강원도,임대상가,32.10,1,9.0,4,,,0.0,4.0,117.0,75.0
82,C1925,601,상가,강원도,임대상가,32.10,1,9.0,4,,,0.0,4.0,117.0,75.0
83,C1925,601,상가,강원도,임대상가,72.16,1,9.0,4,,,0.0,4.0,117.0,75.0
93,C1874,619,상가,충청남도,임대상가,12.62,1,2.0,4,,,0.0,2.0,97.0,62.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2311,C1350,1401,아파트,대전광역시,공공분양,74.94,22,2.0,4,,,0.0,6.0,1636.0,2315.0
2312,C1350,1401,아파트,대전광역시,공공분양,84.94,164,2.0,4,,,0.0,6.0,1636.0,2315.0
2313,C1350,1401,아파트,대전광역시,공공분양,84.94,19,2.0,4,,,0.0,6.0,1636.0,2315.0
2314,C1350,1401,아파트,대전광역시,공공분양,84.96,26,2.0,4,,,0.0,6.0,1636.0,2315.0


In [16]:
train_df['지역'].unique()

array(['경상남도', '대전광역시', '경기도', '전라북도', '강원도', '광주광역시', '충청남도', '부산광역시',
       '제주특별자치도', '울산광역시', '충청북도', '전라남도', '경상북도', '대구광역시', '서울특별시',
       '세종특별자치시'], dtype=object)

In [17]:
train_df["임대보증금"] = train_df["임대보증금"].fillna(0)
train_df.loc[train_df["임대보증금"] == "-", "임대보증금"] = 0
train_df["임대보증금"] = train_df["임대보증금"].astype(int)

In [67]:
test_df["임대보증금"] = test_df["임대보증금"].fillna(0)
test_df.loc[test_df["임대보증금"] == "-", "임대보증금"] = 0
test_df["임대보증금"] = test_df["임대보증금"].astype(int)

#### 경상남도 임대보증금 처리

In [18]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경상남도')  )
val = group1['임대보증금'].mean()
val

16160436.314363144

In [20]:
c = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "경상남도")].index
train_df.loc[c, "임대보증금"] = val

In [68]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경상남도')  )
val = group1['임대보증금'].mean()
val

27713976.470588237

In [69]:
c = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "경상남도")].index
test_df.loc[c, "임대보증금"] = val

#### 대전광역시 임대보증금 처리

In [22]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대전광역시')  )
val = group1['임대보증금'].mean()
val

12931199.2481203

In [24]:
a = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "대전광역시")].index
train_df.loc[a, "임대보증금"] = val

In [70]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대전광역시')  )
val = group1['임대보증금'].mean()
val

14195269.23076923

In [73]:
a = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "대전광역시")].index
test_df.loc[a, "임대보증금"] = val

#### 경기도 임대보증금 처리

In [26]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경기도')  )
val = group1['임대보증금'].mean()
val

32787275.747508306

In [27]:
b = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "경기도")].index
train_df.loc[b, "임대보증금"] = val

In [74]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경기도')  )
val = group1['임대보증금'].mean()
val

32036721.11553785

In [75]:
b = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "경기도")].index
test_df.loc[b, "임대보증금"] = val

#### 전라북도 임대보증금 처리

In [28]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라북도')  )
val = group1['임대보증금'].mean()
val

19154669.724770643

In [29]:
d = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "전라북도")].index
train_df.loc[d, "임대보증금"] = val

In [76]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라북도')  )
val = group1['임대보증금'].mean()
val

20900205.128205128

In [77]:
d = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "전라북도")].index
test_df.loc[d, "임대보증금"] = val

#### 강원도 임대보증금 처리

In [32]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '강원도')  )
val = group1['임대보증금'].mean()
val

18220585.242604133

In [33]:
e = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "강원도")].index
train_df.loc[e, "임대보증금"] = val

In [78]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '강원도')  )
val = group1['임대보증금'].mean()
val

8682783.018867925

In [80]:
e = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "강원도")].index
test_df.loc[e, "임대보증금"] = val

#### 광주광역시 임대보증금 처리

In [34]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '광주광역시')  )
val = group1['임대보증금'].mean()
val

28588851.063829787

In [35]:
f = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "광주광역시")].index
train_df.loc[f, "임대보증금"] = val

In [81]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '광주광역시')  )
val = group1['임대보증금'].mean()
val

22726500.0

In [83]:
f = test_df[(train_df["임대보증금"] == 0) & (test_df["지역"] == "광주광역시")].index
test_df.loc[f, "임대보증금"] = val

  f = test_df[(train_df["임대보증금"] == 0) & (test_df["지역"] == "광주광역시")].index


#### 충청남도 임대보증금 처리

In [36]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청남도')  )
val = group1['임대보증금'].mean()
val

9905058.227848101

In [37]:
g = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "충청남도")].index
train_df.loc[g, "임대보증금"] = val

In [84]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청남도')  )
val = group1['임대보증금'].mean()
val

10396682.92682927

In [85]:
g = test_df[(train_df["임대보증금"] == 0) & (test_df["지역"] == "충청남도")].index
test_df.loc[g, "임대보증금"] = val

  g = test_df[(train_df["임대보증금"] == 0) & (test_df["지역"] == "충청남도")].index


#### 부산광역시 임대보증금 처리

In [38]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '부산광역시')  )
val = group1['임대보증금'].mean()
val

7469609.756097561

In [39]:
h = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "부산광역시")].index
train_df.loc[h, "임대보증금"] = val

In [86]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '부산광역시')  )
val = group1['임대보증금'].mean()
val

11924727.272727273

In [87]:
h = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "부산광역시")].index
test_df.loc[h, "임대보증금"] = val

#### 제추특별자치도 임대보증금 처리

In [40]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '제주특별자치도')  )
val = group1['임대보증금'].mean()
val

12575681.318681318

In [41]:
i = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "제주특별자치도")].index
train_df.loc[i, "임대보증금"] = val

In [88]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '제주특별자치도')  )
val = group1['임대보증금'].mean()
val

14751730.76923077

In [89]:
i = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "제주특별자치도")].index
test_df.loc[i, "임대보증금"] = val

#### 울산광역시 임대보증금 처리

In [42]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '울산광역시')  )
val = group1['임대보증금'].mean()
val

28439970.588235293

In [43]:
j = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "울산광역시")].index
train_df.loc[j, "임대보증금"] = val

In [90]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '울산광역시')  )
val = group1['임대보증금'].mean()
val

10147479.452054795

In [91]:
j = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "울산광역시")].index
test_df.loc[j, "임대보증금"] = val

#### 충청북도 임대보증금 처리

In [44]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청북도')  )
val = group1['임대보증금'].mean()
val

22785411.042944785

In [45]:
k = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "충청북도")].index
train_df.loc[k, "임대보증금"] = val

In [92]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청북도')  )
val = group1['임대보증금'].mean()
val

21051803.92156863

In [93]:
k = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "충청북도")].index
test_df.loc[k, "임대보증금"] = val

#### 전라남도 임대보증금 처리

In [46]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라남도')  )
val = group1['임대보증금'].mean()
val

15990530.612244898

In [47]:
l = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "전라남도")].index
train_df.loc[l, "임대보증금"] = val

In [94]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라남도')  )
val = group1['임대보증금'].mean()
val

18153432.43243243

In [97]:
l = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "전라남도")].index
test_df.loc[l, "임대보증금"] = val

#### 대구광역시 임대보증금 처리

In [48]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대구광역시')  )
val = group1['임대보증금'].mean()
val

26201235.29411765

In [49]:
m = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "대구광역시")].index
train_df.loc[m, "임대보증금"] = val

In [98]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대구광역시')  )
val = group1['임대보증금'].mean()
val

24287055.555555556

In [99]:
m = test_df[(test_df["임대보증금"] == 0) & (test_df["지역"] == "대구광역시")].index
test_df.loc[m, "임대보증금"] = val

#### 서울특별시 임대보증금 처리

In [50]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '서울특별시')  )
val = group1['임대보증금'].mean()
val

87293234.375

In [51]:
s = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "서울특별시")].index
train_df.loc[s, "임대보증금"] = val

#### 세종특별자치시 임대보증금 처리

In [52]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '세종특별자치시')  )
val = group1['임대보증금'].mean()
val

39162500.0

In [53]:
q = train_df[(train_df["임대보증금"] == 0) & (train_df["지역"] == "세종특별자치시")].index
train_df.loc[q, "임대보증금"] = val

In [105]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '세종특별자치시')  )
val = group1['임대보증금'].mean()
val

21100473.684210528

In [106]:
q = test_df[(train_df["임대보증금"] == 0) & (test_df["지역"] == "세종특별자치시")].index
test_df.loc[q, "임대보증금"] = val

  q = test_df[(train_df["임대보증금"] == 0) & (test_df["지역"] == "세종특별자치시")].index


### 임대보증금 null처리 확인

In [55]:
train_df[train_df["임대보증금"] == 0].index

Int64Index([], dtype='int64')

#### 6.임대료 결측치 처리하기

In [111]:
train_df["임대료"] = train_df["임대료"].fillna(0)
train_df.loc[train_df["임대료"] == "-", "임대료"] = 0
train_df["임대료"] = train_df["임대료"].astype(int)

In [112]:
test_df["임대료"] = test_df["임대료"].fillna(0)
test_df.loc[test_df["임대료"] == "-", "임대료"] = 0
test_df["임대료"] = test_df["임대료"].astype(int)

#### 경상남도 임대료 처리

In [113]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경상남도')  )
val = group1['임대료'].mean()
val

121628.26558265583

In [114]:
c = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "경상남도")].index
train_df.loc[c, "임대료"] = val

In [115]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경상남도')  )
val = group1['임대료'].mean()
val

151825.64705882352

In [116]:
c = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "경상남도")].index
test_df.loc[c, "임대료"] = val

#### 대전광역시 임대료처리

In [117]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대전광역시')  )
val = group1['임대료'].mean()
val

95261.05263157895

In [118]:
a = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "대전광역시")].index
train_df.loc[a, "임대료"] = val

In [119]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대전광역시')  )
val = group1['임대료'].mean()
val

104696.53846153847

In [120]:
a = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "대전광역시")].index
test_df.loc[a, "임대료"] = val

#### 경기도 임대료 처리

In [121]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경기도')  )
val = group1['임대료'].mean()
val

227820.84717607973

In [122]:
b = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "경기도")].index
train_df.loc[b, "임대료"] = val

In [123]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경기도')  )
val = group1['임대료'].mean()
val

228166.9721115538

In [124]:
b = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "경기도")].index
test_df.loc[b, "임대료"] = val

#### 전라북도 임대료 처리

In [125]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라북도')  )
val = group1['임대료'].mean()
val

134853.3027522936

In [126]:
d = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "전라북도")].index
train_df.loc[d, "임대료"] = val

In [127]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라북도')  )
val = group1['임대료'].mean()
val

122518.20512820513

In [128]:
d = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "전라북도")].index
test_df.loc[d, "임대료"] = val

#### 강원도 임대료 처리¶

In [129]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '강원도')  )
val = group1['임대료'].mean()
val

149024.4385026738

In [130]:
e = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "강원도")].index
train_df.loc[e, "임대료"] = val

In [131]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '강원도')  )
val = group1['임대료'].mean()
val

90065.47169811321

In [132]:
e = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "강원도")].index
test_df.loc[e, "임대료"] = val

#### 광주광역시 임대료 처리

In [133]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '광주광역시')  )
val = group1['임대료'].mean()
val

163677.7304964539

In [134]:
f = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "광주광역시")].index
train_df.loc[f, "임대료"] = val

In [135]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '광주광역시')  )
val = group1['임대료'].mean()
val

131236.30434782608

In [136]:
f = test_df[(train_df["임대료"] == 0) & (test_df["지역"] == "광주광역시")].index
test_df.loc[f, "임대료"] = val

  f = test_df[(train_df["임대료"] == 0) & (test_df["지역"] == "광주광역시")].index


#### 충청남도 임대료 처리

In [137]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청남도')  )
val = group1['임대료'].mean()
val

62474.81012658228

In [138]:
g = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "충청남도")].index
train_df.loc[g, "임대료"] = val

In [139]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청남도')  )
val = group1['임대보증금'].mean()
val

10396682.92682927

In [140]:
g = test_df[(train_df["임대료"] == 0) & (test_df["지역"] == "충청남도")].index
test_df.loc[g, "임대료"] = val

  g = test_df[(train_df["임대료"] == 0) & (test_df["지역"] == "충청남도")].index


#### 부산광역시 임대료 처리

In [141]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '부산광역시')  )
val = group1['임대료'].mean()
val

62818.53658536585

In [142]:
h = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "부산광역시")].index
train_df.loc[h, "임대료"] = val

In [143]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '부산광역시')  )
val = group1['임대료'].mean()
val

94874.77272727272

In [144]:
h = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "부산광역시")].index
test_df.loc[h, "임대료"] = val

#### 제주특별자치도 임대료 처리

In [145]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '제주특별자치도')  )
val = group1['임대료'].mean()
val

114688.02197802198

In [146]:
i = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "제주특별자치도")].index
train_df.loc[i, "임대료"] = val

In [147]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '제주특별자치도')  )
val = group1['임대료'].mean()
val

128065.0

In [149]:
i = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "제주특별자치도")].index
test_df.loc[i, "임대료"] = val

#### 울산광역시 임대료 처리

In [157]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '울산광역시')  )
val = group1['임대료'].mean()
val

222456.76470588235

In [158]:
j = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "울산광역시")].index
train_df.loc[j, "임대료"] = val

In [159]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '울산광역시')  )
val = group1['임대료'].mean()
val

56980.27397260274

In [160]:
j = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "울산광역시")].index
test_df.loc[j, "임대료"] = val

#### 충청북도 인대료 처리

In [161]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청북도')  )
val = group1['임대료'].mean()
val

163466.87116564417

In [162]:
k = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "충청북도")].index
train_df.loc[k, "임대료"] = val

In [163]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '충청북도')  )
val = group1['임대료'].mean()
val

146157.45098039217

In [164]:
k = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "충청북도")].index
test_df.loc[k, "임대료"] = val

#### 전라남도 인대료 처리

In [165]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라남도')  )
val = group1['임대료'].mean()
val

133239.25170068027

In [166]:
l = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "전라남도")].index
train_df.loc[l, "임대료"] = val

In [167]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '전라남도')  )
val = group1['임대료'].mean()
val

125647.02702702703

In [168]:
l = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "전라남도")].index
test_df.loc[l, "임대료"] = val

#### 경상북도 임대료 처리

In [169]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경상북도')  )
val = group1['임대료'].mean()
val

146360.63063063062

In [172]:
l = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "경상북도")].index
train_df.loc[l, "임대료"] = val

In [173]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '경상북도')  )
val = group1['임대료'].mean()
val

156279.1379310345

In [174]:
l = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "경상북도")].index
test_df.loc[l, "임대료"] = val

#### 대구광역시 임대료처리

In [175]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대구광역시')  )
val = group1['임대료'].mean()
val

255381.27450980392

In [176]:
m = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "대구광역시")].index
train_df.loc[m, "임대료"] = val

In [177]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '대구광역시')  )
val = group1['임대료'].mean()
val

184355.83333333334

In [178]:
m = test_df[(test_df["임대료"] == 0) & (test_df["지역"] == "대구광역시")].index
test_df.loc[m, "임대료"] = val

#### 서울특별시 임대료 처리

In [179]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '서울특별시')  )
val = group1['임대료'].mean()
val

369625.9375

In [180]:
s = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "서울특별시")].index
train_df.loc[s, "임대료"] = val

#### 세종특별자치시 임대료 처리

In [181]:
grouped = train_df.groupby([ '지역'])
group1 = grouped.get_group( ( '세종특별자치시')  )
val = group1['임대료'].mean()
val

437166.6666666667

In [182]:
q = train_df[(train_df["임대료"] == 0) & (train_df["지역"] == "세종특별자치시")].index
train_df.loc[q, "임대료"] = val

In [183]:
grouped = test_df.groupby([ '지역'])
group1 = grouped.get_group( ( '세종특별자치시')  )
val = group1['임대료'].mean()
val

158392.1052631579

In [184]:
q = test_df[(train_df["임대료"] == 0) & (test_df["지역"] == "세종특별자치시")].index
test_df.loc[q, "임대료"] = val

  q = test_df[(train_df["임대료"] == 0) & (test_df["지역"] == "세종특별자치시")].index


In [186]:
train_df.isnull().sum()

단지코드          0
총세대수          0
임대건물구분        0
지역            0
공급유형          0
전용면적          0
전용면적별세대수      0
공가수           0
자격유형          0
임대보증금         0
임대료           0
10분내지하철수      0
10분내버스정류장수    0
단지내주차면수       0
등록차량수         0
dtype: int64

In [187]:
test_df.isnull().sum()

단지코드          0
총세대수          0
임대건물구분        0
지역            0
공급유형          0
전용면적          0
전용면적별세대수      0
공가수           0
자격유형          0
임대보증금         0
임대료           0
10분내지하철수      0
10분내버스정류장수    0
단지내주차면수       0
dtype: int64