# 案例 房价预测

## 导入所需库

In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## 导入数据集

In [5]:
# Load the test and training splits from CSV files in the working directory.
test = pd.read_csv("test.csv")
train = pd.read_csv("train.csv")

# describe() summarises only numeric columns by default; include="all" also
# reports count/unique/top/freq for the non-numeric columns.
# Kept as the cell's final expression so the notebook renders the summary.
train.describe(include="all")


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
count,1460.0,1460.0,1460,1201.0,1460.0,1460,91,1460,1460,1460,...,1460.0,7,281,54,1460.0,1460.0,1460.0,1460,1460,1460.0
unique,,,5,,,2,2,4,4,2,...,,3,4,4,,,,9,6,
top,,,RL,,,Pave,Grvl,Reg,Lvl,AllPub,...,,Gd,MnPrv,Shed,,,,WD,Normal,
freq,,,1151,,,1454,50,925,1311,1459,...,,3,157,49,,,,1267,1198,
mean,730.5,56.89726,,70.049958,10516.828082,,,,,,...,2.758904,,,,43.489041,6.321918,2007.815753,,,180921.19589
std,421.610009,42.300571,,24.284752,9981.264932,,,,,,...,40.177307,,,,496.123024,2.703626,1.328095,,,79442.502883
min,1.0,20.0,,21.0,1300.0,,,,,,...,0.0,,,,0.0,1.0,2006.0,,,34900.0
25%,365.75,20.0,,59.0,7553.5,,,,,,...,0.0,,,,0.0,5.0,2007.0,,,129975.0
50%,730.5,50.0,,69.0,9478.5,,,,,,...,0.0,,,,0.0,6.0,2008.0,,,163000.0
75%,1095.25,70.0,,80.0,11601.5,,,,,,...,0.0,,,,0.0,8.0,2009.0,,,214000.0


## 数据预处理
### 删除无关列
<details>
    <summary><i>其中各列名的含义如下 （点击查看）</i></summary>

|名称|含义|
|:-:|:-:|
|SalePrice|销售价格|
|MSSubClass|建筑类型|
|LotFrontage |直线距离|
|LotArea|土地面积（平方英尺）|
|Street |路的类型|
|Alley  |胡同类型|
|LotShape|房产的形状（是否规则）|
|LandContour |房产的平坦度|
|Utilities |公共设备 设施|
|LotConfig|划分配置|
|LandSlope|斜坡程度|
|Neighborhood |在Ames city范围的物理位置|
|Condition1|接近各种条件|
|Condition2 |接近各种条件|
|BldgType |住宅的类型|
|HouseStyle |住宅的风格|
|OverallQual |给房屋整体材料和装修评分|
|OverallCond |为房屋的整体状况评分|
|YearBuilt |建造日期|
|YearRemodAdd |重新改造日期|
|RoofStyle |屋顶的类型|
|RoofMatl |屋顶材料|
|Exterior1st|房屋外部覆盖物|
|Exterior2nd|房屋外部覆盖物(不止一种)|
|MasVnrType |砌体单板类型|
|MasVnrArea |砌体饰面面积（平方英尺）|
|ExterQual |外部材料的质量评价|
|ExterCond|评估外部材料的现状|
|Foundation |基础类型|
|BsmtQual |评估地下室的高度|
|BsmtCond |评估地下室的一般状况|
|BsmtExposure |出口或花园墙壁曝光程度|
|BsmtFinType1 |地下室完工面积比|
|BsmtFinSF1 |类型1完成平方英尺|
|BsmtFinType2 |地下室完工面积比（如果不止一种）|
|BsmtFinSF2 |类型2完成平方英尺|
|BsmtUnfSF |未完成的地下室平方英尺|
|TotalBsmtSF |地下室总面积|
|HeatingQC| 热暖质量和条件|
|CentralAir | 是否有中央空调|
|Electrical |电气系统|
|1stFlrSF|一楼平方英尺|
|2ndFlrSF |二楼平方英尺|
|LowQualFinSF|低质量完成平方英尺（所有地板）|
|GrLivArea|高于（地面）居住面积的平方英尺|
|BsmtFullBath|地下室完整的浴室|
|BsmtHalfBath|地下室半浴室|
|FullBath|地上完整浴室数量|
|HalfBath|地上半浴室数量|
|BedroomAbvGr |地上卧室数量|
|KitchenAbvGr| 地上厨房数量|
|KitchenQual |厨房质量评估|
|TotRmsAbvGrd |地上房间总数（不包括浴室）|
|Functional|家庭功能|
|Fireplaces |壁炉数量|
|FireplaceQu |壁炉质量|
|GarageType| 车库位置|
|GarageYrBlt |车库建成年份|
|GarageFinish | 车库的内部完成|
|GarageCars|车容量为单位的车库大小|
|GarageArea | 以平方英尺为单位的车库大小|
|GarageQual|车库质量|
|GarageCond |车库条件|
|PavedDrive |铺设车道|
|WoodDeckSF|木甲板面积(平方英尺的)|
|OpenPorchSF|打开门廊面积(以平方英尺)|
|EnclosedPorch|封闭门廊面积(以平方英尺)|
|3SsnPorch|三季门廊面积(以平方英尺)|
|ScreenPorch|屏幕门廊面积(平方英尺的)|
|PoolArea|以平方英尺为单位的泳池面积|
|PoolQC|泳池质量|
|Fence | 栅栏质量|
|MiscFeature |杂项功能未在其他类别中涵盖|
|MiscVal|杂项功能的价值|
|MoSold|售出月份（MM）|
|YrSold|已售出年份（YYYY）|
|SaleType|销售类型|
|SaleCondition|销售条件|
    
</details>

In [None]:
# Remove identifier columns that carry no predictive signal, mutating `train`
# in place. "Product_ID" does not appear among this dataset's columns, and
# DataFrame.drop raises KeyError on a missing label by default, which aborted
# the cell before "Id" was removed. errors="ignore" drops whichever of the
# listed labels exist and skips the rest.
train.drop(columns=["Id", "Product_ID"], errors="ignore", inplace=True)