In [79]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

混凝土是土木工程中最重要的材料，製作時 7 種混合材料的配比及齡期（養護天數）不同，會產生不同的混凝土抗壓強度，
基於安全及成本考量必須取得平衡，因此本分析將這 7 種混合材料及齡期（共 8 個變數）當作輸入特徵，訓練模型來推估混凝土抗壓強度。

輸入變數(7種成分及齡期，共8項) : 1. 水泥(Cement) 2. 高爐爐渣(Blast Furnace Slag) 3. 粉煤灰(Fly Ash) 4. 水(Water) 5. 高效減水劑(Superplasticizer) 6. 粗骨料(Coarse Aggregate) 7. 細骨料(Fine Aggregate) 8. 齡期(Age) (1~365日)
輸出變數 : 混凝土抗壓強度(Concrete compressive strength) - MPa 
資料來源 : http://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength

### 讀取資料

In [207]:
# Load the concrete dataset spreadsheet (path is relative to the working directory).
df = pd.read_excel(io='Concrete_Data.xlsx')

In [224]:
# Preview the first ten rows to eyeball column names and value ranges.
df.iloc[:10]

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075
5,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90,47.029847
6,380.0,95.0,0.0,228.0,0.0,932.0,594.0,365,43.698299
7,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28,36.44777
8,266.0,114.0,0.0,228.0,0.0,932.0,670.0,28,45.854291
9,475.0,0.0,0.0,228.0,0.0,932.0,594.0,28,39.28979


### 確認資料是否有缺漏值

In [209]:
# Per-column flag: True if that column contains at least one missing value.
df.isna().any(axis=0)

Cement (component 1)(kg in a m^3 mixture)                False
Blast Furnace Slag (component 2)(kg in a m^3 mixture)    False
Fly Ash (component 3)(kg in a m^3 mixture)               False
Water  (component 4)(kg in a m^3 mixture)                False
Superplasticizer (component 5)(kg in a m^3 mixture)      False
Coarse Aggregate  (component 6)(kg in a m^3 mixture)     False
Fine Aggregate (component 7)(kg in a m^3 mixture)        False
Age (day)                                                False
Concrete compressive strength(MPa, megapascals)          False
dtype: bool

### 將資料格式轉為np.array以利操作

In [210]:
# Work on the frame's underlying 2-D NumPy array so columns can be sliced by position.
ConcreteArray = np.asarray(df)

In [211]:
# Columns 0-7 are the eight input features (seven mixture components plus age);
# column 8 is the compressive strength to be predicted.
X = ConcreteArray[:, 0:8]
Y = ConcreteArray[:, 8]

### 70%訓練，30%測試

In [212]:
# Hold out 30% of the samples for testing. The fixed random_state keeps the
# split (and every number reported below) reproducible across re-runs.
# train_test_split is imported in the notebook's top import cell.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=168
)

# Sanity check: the two partitions together must account for every sample.
assert len(x_train) + len(x_test) == len(X)

### 線性迴歸分析

In [213]:
# Fit an ordinary least-squares linear model on the training split.
# LinearRegression is imported in the notebook's top import cell.
# fit() is left as the cell's last expression so the fitted estimator is displayed.
regr = LinearRegression()
regr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### 預測

In [223]:
# Predict strength for every held-out sample, then display the prediction for
# the single test row at index 10 as a spot check.
Y_pred = regr.predict(x_test)
regr.predict(x_test[10].reshape(1, -1))  # TEST item 10

array([28.06269211])

### 模型評估：決定係數 R²（非分類準確率）

In [225]:
# For a regressor, score() reports the coefficient of determination (R^2) on
# the given data; it is not a classification accuracy.
regr.score(X=x_test, y=y_test)

0.5859256364382834