## 多元線性回歸
- $y = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + b$
- 機器學習過程：**準備好資料 --> 設定一個模型 --> 設定 cost function --> 設定 optimizer**

In [None]:
import pandas as pd

# Load the salary dataset directly from GitHub into a DataFrame.
# NOTE(review): "Resgression" is spelled this way in the URL as given; presumably
# it is the repository's real folder name — verify before "correcting" it.
url = "https://raw.githubusercontent.com/GrandmaCan/ML/main/Resgression/Salary_Data2.csv"
data = pd.read_csv(url)
data

Unnamed: 0,YearsExperience,EducationLevel,City,Salary
0,3.0,大學,城市A,45.9
1,7.8,碩士以上,城市C,80.5
2,2.3,高中以下,城市A,25.2
3,5.1,高中以下,城市A,30.4
4,10.0,碩士以上,城市B,65.7
5,1.2,碩士以上,城市C,60.8
6,8.6,大學,城市C,50.1
7,6.9,碩士以上,城市A,70.3
8,4.2,大學,城市A,40.7
9,2.4,高中以下,城市A,28.1


- Label Encoding

In [None]:
# Label encoding: replace the three education levels with the integers 0, 1, 2.
# Series.map with a dict yields NaN for any value that is not a key of the dict.
data['EducationLevel'] = data['EducationLevel'].map({'高中以下':0,'大學':1,'碩士以上':2})
data

Unnamed: 0,YearsExperience,EducationLevel,City,Salary
0,3.0,1,城市A,45.9
1,7.8,2,城市C,80.5
2,2.3,0,城市A,25.2
3,5.1,0,城市A,30.4
4,10.0,2,城市B,65.7
5,1.2,2,城市C,60.8
6,8.6,1,城市C,50.1
7,6.9,2,城市A,70.3
8,4.2,1,城市A,40.7
9,2.4,0,城市A,28.1


- One Hot Encoding

In [None]:
# Two indicator features are enough to represent three cities
# (neither A nor B implies C).
from sklearn.preprocessing import OneHotEncoder

onehot_encoder = OneHotEncoder()
onehot_encoder.fit(data[['City']])  # the input must be 2-D, hence the extra pair of brackets
city_encoded = onehot_encoder.transform(data[['City']]).toarray()  # transform returns a sparse matrix; toarray() converts it to a full dense array
city_encoded

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [None]:
# (Rows run horizontally, columns run vertically.)
# Attach the three one-hot columns of city_encoded to the DataFrame as new columns.
data[["CityA", "CityB", "CityC"]] = city_encoded
data

Unnamed: 0,YearsExperience,EducationLevel,City,Salary,CityA,CityB,CityC
0,3.0,1,城市A,45.9,1.0,0.0,0.0
1,7.8,2,城市C,80.5,0.0,0.0,1.0
2,2.3,0,城市A,25.2,1.0,0.0,0.0
3,5.1,0,城市A,30.4,1.0,0.0,0.0
4,10.0,2,城市B,65.7,0.0,1.0,0.0
5,1.2,2,城市C,60.8,0.0,0.0,1.0
6,8.6,1,城市C,50.1,0.0,0.0,1.0
7,6.9,2,城市A,70.3,1.0,0.0,0.0
8,4.2,1,城市A,40.7,1.0,0.0,0.0
9,2.4,0,城市A,28.1,1.0,0.0,0.0


In [None]:
# Drop the original text column 'City', and 'CityC' as well: a row with
# CityA == 0 and CityB == 0 can only be city C, so that column is redundant.
data = data.drop(['City', 'CityC'], axis=1)
data

Unnamed: 0,YearsExperience,EducationLevel,Salary,CityA,CityB
0,3.0,1,45.9,1.0,0.0
1,7.8,2,80.5,0.0,0.0
2,2.3,0,25.2,1.0,0.0
3,5.1,0,30.4,1.0,0.0
4,10.0,2,65.7,0.0,1.0
5,1.2,2,60.8,0.0,0.0
6,8.6,1,50.1,0.0,0.0
7,6.9,2,70.3,1.0,0.0
8,4.2,1,40.7,1.0,0.0
9,2.4,0,28.1,1.0,0.0


In [None]:
from sklearn.model_selection import train_test_split
# Feature matrix (four columns) and regression target.
x = data[["YearsExperience", "EducationLevel", "CityA", "CityB"]]
y = data["Salary"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=87)  # random_state: makes the split identical on every run (a different number gives a different split)
# len(x), len(x_train), len(x_test)
# Convert the nicer-looking pandas objects to NumPy arrays for the computations below.
# Only the feature matrices are converted; y_train / y_test stay pandas Series.
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
x_test

array([[3.5, 1. , 1. , 0. ],
       [6.5, 2. , 1. , 0. ],
       [1.2, 2. , 0. , 0. ],
       [2.5, 0. , 1. , 0. ],
       [1.7, 2. , 0. , 0. ],
       [2.3, 0. , 1. , 0. ],
       [7.8, 2. , 0. , 0. ],
       [6.9, 2. , 1. , 0. ]])

In [None]:
import numpy as np

# Arbitrary starting parameters: one weight per feature column, plus a bias.
w = np.array([1,2,3,4])
b = 1
y_pred = (x_train*w).sum(axis=1) + b  # sum across each row, giving one prediction per sample

In [None]:
# Mean squared error between the targets and the predictions computed above.
((y_train - y_pred)**2).mean()

1772.9485714285713

In [None]:
def compute_cost(x, y, w, b):
  """Return the mean squared error of the linear model ``x*w + b``.

  Args:
    x: 2-D feature matrix, one sample per row.
    y: Target values, one per row of ``x``.
    w: Weights, one per column of ``x``.
    b: Scalar bias added to every prediction.

  Returns:
    The mean of the squared differences between ``y`` and the predictions.
  """
  # Weight every feature, add the weighted features up per row, then shift by the bias.
  predictions = np.sum(x * w, axis=1) + b
  residuals = y - predictions
  return (residuals * residuals).mean()

In [None]:
# Compute the slopes (gradients) with respect to w and b.
y_pred = (x_train*w).sum(axis=1) + b
b_gradient = (y_pred - y_train).mean()
w_gradient = np.zeros(x_train.shape[1])  # x_train.shape[1] is the number of columns: four (x_train is 28*4)

# One gradient entry per feature column: the mean of (feature value * prediction error).
for i in range(x_train.shape[1]):
  w_gradient[i] = (x_train[:, i]*(y_pred - y_train)).mean()

w_gradient, b_gradient

(array([-246.27071429,  -56.33571429,  -16.40357143,   -6.58571429]),
 -39.53571428571429)

In [None]:
def compute_gradient(x, y, w, b):
  """Return the gradients of the linear model ``x*w + b`` for ``w`` and ``b``.

  The values are ``mean(x[:, j] * (y_pred - y))`` for every feature ``j`` and
  ``mean(y_pred - y)`` for the bias. Relative to the exact derivative of the
  mean squared error, the constant factor 2 is left out.

  Args:
    x: 2-D feature matrix, one sample per row (anything ``np.asarray`` accepts).
    y: Target values, one per row of ``x``; a pandas Series is used by
      position, its index is ignored.
    w: Weights, one per column of ``x``.
    b: Scalar bias.

  Returns:
    Tuple ``(w_gradient, b_gradient)``: a 1-D float array with one entry per
    feature column, and a scalar.
  """
  x = np.asarray(x)
  # np.asarray(y) drops any pandas index, so the subtraction is purely positional.
  error = (x * w).sum(axis=1) + b - np.asarray(y)
  b_gradient = error.mean()
  # Broadcast each sample's error across its row and average column-wise:
  # one vectorised expression instead of a Python loop over the features.
  w_gradient = (x * error[:, np.newaxis]).mean(axis=0)
  return w_gradient, b_gradient

In [None]:
# Same starting parameters as in the manual computation.
w = np.array([1,2,3,4])
b = 1
compute_gradient(x_train, y_train, w, b)  # the result matches the manual computation above

(array([-246.27071429,  -56.33571429,  -16.40357143,   -6.58571429]),
 -39.53571428571429)

In [None]:
# Update the parameters: one manual gradient-descent step,
# printing the cost before and after the update.
w = np.array([1, 2, 3, 4])
b = 1
learning_rate = 0.001

w_gradient, b_gradient = compute_gradient(x_train, y_train, w, b)
print(compute_cost(x_train, y_train, w, b))

# Move each parameter a small step against its gradient.
w = w - w_gradient*learning_rate
b = b - b_gradient*learning_rate
w, b
print(compute_cost(x_train, y_train, w, b))

1772.9485714285713
1644.4246833787713


- Gradient Descent (直接複製之前的即可，寫法幾乎一樣)

In [None]:
# Print floats inside NumPy arrays in scientific notation with two decimals.
# This is a global NumPy print setting, so it also affects later array output.
np.set_printoptions(formatter={'float': '{: .2e}'.format})
def gradient_descent(x, y, w_init, b_init, learning_rate, cost_function, gradient_function, run_iter, p_iter=1000):
  """Fit ``w`` and ``b`` with batch gradient descent.

  Args:
    x: Feature data, passed through unchanged to the two callbacks.
    y: Target data, passed through unchanged to the two callbacks.
    w_init: Starting weights (not modified; every update builds a new value).
    b_init: Starting bias.
    learning_rate: Step size multiplied with each gradient.
    cost_function: Callable ``(x, y, w, b) -> cost``.
    gradient_function: Callable ``(x, y, w, b) -> (w_gradient, b_gradient)``.
    run_iter: Number of update steps to run.
    p_iter: Print a progress line whenever the iteration index is a multiple
      of ``p_iter`` (iteration 0 included). ``0`` or ``None`` disables the
      progress output.

  Returns:
    Tuple ``(w, b, w_hist, b_hist, c_hist)``: the final parameters, followed by
    lists holding the weights, bias and cost recorded after every update.
  """
  c_hist = []
  w_hist = []
  b_hist = []

  w = w_init
  b = b_init

  for i in range(run_iter):
    w_gradient, b_gradient = gradient_function(x, y, w, b)

    # Step against the gradient; the cost is evaluated with the updated parameters.
    w = w - w_gradient*learning_rate
    b = b - b_gradient*learning_rate
    cost = cost_function(x, y, w, b)

    w_hist.append(w)
    b_hist.append(b)
    c_hist.append(cost)

    # `p_iter and ...` guards the modulo: p_iter == 0 would raise
    # ZeroDivisionError and None would raise TypeError.
    if p_iter and i%p_iter == 0:
      print(f"Iteration {i:5} : Cost {cost: .4e}, w: {w}, b: {b: .2e}, w_gradient: {w_gradient}, b_gradient: {b_gradient: .2e}")

  return w, b, w_hist, b_hist, c_hist

In [None]:
# Train on the unscaled features: 10000 steps with learning rate 0.01.
w_init = np.array([1, 2, 3, 4])
b_init = 0
learning_rate = 1.0e-2
run_iter = 10000

w_final, b_final, w_hist, b_hist, c_hist = gradient_descent(x_train, y_train, w_init, b_init, learning_rate, compute_cost, compute_gradient, run_iter)

Iteration     0 : Cost  7.7693e+02, w: [ 3.52e+00  2.58e+00  3.17e+00  4.07e+00], b:  4.05e-01, w_gradient: [-2.52e+02 -5.75e+01 -1.69e+01 -6.73e+00], b_gradient: -4.05e+01
Iteration  1000 : Cost  3.4316e+01, w: [ 2.09e+00  2.07e+01  4.34e+00 -5.78e+00], b:  1.24e+01, w_gradient: [ 1.59e-02  8.17e-03  3.27e-01  5.42e-01], b_gradient: -3.84e-01
Iteration  2000 : Cost  2.8295e+01, w: [ 2.03e+00  2.02e+01  1.74e+00 -9.40e+00], b:  1.54e+01, w_gradient: [ 5.43e-03  4.97e-02  1.97e-01  2.26e-01], b_gradient: -2.38e-01
Iteration  3000 : Cost  2.6516e+01, w: [ 1.96e+00  1.98e+01  1.98e-01 -1.09e+01], b:  1.74e+01, w_gradient: [ 7.07e-03  3.50e-02  1.18e-01  8.68e-02], b_gradient: -1.64e-01
Iteration  4000 : Cost  2.5863e+01, w: [ 1.89e+00  1.95e+01 -7.48e-01 -1.14e+01], b:  1.88e+01, w_gradient: [ 6.81e-03  2.51e-02  7.50e-02  2.58e-02], b_gradient: -1.17e-01
Iteration  5000 : Cost  2.5569e+01, w: [ 1.83e+00  1.93e+01 -1.36e+00 -1.15e+01], b:  1.98e+01, w_gradient: [ 5.86e-03  1.84e-02  5.00e

In [None]:
w_final, b_final

(array([ 1.65e+00,  1.88e+01, -2.56e+00, -1.11e+01]), 22.012515948734354)

In [None]:
# Predict on the test set and show the predictions next to the true values.
# y_test is still a pandas Series, so the table keeps its original row labels.
y_pred = (w_final*x_test).sum(axis=1) + b_final
pd.DataFrame({
    "y_pred": y_pred,
    "y_test": y_test
})

Unnamed: 0,y_pred,y_test
20,44.039183,43.8
16,67.80015,72.7
5,61.6091,60.8
27,23.580445,24.9
32,62.434658,56.3
2,23.250222,25.2
1,72.506458,80.5
7,68.460596,70.3


In [None]:
compute_cost(x_test, y_test, w_final, b_final)  # the error is smaller than on the training set

16.897171994862138

- **特徵縮放 Feature Scaling**
> 加速 gradient descent，其中一個經典方法為標準化 standardization。

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training set only, then apply that same fitted
# transformation to both the training set and the test set.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# After feature scaling the cost goes down noticeably faster.
# Same starting point and hyper-parameters as the unscaled run, for comparison.
w_init = np.array([1, 2, 3, 4])
b_init = 0
learning_rate = 1.0e-2
run_iter = 10000

w_final, b_final, w_hist, b_hist, c_hist = gradient_descent(x_train, y_train, w_init, b_init, learning_rate, compute_cost, compute_gradient, run_iter)

Iteration     0 : Cost  2.7729e+03, w: [ 1.06e+00  2.13e+00  2.89e+00  4.02e+00], b:  5.09e-01, w_gradient: [-5.97e+00 -1.28e+01  1.06e+01 -1.56e+00], b_gradient: -5.09e+01
Iteration  1000 : Cost  2.5311e+01, w: [ 3.75e+00  1.39e+01 -1.79e+00 -3.52e+00], b:  5.09e+01, w_gradient: [-8.99e-02 -5.68e-02 -7.79e-02  6.91e-02], b_gradient: -2.20e-03
Iteration  2000 : Cost  2.5223e+01, w: [ 4.09e+00  1.41e+01 -1.49e+00 -3.76e+00], b:  5.09e+01, w_gradient: [-7.65e-03 -4.70e-03 -7.13e-03  5.20e-03], b_gradient: -9.50e-08
Iteration  3000 : Cost  2.5222e+01, w: [ 4.12e+00  1.41e+01 -1.46e+00 -3.78e+00], b:  5.09e+01, w_gradient: [-6.46e-04 -4.01e-04 -6.08e-04  4.37e-04], b_gradient: -4.11e-12
Iteration  4000 : Cost  2.5222e+01, w: [ 4.12e+00  1.41e+01 -1.46e+00 -3.78e+00], b:  5.09e+01, w_gradient: [-5.48e-05 -3.41e-05 -5.16e-05  3.71e-05], b_gradient: -3.50e-13
Iteration  5000 : Cost  2.5222e+01, w: [ 4.12e+00  1.41e+01 -1.46e+00 -3.78e+00], b:  5.09e+01, w_gradient: [-4.64e-06 -2.89e-06 -4.38e

In [None]:
# Sample 1: 5.3 years of experience, master's degree or above, city A
# Sample 2: 7.2 years of experience, high school or below, city B
# Column order: YearsExperience, EducationLevel, CityA, CityB.
x_real = np.array([[5.3, 2, 1, 0], [7.2, 0, 0, 1]])
x_real = scaler.transform(x_real)  # feature scaling with the already fitted scaler
y_real = (w_final*x_real).sum(axis=1) + b_final
y_real

array([ 4.42e+01, -3.08e+01])

## 邏輯回歸 Logistic Regression
- 在分類問題 Classification 上非常好用。
- 透過 **Sigmoid Function** (s型函數)，將回歸線彎曲，結果會介於 0 ~ 1 之間。

In [None]:
import pandas as pd

# Load the diabetes classification dataset directly from GitHub into a DataFrame.
url = "https://raw.githubusercontent.com/GrandmaCan/ML/main/Classification/Diabetes_Data.csv"
data = pd.read_csv(url)
data

Unnamed: 0,Age,Weight,BloodSugar,Gender,Diabetes
0,25,119,130.8,男生,1
1,66,102,128.1,女生,1
2,59,65,103.9,男生,0
3,46,117,94.8,女生,0
4,45,79,57.8,男生,0
...,...,...,...,...,...
395,72,63,97.1,女生,0
396,22,117,156.7,女生,1
397,31,76,119.0,男生,0
398,36,65,96.3,女生,1


In [None]:
# Encode the two gender labels as 1 and 0.
# Series.map with a dict yields NaN for any value that is not a key of the dict.
data['Gender'] = data['Gender'].map({'男生':1,'女生':0})
data

Unnamed: 0,Age,Weight,BloodSugar,Gender,Diabetes
0,25,119,130.8,1,1
1,66,102,128.1,0,1
2,59,65,103.9,1,0
3,46,117,94.8,0,0
4,45,79,57.8,1,0
...,...,...,...,...,...
395,72,63,97.1,0,0
396,22,117,156.7,0,1
397,31,76,119.0,1,0
398,36,65,96.3,0,1


In [None]:
from sklearn.model_selection import train_test_split
# Feature matrix (four columns) and binary classification target.
x = data[["Age", "Weight", "BloodSugar", "Gender"]]
y = data["Diabetes"]

# Hold out 20% of the rows as the test set; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=86)
# Only the feature matrices become NumPy arrays; y_train / y_test stay pandas Series.
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
x_train, x_test

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training set only, then apply that same fitted
# transformation to both the training set and the test set.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
x_test

- Sigmoid function

In [None]:
import numpy as np
def sigmoid(z):
  """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

  Uses the equivalent form ``exp(min(z, 0)) / (1 + exp(-|z|))``. Only
  non-positive numbers are exponentiated, so large negative inputs return 0
  without the overflow that ``np.exp(-z)`` produces.

  Args:
    z: Scalar or array-like of real numbers.

  Returns:
    Values strictly inside ``(0, 1)`` for moderate inputs, saturating to
    exactly 0 or 1 for inputs of very large magnitude; same shape as ``z``.
  """
  # For z >= 0 the numerator is exp(0) == 1, i.e. the textbook 1 / (1 + exp(-z)).
  # For z < 0 it becomes exp(z) / (1 + exp(z)), the same value without overflow.
  return np.exp(np.minimum(z, 0)) / (1 + np.exp(-np.abs(z)))

In [None]:
# Arbitrary starting parameters: one weight per feature column, plus a bias.
w = np.array([1, 2, 3, 4])
b = 1
# Linear score per sample, then squashed by the sigmoid.
z = (w*x_train).sum(axis=1) + b
sigmoid(z)