-
Notifications
You must be signed in to change notification settings - Fork 0
/
algorithm.py
91 lines (70 loc) · 2.29 KB
/
algorithm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Learning rate that Train passes to Optimizer on every pass.
LR = 0.01
# Number of optimizer passes over the training data performed by Train.
Step = 10000
# Load the iris data
def create_Data():
    """Return features and labels for the first 100 iris samples.

    The iris dataset is loaded through scikit-learn, placed in a DataFrame
    with short feature column names plus a ``label`` column, and cut down
    to its first 100 rows.

    NOTE(review): in scikit-learn's iris ordering the first 100 rows are
    presumably classes 0 and 1 only, which is what would make this a binary
    problem -- confirm against the dataset documentation.

    Returns:
        A ``(features, labels)`` tuple taken from one float array:
        ``features`` is every column except the last, ``labels`` is the
        last column.
    """
    iris = load_iris()
    frame = pd.DataFrame(
        iris.data,
        columns=['sepal length', 'sepal width', 'petal length', 'petal width'],
    )
    frame['label'] = iris.target
    subset = np.array(frame.iloc[:100, :])
    features, labels = subset[:, :-1], subset[:, -1]
    return features, labels
# Sigmoid function
def Sigmoid(x):
    """Compute the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is evaluated through ``exp(-|x|)``, which never exceeds 1, so
    large-magnitude negative inputs do not overflow inside ``np.exp``.

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``; every value lies in ``[0, 1]``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x).  x < 0: the algebraically equal e^x / (1 + e^x).
    result = np.where(x >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-rank arrays unchanged.
    return result[()]
# Loss function
def Loss_function(x, y, W, b):
    """Return the mean binary cross-entropy, in bits, of a logistic model.

    For each sample the logit is ``z_i = W . x_i + b`` and the loss is
    ``-(y_i * log2(p_i) + (1 - y_i) * log2(1 - p_i))`` with
    ``p_i = sigmoid(z_i)``. It is evaluated in the algebraically equal form
    ``(log(1 + exp(z_i)) - y_i * z_i) / log(2)``, which stays finite when
    the sigmoid saturates to exactly 0 or 1 (the literal formula produces
    ``nan`` there because it multiplies 0 by ``log2(0)``).

    Args:
        x: Samples, array-like of shape (n_samples, n_features).
        y: Labels (0 or 1), array-like of length n_samples.
        W: Weight vector with n_features entries.
        b: Scalar bias.

    Returns:
        The loss averaged over all samples, as a NumPy float.

    Raises:
        ValueError: If ``x`` contains no samples.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    if len(x) == 0:
        raise ValueError("Loss_function requires at least one sample")
    logits = x @ np.asarray(W, dtype=float).ravel() + b
    # logaddexp(0, z) is a stable log(1 + e^z); dividing by ln(2) keeps the
    # base-2 units of the original log2 formulation.
    per_sample = (np.logaddexp(0.0, logits) - y * logits) / np.log(2.0)
    return per_sample.mean()
# Optimizer (where gradient descent is applied; SGD is the default)
def Optimizer(x, y, W, b, LR=0.001, Type='SGD'):
    """Run one pass of per-sample gradient descent over the data.

    With ``Type == 'SGD'`` every sample is visited once, in a freshly
    shuffled order, and after each sample the parameters are moved against
    the gradient of that sample's logistic loss::

        W <- W - LR * (y_hat - y_i) * x_i
        b <- b - LR * (y_hat - y_i)

    where ``y_hat = Sigmoid(W . x_i + b)``.

    Args:
        x: Samples, indexable by integer position; each item is a feature
            vector.
        y: Labels, indexable in step with ``x``.
        W: Current weight vector.
        b: Current bias.
        LR: Learning rate (step size).
        Type: Optimizer name; only ``'SGD'`` is implemented.

    Returns:
        The updated ``(W, b)`` pair. ``W`` is rebound to new arrays rather
        than modified in place.

    Raises:
        ValueError: If ``Type`` is not a supported optimizer name.
    """
    if Type != 'SGD':
        # Previously an unknown type silently performed no training.
        raise ValueError("Unsupported optimizer type: {!r}".format(Type))
    # Visit the samples in a new random order on every pass; the original
    # built the indices 0..n-1 but never shuffled them.
    for i in np.random.permutation(len(x)):
        x_i, y_i = x[i], y[i]
        error = Sigmoid(np.dot(np.transpose(W), x_i) + b) - y_i
        W = W - LR * error * x_i
        b = b - LR * error
    return W, b
# Training
def Train(X, Y):
    """Fit the logistic-regression parameters with repeated optimizer passes.

    The weights start from standard-normal random values (one per column of
    ``X``) and the bias from 0. ``Optimizer`` is then called ``Step`` times
    with the module-level learning rate ``LR``; on every 1000th pass
    (starting with the first) the current loss is printed.

    Args:
        X: Training samples; ``X.shape[1]`` is used as the number of features.
        Y: Training labels aligned with ``X``.

    Returns:
        A dict with the learned weights under ``'W'`` and the bias under ``'b'``.
    """
    weights = np.random.randn(X.shape[1])
    bias = 0
    for epoch in range(Step):
        weights, bias = Optimizer(X, Y, weights, bias, LR)
        if epoch % 1000:
            continue
        print("Loss is :", Loss_function(X, Y, weights, bias))
    return {'W': weights, 'b': bias}
# Prediction
def Predict(X, Y, parameter):
    """Classify samples with the trained model and report the accuracy.

    A sample is assigned label 1 when its logit ``W . x + b`` is
    non-negative -- equivalently, when the sigmoid of the logit is at least
    0.5 -- and label 0 otherwise. The fraction of samples whose prediction
    equals the label in ``Y`` is printed as ``ACC: <value>`` and returned.

    Args:
        X: Samples, array-like of shape (n_samples, n_features).
        Y: True labels (0 or 1), array-like of length n_samples.
        parameter: Dict holding the weights under ``'W'`` and the bias
            under ``'b'``.

    Returns:
        The accuracy as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``X`` is empty or ``X`` and ``Y`` differ in length.
    """
    W = np.asarray(parameter['W'], dtype=float).ravel()
    b = parameter['b']
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y).ravel()
    total = len(X)
    if total == 0:
        raise ValueError("Predict requires at least one sample")
    if len(Y) != total:
        raise ValueError("X and Y must contain the same number of samples")
    # sigmoid(z) >= 0.5 exactly when z >= 0, so the logit is thresholded
    # directly and no exponential has to be evaluated.
    predicted = (X @ W + b >= 0).astype(int)
    cnt = int(np.count_nonzero(predicted == Y))
    accuracy = cnt / total
    print("ACC:", accuracy)
    return accuracy
if __name__ == '__main__':
    # Build the dataset, hold out 30% of it for testing, train on the rest,
    # then report accuracy on the held-out part.
    features, labels = create_Data()
    split = train_test_split(features, labels, test_size=0.3)
    train_X, test_X, train_Y, test_Y = split
    print("the number of train:", len(train_X))
    print("the number of test:", len(test_X))
    Predict(test_X, test_Y, Train(train_X, train_Y))