## 逻辑回归 

### 参数设置
- X, 样本数据, shape=(m,n),type:np.ndarray()
- y, 标签数据, shape=(m,1),type:np.ndarray()
- theta, 权重, shape=(1,n),type:np.ndarray()
- b, 偏差，标量, type:float

In [1]:
import numpy as np
import matplotlib.pyplot as plt

### sigmod函数 

In [2]:
def sigmod(x):
    return 1 / (np.exp(-x)+1)

In [6]:
tx = np.array([0.6,0.4,0.2])
ty = np.where(tx > 0.5)

AttributeError: 'tuple' object has no attribute 'shape'

In [14]:
def predict(theta , b , X):
    z = np.dot(X , theta.T) + b
    pro = sigmod(z)
    return (pro >= 0.5).astype(int)

In [15]:
m = 5
n = 6
b = np.random.rand()
X = np.random.randn(m , n)
sita = np.random.random_sample(size = (1,n))
predict(sita , b , X)

array([[0],
       [1],
       [1],
       [1],
       [1]])

### 梯度下降法逼近 $\theta$ 与 $b$  

In [66]:
def fit(train_X, train_y, alpha=0.1, epoch=500):
    """
    type train_X:([[]])
    type train_y:([[]])
    type alpha:float
    type epoch:int
    rtype:(([[]]),float)
    """
    # 使用梯度下降法逼近sita与b
    m , n = train_X.shape
    theta = np.random.randn(1,n)
    b = np.random.randn()
    while epoch > 0:
        pre_y = predict(theta, b, train_X)
        d_theta = np.sum((pre_y - train_y)*train_X,axis=0)/np.float(m)
        d_b    = np.sum(pre_y - train_y,axis=0)/np.float(m)
        theta -= alpha*d_theta
        b     -= alpha*d_b
        epoch -= 1
        print "the %d epoch" % (500-epoch),theta,b
    return (theta,b)

### 测试逻辑回归 

In [88]:
from sklearn.datasets import load_iris,load_breast_cancer

In [89]:
breast_cancer = load_breast_cancer()

In [90]:
bc_x = breast_cancer.data
bc_x.shape

(569, 30)

In [91]:
bc_y = breast_cancer.target

In [92]:
bc_y.shape

(569,)

In [94]:
bc_y[278:310]

array([1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 1])

In [95]:
bc_y = bc_y.reshape(-1,1)
bc_y.shape

(569, 1)

In [41]:
from sklearn.model_selection import train_test_split

In [96]:
tr_X, te_X ,tr_y ,te_y = train_test_split(bc_x,bc_y,test_size = 0.4)

In [97]:
print tr_X.shape
print te_X.shape

(341, 30)
(228, 30)


In [98]:
theta , bb = fit(tr_X,tr_y)

the 1 epoch [[-4.80948372e-01 -7.41096847e-01 -5.58924085e+00 -3.69619404e+01
  -9.66576366e-01  6.14332479e-01 -1.92785792e-02  5.78862072e-01
   7.94660906e-01 -5.77133560e-01 -1.05383042e+00  5.06100547e-01
   3.60018225e-01 -2.71915401e+00 -1.14999920e+00  7.26961943e-03
  -9.32935762e-01 -1.70668287e+00  1.03894485e+00 -1.26409472e+00
  -2.74605534e+00 -9.61237635e-02 -5.19550683e+00 -5.50939603e+01
   1.41183697e+00 -8.55083212e-01  9.56712096e-01 -4.15899074e-01
   1.41193468e+00 -1.07103947e+00]] [-1.81097955]
the 2 epoch [[ 2.74648988e-01  3.87906086e-01 -7.31082494e-01 -8.17276151e+00
  -9.60848694e-01  6.19337702e-01 -1.62725910e-02  5.80513734e-01
   8.05450261e-01 -5.73230464e-01 -1.03602770e+00  5.84969579e-01
   4.84463122e-01 -1.38907307e+00 -1.14955043e+00  8.63178483e-03
  -9.31265420e-01 -1.70605591e+00  1.04019724e+00 -1.26386096e+00
  -1.91325153e+00  1.39535424e+00  2.13413990e-01 -2.02775087e+01
   1.41960844e+00 -8.43812998e-01  9.67170810e-01 -4.11192971e-01
  

  


the 110 epoch [[ 1.60402214e+01  3.02271935e+01  9.61007386e+01  1.42884980e+02
  -7.98257301e-01  6.78770693e-01 -8.92217100e-02  5.42724716e-01
   1.11001953e+00 -4.51866294e-01 -9.37204530e-01  3.06377283e+00
   9.91196671e-01 -3.10009308e+01 -1.13491064e+00  3.52872089e-02
  -9.02941827e-01 -1.69453472e+00  1.07568438e+00 -1.25698280e+00
   1.32135587e+01  3.98978293e+01  9.24636075e+01 -1.09029034e+02
   1.63341657e+00 -7.78933684e-01  8.78004288e-01 -4.19958744e-01
   1.85340386e+00 -9.30155145e-01]] [0.18462162]
the 111 epoch [[ 1.55674883e+01  2.96265747e+01  9.29862020e+01  1.17277473e+02
  -8.01094688e-01  6.74931015e-01 -9.33954520e-02  5.40457857e-01
   1.10464686e+00 -4.53608393e-01 -9.52536231e-01  3.02871275e+00
   8.81192565e-01 -3.27050892e+01 -1.13510113e+00  3.44146910e-02
  -9.04100269e-01 -1.69495868e+00  1.07505429e+00 -1.25709375e+00
   1.26603740e+01  3.90897413e+01  8.87759037e+01 -1.43498623e+02
   1.62944508e+00 -7.88765094e-01  8.66206024e-01 -4.24750425e-01

   2.01297911e+00 -8.80727696e-01]] [0.95382983]
the 205 epoch [[ 2.16266959e+01  4.01999794e+01  1.29377786e+02  1.40161667e+02
  -7.39412202e-01  6.81027036e-01 -1.52861021e-01  5.12517277e-01
   1.22117542e+00 -4.06792349e-01 -9.22653093e-01  3.83456260e+00
   8.38772594e-01 -4.66277596e+01 -1.13025112e+00  3.97540054e-02
  -8.99281810e-01 -1.69259441e+00  1.08573989e+00 -1.25486307e+00
   1.89888769e+01  5.33502457e+01  1.26048625e+02 -1.56040148e+02
   1.71203561e+00 -8.01643282e-01  7.72062274e-01 -4.42738007e-01
   2.00786477e+00 -8.82158869e-01]] [0.93828731]
the 206 epoch [[ 2.19573428e+01  4.06560879e+01  1.31504120e+02  1.53387326e+02
  -7.37047219e-01  6.83071010e-01 -1.51660329e-01  5.13209755e-01
   1.22568164e+00 -4.05218385e-01 -9.14625732e-01  3.86308190e+00
   8.92334764e-01 -4.59930792e+01 -1.13008931e+00  4.02551966e-02
  -8.98628201e-01 -1.69235360e+00  1.08621944e+00 -1.25477658e+00
   1.93623842e+01  5.39672487e+01  1.28465297e+02 -1.39275192e+02
   1.71529251e+0

the 294 epoch [[ 2.42156285e+01  4.40396979e+01  1.44321703e+02  1.35164892e+02
  -7.15561861e-01  6.71016763e-01 -2.01619851e-01  4.91656280e-01
   1.26675862e+00 -3.87816006e-01 -9.20150160e-01  4.05964307e+00
   5.63489310e-01 -5.52243432e+01 -1.12891517e+00  3.79689098e-02
  -9.03882252e-01 -1.69344867e+00  1.08814928e+00 -1.25438608e+00
   2.20915397e+01  5.89615302e+01  1.43118854e+02 -1.35551556e+02
   1.74524660e+00 -8.32139103e-01  6.90936874e-01 -4.60619189e-01
   2.07535060e+00 -8.62623845e-01]] [1.28315534]
the 295 epoch [[ 2.40653516e+01  4.38419413e+01  1.43323994e+02  1.27135684e+02
  -7.16532325e-01  6.69625596e-01 -2.03215778e-01  4.90869805e-01
   1.26495962e+00 -3.88406989e-01 -9.24966817e-01  4.04666832e+00
   5.26076407e-01 -5.57785484e+01 -1.12898912e+00  3.76298737e-02
  -9.04339261e-01 -1.69360583e+00  1.08791253e+00 -1.25443007e+00
   2.19257186e+01  5.87013337e+01  1.41992144e+02 -1.45112846e+02
   1.74391311e+00 -8.35511276e-01  6.86819284e-01 -4.62196025e-01

the 407 epoch [[ 2.58662044e+01  4.59091260e+01  1.53127337e+02  1.03711051e+02
  -7.02284512e-01  6.50085247e-01 -2.62318582e-01  4.66487816e-01
   1.29286228e+00 -3.75691091e-01 -9.44320806e-01  4.12655219e+00
   4.74816852e-02 -6.56985895e+01 -1.12867027e+00  3.33093271e-02
  -9.13959860e-01 -1.69587506e+00  1.08853611e+00 -1.25440082e+00
   2.43105191e+01  6.20614921e+01  1.54059731e+02 -1.29200617e+02
   1.76323108e+00 -8.85194842e-01  5.80165783e-01 -4.88490966e-01
   2.11724635e+00 -8.51649051e-01]] [1.53388848]
the 408 epoch [[ 2.60560390e+01  4.61556158e+01  1.54348334e+02  1.11541813e+02
  -7.01004014e-01  6.51203763e-01 -2.61733500e-01  4.66856246e-01
   1.29527932e+00 -3.74833074e-01 -9.39735498e-01  4.14132066e+00
   7.72558788e-02 -6.53195396e+01 -1.12858921e+00  3.35962074e-02
  -9.13650101e-01 -1.69575128e+00  1.08879989e+00 -1.25435344e+00
   2.45273051e+01  6.23951020e+01  1.55457414e+02 -1.19072582e+02
   1.76497911e+00 -8.82485487e-01  5.82565853e-01 -4.87321600e-01

In [99]:
print theta , bb

[[ 2.84117944e+01  4.89670293e+01  1.67754662e+02  1.05632077e+02
  -6.81921354e-01  6.35626520e-01 -3.17174797e-01  4.43887958e-01
   1.33195803e+00 -3.59194956e-01 -9.57913856e-01  4.29782107e+00
  -3.23054972e-01 -7.49692640e+01 -1.12767186e+00  3.02289036e-02
  -9.21698344e-01 -1.69709545e+00  1.09048420e+00 -1.25420008e+00
   2.72060587e+01  6.63030082e+01  1.69703857e+02 -1.14088418e+02
   1.78964400e+00 -9.38149097e-01  4.70732949e-01 -5.12928837e-01
   2.17018424e+00 -8.37292939e-01]] [1.85676238]


In [101]:
print np.sum(predict(theta,bb[0],te_X)==te_y) / np.float(te_y.shape[0])

0.8859649122807017


  


In [102]:
print np.sum(te_y)

145
