In [None]:
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt

# Load the iris dataset; the following cells read its 'data' and 'target' entries.
iris = datasets.load_iris()

In [None]:
# Show the available keys and the shapes of the feature matrix and the target vector.
list(iris.keys()), iris['data'].shape, iris['target'].shape

In [None]:
# Column order in scikit-learn's iris data: sepal length, sepal width, petal length, petal width
iris['data'][:5]

In [None]:
# Species index of every sample.
iris['target']

# Learning Model
## 1. Hypothesis sets
### A simple hypothesis set - the 'perceptron'
For $d$ dimensional input $\mathbf{x} = (x_1,\dots,x_d)$ 'attributes of an iris'  
$$
\begin{align}
\textrm{Classify as NOT iris setosa } (+1) \textrm{ if} \quad \sum_{i=1}^{d}w_{i}x_{i} &> \textsf{threshold,} \\
\textrm{Classify as iris setosa } (-1) \textrm{ if} \quad \sum_{i=1}^{d}w_{i}x_{i} &< \textsf{threshold.}
\end{align}
$$

Let this $h \in \mathcal{H}$ :  
$$
h(\mathbf{x}) = \mathsf{sign} \left( \left( \sum_{i=1}^{d} w_{i}x_{i} \right) - \textsf{threshold}  \right)
$$


Introduce an artificial coordinate $x_0 = 1$ and let $w_0 = -\textsf{threshold}$:
$$
\begin{align}
h(\mathbf{x}) &= \mathsf{sign} \left( \left( \sum_{i=1}^{d} w_{i}x_{i} \right) + w_{0}  \right) \\
h(\mathbf{x}) &= \mathsf{sign} \left( \sum_{i=0}^{d} w_{i}x_{i} \right) \\ 
h(\mathbf{x}) &= \mathsf{sign} (\mathbf{w}^{\top}\mathbf{x})
\end{align}
$$
## 2. Learning Algorithm
### PLA - A simple perceptron learning algorithm
Given the training set of $N$ samples:
$$
(\mathbf{x}_1, y_1), (\mathbf{x}_2, y_2), \dots, (\mathbf{x}_N, y_N)
$$

- strategy:  
    pick a misclassified point, say $n$th point:
    $$
    \mathsf{sign}(\mathbf{w}^{\top}\mathbf{x}_n) \neq y_n
    $$
    and update the weight vector:
    $$
    \boxed{\mathbf{w} \leftarrow \mathbf{w} + y_n \mathbf{x}_n}
    $$
    At iteration $epoch = 1, 2, 3, \dots, $ pick one among the misclassified points  
    and run a PLA iteration on it.

In [None]:
# Number of input features used by the perceptron
d = 2

# Feature matrix of the N samples: columns 1 and 3 of iris['data']
X = iris['data'][:, 1:4:2]

# Number of training samples
N = X.shape[0]

# Labels: -1 when the species index is 0, +1 otherwise
y = np.where(iris['target'] > 0, 1, -1)

In [None]:
# Introduce the artificial coordinate x0 = 1 together with its weight w0.
# Initial weights (w0, w1, w2)
W = np.array([0.5, -1, 2])
# Prepend the column of ones only while X still has its d raw feature columns,
# so re-running this cell does not keep widening X.
if X.shape[1] == d:
    X = np.hstack((np.ones((len(X), 1)), X))

In [None]:
# Output of the single hypothesis (one element of the hypothesis set) selected by W
def hypothesis(X, W):
    """Return the sign of the row-wise weighted sum of X with weights W.

    The element-wise product W*X is summed along axis 1, so one value
    (-1, 0 or +1) is produced for every row of X.
    """
    weighted = W * X
    scores = np.sum(weighted, axis=1)
    return np.sign(scores)

In [None]:
def draw(X, W, y, epoch):
    """Plot the samples with the current decision line, then print the progress.

    Columns 1 and 2 of X are the plot coordinates (column 0 is not plotted),
    the points are coloured by y, and the line w0 + w1*h + w2*v = 0 is drawn
    over the range of column 1. The epoch and the weights are printed after
    the figure has been shown.
    """
    bias, weight_h, weight_v = W
    horizontal, vertical = X[:, 1], X[:, 2]
    plt.scatter(horizontal, vertical, c=y, cmap='viridis')
    # Sample the horizontal range and solve the line equation for the vertical coordinate.
    xline = np.linspace(min(horizontal), max(horizontal))
    boundary = -weight_h*xline/weight_v - bias/weight_v
    plt.plot(xline, boundary)
    plt.grid()
    plt.show()
    print("Epoch: ", epoch)
    print("Weights: ", W)

In [None]:
# Iterative fitting procedure
def fit(X_, W_, y_, epochs):
    """Run the perceptron learning algorithm for a fixed number of epochs.

    In every epoch one misclassified sample is chosen at random and
    y_n * x_n is added to the weights. After an update the state is plotted
    with draw() during the first five epochs and on every fifth epoch; the
    final state is always plotted once more.

    Parameters
    ----------
    X_ : 2-D array of samples, one row per sample, same width as W_.
    W_ : 1-D array of initial weights (not modified in place).
    y_ : 1-D array of labels compared against hypothesis(X_, W_).
    epochs : number of PLA iterations to attempt.

    Returns
    -------
    The weight vector after the last epoch.
    """
    epoch = 0  # keeps the final draw() valid even when epochs == 0
    for epoch in range(epochs):
        # Pre-compute y*x for every sample, then keep only the rows the
        # current hypothesis gets wrong.
        candidates = (y_[:, np.newaxis]*X_)[hypothesis(X_, W_) != y_]
        # Update W with one randomly chosen misclassified sample.
        if len(candidates) != 0:
            W_ = W_ + candidates[np.random.randint(len(candidates))]
            # Must be `or`: with `|` the expression parsed as
            # `epoch < (5 | ...)`, which is just `epoch < 5`.
            if epoch < 5 or epoch % 5 == 0:
                draw(X_, W_, y_, epoch)
    draw(X_, W_, y_, epoch)
    return W_

In [None]:
# Render figures inline, then run PLA for 100 epochs starting from the initial weights W.
%matplotlib inline
fit(X, W, y, 100)                  

# scikit-learn을 사용한 분류

In [None]:
# Single feature: column 3 of the iris data (the 3: slice keeps it 2-D)
X = iris['data'][:, 3:]
# Binary target: 1 where the species index is 2, otherwise 0
y = (iris['target'] == 2).astype(int)

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier with default settings on the single feature.
log_reg = LogisticRegression()
log_reg.fit(X, y)

In [None]:
# Evaluate the fitted model on a dense grid of feature values and plot both
# class probabilities on top of the training samples.
X_new = np.linspace(0, 3, 1000).reshape(-1, 1)
y_proba = log_reg.predict_proba(X_new)
plt.scatter(X, y, c=y, cmap='winter')
plt.plot(X_new, y_proba[:, 1], "g-", label="Iris virginica")
plt.plot(X_new, y_proba[:, 0], "b--", label="Not Iris virginica")
plt.xlabel("Petal width (cm)")
plt.ylabel("Probability")
# The curves carry labels, so a legend is needed for them to be displayed.
plt.legend()
plt.show()

In [None]:
# Predicted class for two single-feature samples (1.7 and 1.5).
log_reg.predict([[1.7], [1.5]])

In [None]:
# Two features (columns 2 and 3 of the iris data) and the full multi-class target.
X = iris['data'][:, (2, 3)]
y = iris['target']

# NOTE(review): the `multi_class` argument is deprecated in recent scikit-learn
# releases — confirm against the installed version before relying on it.
softmax_reg = LogisticRegression(multi_class="multinomial", solver='lbfgs', C=10)
softmax_reg.fit(X, y)

In [None]:
# Predicted class for one sample with feature values (5, 2).
softmax_reg.predict([[5, 2]])

In [None]:
# Per-class probabilities for the same sample.
softmax_reg.predict_proba([[5, 2]])

https://home.work.caltech.edu/homework/hw1.pdf  
• The Perceptron Learning Algorithm
In this problem, you will create your own target function f and data set D to see
how the Perceptron Learning Algorithm works. Take d = 2 so you can visualize the
problem, and assume $X = [−1, 1] × [−1, 1]$ with uniform probability of picking each
x ∈ X .  
In each run, choose a random line in the plane as your target function f (do this by
taking two random, uniformly distributed points in $[−1, 1] × [−1, 1]$ and taking the
line passing through them), where one side of the line maps to +1 and the other maps
to −1. Choose the inputs $\mathbf{x}_n$ of the data set as random points (uniformly in X ), and
evaluate the target function on each $\mathbf{x}_n$ to get the corresponding output $y_n$.  
Now, in each run, use the Perceptron Learning Algorithm to find g. Start the PLA
with the weight vector w being all zeros (consider sign(0) = 0, so all points are initially misclassified), and at each iteration have the algorithm choose a point randomly
from the set of misclassified points. We are interested in two quantities: the number
of iterations that PLA takes to converge to g, and the disagreement between f and
g which is $P[f(x) \neq g(x)]$ (the probability that f and g will disagree on their classification of a random point). You can either calculate this probability exactly, or
approximate it by generating a sufficiently large, separate set of points to estimate it.
In order to get a reliable estimate for these two quantities, you should repeat the
experiment for 1000 runs (each run as specified above) and take the average over
these runs.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the seed so the stochastic experiment below gives the same numbers on
# every Restart & Run All.
SEED = 0
np.random.seed(SEED)

runs = 1000          # number of independent experiments
N = 100              # training points per experiment
d = 2                # input dimension
interval = [-1, 1]   # range of every input coordinate

In [2]:
# Level (sign) function shared by f and g; rows of x include the dummy coordinate
def level_function(x, w):
    """Return the sign of the weighted sum of every row of x.

    w*x is formed element-wise and summed along axis 1, giving -1, 0 or +1
    per row.
    """
    products = w * x
    row_totals = np.sum(products, axis=1)
    return np.sign(row_totals)

In [3]:
# Build a random target line and the sign function that labels points against it
def create_function():
    """Draw two random points and return the line through them and its labelling function.

    Row 0 of ``coord`` holds the x1 coordinates of the two points and row 1
    their x2 coordinates, each drawn uniformly from ``interval``.

    Returns
    -------
    (line, f)
        ``line`` maps x1 to the x2 value on the line. When both points share
        the same x1 (vertical line) it is NOT a callable but the bare x1
        value of the first point.
        ``f`` takes an array of rows (1, x1, x2) and returns
        ``level_function`` of those rows for the weights (w0, n1, n2), where
        (n1, n2) is the point difference multiplied by [[0, -1], [1, 0]] and
        w0 makes the first point evaluate to zero.
    """
    low, high = interval
    coord = np.random.uniform(low, high, size=(d, 2))
    # Per-coordinate difference between the two points: (delta x1, delta x2)
    diff = coord[:, 1] - coord[:, 0]

    # Zero-level line, expressed as x2 = slope * x1 + intercept when possible
    if diff[0] != 0:
        slope = diff[1] / diff[0]
        x2_intercept = coord[1, 0] - slope * coord[0, 0]

        def line(x1):
            return slope * x1 + x2_intercept
    else:
        line = coord[0, 0]

    # Normal vector of the line; the sign function is +1 / -1 on either side, 0 on the line
    rotate_90 = np.array([[0, -1], [1, 0]])
    normal_vector = rotate_90.dot(diff)
    w0 = -(normal_vector[0] * coord[0, 0] + normal_vector[1] * coord[1, 0])
    w = np.concatenate(([w0], normal_vector))

    def f(x):
        return level_function(x, w)

    return line, f

In [4]:
# Sample N random points and build their augmented (1, x1, x2) rows
def choose_inputs(N):
    """Return (x1, x2, points) for N points drawn uniformly from ``interval``.

    ``x1`` and ``x2`` are the two coordinate arrays of length N; ``points``
    is the (N, 3) array whose rows are (1, x1, x2). No labels are computed
    here.
    """
    low, high = interval
    x1, x2 = np.random.uniform(low, high, size=(d, N))
    points = np.column_stack((np.ones(N), x1, x2))
    return x1, x2, points

In [5]:
# Fit the weights with PLA for samples x, labels y and at most itr updates
def fit_pla(x,  y, itr, w=None):
    """Run the perceptron learning algorithm.

    Parameters
    ----------
    x : 2-D array of samples, one row per sample.
    y : 1-D array of labels compared against level_function(x, w).
    itr : maximum number of weight updates.
    w : optional initial weight vector. Defaults to all zeros with one entry
        per column of x. (Previously this argument was ignored and the
        weights were always reset to a length-3 zero vector.)

    Returns
    -------
    (iterations_to_converge, w)
        The number of updates performed and the final weights. The count
        equals ``itr`` when the cap was reached without converging.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if w is None:
        w = np.zeros(x.shape[1])
    else:
        w = np.asarray(w, dtype=float)

    iterations_to_converge = 0
    while iterations_to_converge < itr:
        # Rows y_n * x_n of the samples the current weights misclassify
        misclassified = (y.reshape(-1, 1)*x)[np.not_equal(level_function(x, w), y)]
        # Stop when there is no non-zero update left to apply.
        if not misclassified.any():
            break
        # np.random.choice needs a 1-D input, so pick a row index instead of a row.
        index = np.random.choice(len(misclassified))
        w = w + misclassified[index]
        iterations_to_converge += 1
    return iterations_to_converge, w

In [6]:
# Repeat the experiment `runs` (= 1000) times
# NOTE(review): PLA is capped at `iteration` updates per run; a run that hits
# the cap is recorded as `iteration` even though it did not converge —
# confirm that this cap is intended.
iteration = N*2
itred = []           # number of PLA updates used in each run
experiments = 100    # fresh test points per run for estimating P[f(x) != g(x)]
disagree_prob = []   # per-run disagreement rate between f and g
for run in range(runs):
    # Build the target function f
    line_f, f = create_function()
    # Draw the N training points
    x1, x2, points = choose_inputs(N)
    # Label them with f
    y = f(points)

    # Learn g
    itr, w = fit_pla(points, y, iteration)

    # Number of updates that were needed
    itred.append(itr)

    # Estimate the disagreement on all test points of this run in one
    # vectorised call instead of one Python-level evaluation per point.
    x_test = choose_inputs(experiments)[2]
    disagree_prob.append(np.mean(f(x_test) != level_function(x_test, w)))

print(np.mean(itred), np.mean(disagree_prob))

79.174 0.01581


In [None]:
# Scratch check: rebuild the augmented (1, x1, x2) matrix; x1 and x2 must each hold 100 values.
np.hstack((np.ones((100,1)),x1[:,np.newaxis], x2[:,np.newaxis]))

In [None]:
# Alternative construction of the augmented points via vstack + transpose.
# NOTE(review): `experiment` is only assigned in a later cell, so this fails on a fresh top-to-bottom run.
pts_ = np.vstack((np.ones((experiment,))[np.newaxis,:], [x1, x2])).T

In [None]:
# Wrap level_function with the current global weights w and evaluate it on one augmented point.
fuc = lambda x: level_function(x, w)
fuc([[1, 2, 1]])

In [None]:
# Scratch check: prepend a scalar to a 1-D array with hstack.
k =3.
l = np.array([1, -2])
np.hstack((np.array([k]), l))

In [None]:
# Scratch check: multiply the vector (1, 1) by the matrix [[0, -1], [1, 0]].
normal_vector = np.array([[0, -1], [1, 0]]).dot([1, 1])
normal_vector

In [None]:
# Count the agreements between g and f on `experiment` random points.
# NOTE(review): `g` is not defined in any visible cell, and the f returned by
# create_function() takes one array of augmented rows rather than separate
# coordinates — this cell looks stale.
experiment = 100
coord = np.array([np.random.uniform(*interval, size=experiment) for i in range(d)])
pts_ = np.vstack((np.ones((experiment,))[np.newaxis,:], coord)).T
np.sum(g(pts_, w) == f(*coord))

In [None]:
# Sample `experiment` random values for each of the two coordinates.
x1, x2 = np.array([np.random.uniform(*interval, size=experiment) for i in range(d)])
# pts_ = np.vstack((np.ones((experiment,))[np.newaxis,:], points_)).T
# pts_

In [None]:
# Scratch check: np.diff on a (d, 2) array of random coordinates.
# coordi = np.empty((d, 2))
coordi = np.array([np.random.uniform(*interval, size=2) for i in range(d)])
np.diff(coordi)

In [None]:
# NOTE(review): `x_` is not assigned in any visible cell; its only definition is the commented-out line below.
# x_ = np.hstack((np.ones((len(selected), 1)), selected))
# Rows of x_ whose first column differs from 1; the trailing comma wraps the result in a 1-tuple.
x_[x_[:,0] != np.ones((10,))], 

In [None]:
# Rows of x_ whose first column equals 1 (x_ must have 10 rows to match the mask).
x_[np.equal(x_[:,0], np.ones((10,)))]

In [None]:
# Row-wise sum of x_ with all-ones weights (x_ must have 3 columns).
np.sum(np.ones((3,))*x_, axis=1)
# np.sign(np.sum(w.T*x, axis=1))

In [None]:
# Plot two random points in [-1, 1) x [-1, 1) and the line returned for them.
# NOTE(review): `create_target_function` is not defined in any visible cell
# (create_function above takes no arguments) — this cell looks stale.
points = 2 * np.random.random_sample((2, 2)) - 1
x1 = np.linspace(-0.99, 0.99)
x2, f = create_target_function(points)
plt.scatter(points[:,0], points[:,1])
plt.plot(x1, x2(x1))
plt.ylim(-1, 1)
f(.75, .90), f(0, 0)

In [None]:
# Build a square evaluation grid over [-0.99, 0.99] in both coordinates.
# pts1 and pts2 are not used again in this cell.
# NOTE(review): this overwrites the global X with the grid.
pts1 = 2 * np.random.random_sample((2, 2)) - 1
pts2 = 2 * np.random.random_sample((2, 2)) - 1
x1 = np.linspace(-.99, .99)
x2 = x1.copy()
X, Y = np.meshgrid(x1, x2)
# np.hstack((x1[:,np.newaxis], x2[:, np.newaxis]))
X.shape

In [None]:
# Scratch check: a two-argument lambda works on scalars and on the meshgrid arrays alike.
# Only the last expression is displayed.
f1 = lambda x, y: x + y
f1(1, 2)
f1(X, Y)

In [None]:
# Visualise f over the grid (X, Y) as an image with labelled contour lines.
# NOTE(review): `create_target_function` is not defined in any visible cell.
points = 2 * np.random.random_sample((2, 2)) - 1
# points = np.array([[-.5, .7], [.5, -2]])
_, f = create_target_function(points)
plt.figure(figsize=(6, 6))
im = plt.imshow(f(X, Y), interpolation='bilinear', origin='lower',
                cmap='RdBu_r', extent=(-1, 1, -1, 1))

cp = plt.contour(X, Y, f(X, Y), np.arange(-10, 10), cmap='jet', alpha=.75)
plt.clabel(cp, fontsize=12)
plt.colorbar()
plt.show()
f(0, 0)

In [None]:
# Evaluate f at the four corners of the square and try two ways of building
# contour levels from the minimum and maximum. Only the last expression is displayed.
# np.arange(-f, 10)
end_points = f(1, 1), f(-1, 1), f(-1, -1), f(1, -1)
pt = np.round(end_points)
print(end_points,"\n", np.min(end_points), np.max(end_points))
np.arange(np.min(end_points), np.max(end_points), (-np.min(end_points) + np.max(end_points))/10)
np.arange(np.min(pt), np.max(pt))

In [None]:
# Scratch check: unpack a tuple holding a number and a lambda (this rebinds the global f).
a, f = (1, (lambda x1, x2 : x1 + x2))
# `!` is the IPython shell escape, so the next line runs `None` as a shell command — presumably leftover.
!None

In [None]:
# Evaluate f on the grid (X, Y).
# NOTE(review): `create_target_function` is not defined in any visible cell.
_, f = create_target_function(points)
f(X, Y)

In [None]:
# Scratch check: the vector (2, 1) multiplied by [[0, -1], [1, 0]] is orthogonal to (2, 1), so this evaluates to 0.
np.array([[0, -1], [1, 0]]).dot([2, 1]).dot([2, 1])

In [None]:
# NOTE(review): here [-1, 1] is passed as `low` and 2 as `high` (not as the size),
# so this draws one value from [-1, 2) and one from [1, 2) — probably not what was intended.
np.random.uniform([-1, 1], 2)

In [None]:
# Select the rows of ynxn for the samples the current hypothesis misclassifies.
# NOTE(review): `ynxn` is not assigned in any visible cell.
ynxn[y != hypothesis(X, W)]
# np.random.randint(len(z))    
# wrongX, wrong_y = (X, y)[y != hypothesis(X, W)]
# wrongX[np.random.randint(len(wrongX))]
# np.random.randint(len(X[y != hypothesis(X, W)])),np.random.randint(len(X[y != hypothesis(X, W)])),np.random.randint(len(X[y != hypothesis(X, W)]))

In [None]:
# Scatter of iris data column 1 against column 3, coloured by species index.
plt.scatter(iris.data[:,1], iris.data[:, 3], c=iris.target, cmap='viridis')

In [None]:
# Shapes of the two columns used in the scatter plot.
iris.data[:,1].shape, iris.data[:, 3].shape

In [None]:
# Scratch comprehensions; the first two results are discarded because they are not the last expression.
[i for i in range(2) if i > 0]
[i if i == 0  else 1 for i in iris.target]
# Scatter of columns 1 and 3 for the samples whose column-3 value is below 0.75.
plt.scatter([i[1] for i in iris.data if i[3] < 0.75], [i[3] for i in iris.data if i[3] < 0.75])

In [None]:
# Number of samples whose species index is 0.
iris.target[iris['target'] == 0].shape

In [None]:
# Columns 1 and 3 of the iris data, each kept as a 2-D column by slicing.
X1 = iris['data'][:, 1:2]
X2 = iris['data'][:, 3:]
X1[:2], X2[:2]

In [None]:
# Scatter of the two selected feature columns.
plt.scatter(X1, X2)

In [None]:
# Relabel on a copy: species index 0 becomes -1, every other index becomes +1.
# The > 0 assignment runs first, so the freshly written 1s are not touched by the == 0 assignment.
y = iris['target'].copy()
y[y > 0] = 1
y[ y == 0] = -1
y, iris['target']

In [None]:
# Scatter of columns 1 and 3, coloured by the -1/+1 labels.
plt.scatter(iris['data'][:, 1], iris['data'][:, 3], c=y, cmap='viridis')

In [None]:
# Select columns 1 and 3 of the iris data (start 1, stop 4, step 2) and show the first rows.
X = iris['data'][:, 1:4:2]
X[:5]

In [None]:
# Initial weights (three entries: bias plus two features)
W0 = np.array([0.3, -1, 2])

# For every sample print: its label, the sample, whether W0 classifies it
# correctly, and what W0 would become after a PLA update on it.
# NOTE(review): W0 has three entries, so X needs the leading ones column here;
# the cell that adds it appears further down — confirm the intended cell order.
len(X)
for x, y_ in zip(X, y):
    print(y_, x, np.sign(W0.dot(x)) == y_, W0 + y_*x)

In [None]:
def fit(X, W, y, epoch):
    """Deterministic PLA: sweep the samples in order and update on every mistake.

    NOTE(review): this redefines the earlier four-argument `fit` that plots
    its progress; whichever cell ran last wins.

    Parameters
    ----------
    X : 2-D array of samples, one row per sample, same width as W.
    W : 1-D array of initial weights (not modified in place).
    y : iterable of labels compared against sign(W . x).
    epoch : maximum number of full passes over the samples.

    Returns
    -------
    The weight vector after the last pass.
    """
    for sweep in range(epoch):
        updated = False
        for xi, yi in zip(X, y):
            if np.sign(W.dot(xi)) != yi:
                W = W + yi * xi
                updated = True
        if not updated:
            # A full pass without mistakes: further passes cannot change W.
            break
    return W
# Run 20 passes from W0 and unpack the three fitted weights.
w0, w1, w2 = fit(X, W0, y, 20)
            
            
print(w0, w1, w2)        

In [None]:
# Plot the samples (columns 1 and 2 of the augmented X) with the line w0 + w1*x + w2*y = 0.
plt.scatter(X[:,1], X[:, 2], c=y, cmap='viridis')
xline = np.linspace(min(X[:,1]), max(X[:,1]))
plt.plot(xline, -w1*xline/w2 - w0/w2)

In [None]:
# Scratch check: compare the shape of a row of ones with the shape of X transposed.
np.ones((1, len(X))).shape, X.T.shape
# np.stack((np.ones(len(X)), X), axis=-1)

In [None]:
# Prepend a column of ones to X.
# NOTE(review): X is overwritten with itself, so every re-run of this cell adds one more ones column.
X = np.hstack((np.ones((len(X), 1)), X))

In [None]:
# Shape of X after the augmentation.
X.shape

In [None]:
# Column 1 of X.
X[:,1]

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a default logistic regression on the columns of X from index 3 onwards.
# NOTE(review): if X is the three-column augmented matrix built above, X[:, 3:] has no columns — confirm which X was meant.
log_reg = LogisticRegression()
log_reg.fit(X[:,3:], y)

In [None]:
# First five rows of the raw iris feature matrix.
iris.data[:5]

In [None]:
# Same expression as the body of hypothesis(X, W): sign of the row-wise weighted sum.
np.sign(np.sum(W*X, axis=1))


In [None]:
# First two rows of the element-wise product W*X.
(W*X)[:2]

In [None]:
# IPython help lookup (trailing `?`) left over from development.
# NOTE(review): `is_none` is not defined in any visible cell.
is_none?