% 线性回归模型  
$$ 
\hat{y} = w \cdot x + b  
$$  
  
% 损失函数（均方误差）  
$$ 
J(w, b) = \frac{1}{2m} \sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)})^2  
$$ 
  
% 梯度下降更新规则  
$$ 
w := w - \alpha \frac{\partial J(w, b)}{\partial w}  
$$ 
$$ 
b := b - \alpha \frac{\partial J(w, b)}{\partial b}  
$$
  
% 偏导数  
$$
\frac{\partial J(w, b)}{\partial w} = \frac{1}{m} \sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)}) \cdot x^{(i)}  
$$
$$
\frac{\partial J(w, b)}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)})  
$$  


In [None]:
# !pip install pandas matplotlib numpy tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm as tm
from itertools import islice
# Synthetic data for the linear model.
# Fix the seed so every run draws the same samples.
np.random.seed(0)
# Number of samples.
n_samples = 100
# Feature X: uniform random values in [0, 2) (not an increasing sequence).
X = 2 * np.random.rand(n_samples)
# Ground-truth parameters of the line.
true_coef = 2.5
true_intercept = 1.0
# Target Y: the true line evaluated at X plus Gaussian noise scaled by 0.5.
Y = true_coef * X + true_intercept + np.random.randn(n_samples) * 0.5
# Both arrays are one-dimensional with n_samples entries.
print(X.shape, Y.shape)

# print(X[:5], Y, sep="\n")


def J(y_hat, y, m):
    """Return the halved mean squared error ``1/(2m) * sum((y_hat - y)**2)``.

    ``np.sum`` reduces over every element, so the result is a scalar for
    inputs of any shape (the builtin ``sum`` only reduces along axis 0).
    """
    return 1/(2*m)*np.sum((y_hat-y)**2)


def hat(w, x, b):
    """Return the linear prediction ``w * x + b``."""
    return w*x+b


def dw(x, y, w, b, m):
    """Return dJ/dw: ``1/m * sum((hat(w, x, b) - y) * x)``."""
    return 1/m*np.sum((hat(w, x, b)-y)*x)


def db(x, y, w, b, m):
    """Return dJ/db: ``1/m * sum(hat(w, x, b) - y)``."""
    return 1/m*np.sum(hat(w, x, b)-y)


def batched(iterable, n):
    """Yield consecutive tuples of at most ``n`` items taken from ``iterable``.

    Example: ``batched('ABCDEFG', 3)`` yields ``ABC``, ``DEF``, ``G``.
    Because this is a generator, the ``ValueError`` for ``n < 1`` is raised
    when iteration starts, not when the function is called.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    source = iter(iterable)
    while True:
        chunk = tuple(islice(source, n))
        if not chunk:
            # The source is exhausted; stop without yielding an empty tuple.
            return
        yield chunk


# Base learning rate, initial slope and intercept, and the sample count.
f = 1
w = 0
b = 0
m = n_samples
a = f
trace = []
turns = 1000
batches = 100
for i in tm.trange(turns):
    # The step size decays as f / ln(i + e); it equals f on the first step.
    a = f / np.log(i + np.e)
    # The loss is measured before this iteration's parameter update.
    residual = J(hat(w, X, b), Y, m)
    # Evaluate both gradients at the current (w, b) before changing either.
    grad_w = dw(X, Y, w, b, m)
    grad_b = db(X, Y, w, b, m)
    w = w - a * grad_w
    b = b - a * grad_b
    trace.append([i, w, b, a, residual])
# Print the last record of every group of `batches` iterations.
for chunk in batched(trace, batches):
    print(*chunk[-1])


flag = 1
if flag:
    plt.scatter(X, Y)
    # Draw the fitted line over the sampled range [0, 2].
    line_x = np.linspace(0, 2, 100)
    plt.plot(line_x, w * line_x + b, color="red")
    plt.show()


% 多项式回归模型  
$$ 
\hat{y} = w_1 \cdot x^2 + w_2 \cdot x + b  \\
p=(w_1,w_2,b)\\
\hat{y}=p\cdot X=(w_1,w_2,b)\cdot \begin{pmatrix}  
x^2 \\  
x \\  
1
\end{pmatrix} 
$$
% 损失函数（均方误差）  
$$ 
J(w_1,w_2, b) = \frac{1}{2m} \sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)})^2  = \frac{1}{2m} \sum_{i=1}^{m} ((w_1 \cdot x_i^2 + w_2 \cdot x_i +b) - y^{(i)})^2
$$ 
  
% 梯度下降更新规则  
$$ 
w_1 := w_1 - \alpha_1 \frac{\partial J(w_1,w_2, b)}{\partial w_1}  \\
w_2 := w_2 - \alpha_2 \frac{\partial J(w_1,w_2, b)}{\partial w_2}  \\
b := b - \alpha_3 \frac{\partial J(w_1,w_2, b)}{\partial b}\\
$$
$$
p := p - \alpha \cdot \Delta^{T}\\
or\\
p := p - \Delta^{T}\cdot
 \begin{bmatrix}
 \alpha_1&0&0\\
 0&\alpha_2&0 \\
 0&0&\alpha_3\\
 \end{bmatrix}
$$
% 偏导数  
$$
\frac{\partial J(w_1,w_2, b)}{\partial w_1} = \frac{1}{m} \sum_{i=1}^{m} ((w_1 \cdot x_i^2 + w_2 \cdot x_i +b) - y^{(i)}) \cdot x_i^2  \\
\frac{\partial J(w_1,w_2, b)}{\partial w_2} = \frac{1}{m} \sum_{i=1}^{m} ((w_1 \cdot x_i^2 + w_2 \cdot x_i +b) - y^{(i)}) \cdot x_i \\
\frac{\partial J(w_1,w_2, b)}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} ((w_1 \cdot x_i^2 + w_2 \cdot x_i +b) - y^{(i)}) \\
\Delta = \begin{pmatrix}  
\frac{\partial J(w_1,w_2, b)}{\partial w_1}\\
\frac{\partial J(w_1,w_2, b)}{\partial w_2} \\
\frac{\partial J(w_1,w_2, b)}{\partial b}
\end{pmatrix} =\frac{1}{m}\sum_{i=1}^{m}(\hat{y}^{(i)}-y^{(i)})\cdot \begin{pmatrix} x_i^2 \\ x_i \\ 1 \end{pmatrix}
$$



In [None]:
# !pip install pandas matplotlib numpy tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm as tm
from itertools import islice
# Synthetic data for the quadratic model.
# Fix the seed so every run draws the same samples.
np.random.seed(0)
# Number of samples.
n_samples = 100
# Feature X: a column of uniform random values in [0, 2).
X = 2 * np.random.rand(n_samples, 1)
# Ground-truth parameters of the parabola.
true_coef1 = 20
true_coef2 = 7
true_intercept = 10.0
# Target Y: the true parabola evaluated at X plus unit-variance Gaussian noise.
Y = (
    true_coef1 * X**2
    + true_coef2 * X
    + true_intercept
    + np.random.randn(n_samples, 1)
)
# X and Y are kept as (n_samples, 1) columns; they are not squeezed here.
def batched(iterable, n):
    """Split ``iterable`` into tuples of length ``n``; the last may be shorter.

    Example: ``batched('ABCDEFG', 3)`` yields ``ABC``, ``DEF``, ``G``.
    The ``ValueError`` for ``n < 1`` surfaces on the first ``next()`` call
    because the function is a generator.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    remaining = iter(iterable)
    group = tuple(islice(remaining, n))
    while group:
        yield group
        group = tuple(islice(remaining, n))
def J(y_hat: np.ndarray, y: np.ndarray):
    """Return the halved mean squared error ``1/(2m) * sum((y_hat - y)**2)``.

    Both arrays are flattened before subtraction, so a row-shaped and a
    column-shaped argument are compared element by element instead of being
    broadcast into an (m, m) matrix. ``m`` is the number of elements.
    """
    residual = np.ravel(y_hat) - np.ravel(y)
    m = residual.shape[0]
    return 1/(2*m)*np.sum(residual**2)
def Hat(p: np.ndarray, x: np.ndarray):
    """Return the quadratic predictions ``w1*x**2 + w2*x + b`` as a (1, m) row.

    ``p`` holds the three coefficients ordered (w1, w2, b); ``x`` holds the m
    sample positions, either as a column (m, 1) or a flat (m,) array.
    """
    m = x.shape[0]
    # One feature per row: x**2, x, and the constant 1.
    features = np.vstack([
        (x**2).reshape(1, m),
        x.reshape(1, m),
        np.ones((1, m)),
    ])
    return p.reshape(1, 3).dot(features)


def delta(p: np.ndarray, x: np.ndarray, y: np.ndarray):
    """Return the (1, 3) gradient of the halved MSE with respect to (w1, w2, b).

    ``x`` and ``y`` may be columns (m, 1) or flat (m,) arrays; ``x`` is
    reshaped to a column so the accepted shapes match those of ``Hat``.
    """
    m = y.shape[0]
    x_col = x.reshape(m, 1)
    # One sample per row, columns ordered like the coefficients in ``p``.
    design = np.hstack([x_col**2, x_col, np.ones((m, 1))])
    return 1/m*(Hat(p, x)-y.reshape(1, m)).dot(design)
# Start every coefficient at 1; entries are ordered (w1, w2, b).
p = np.array([1, 1, 1])
alpha = 1
alpha_copy = alpha  # base rate that the decay schedule divides
trace = []
turns = 10000
batches = 1000
for i in tm.trange(turns):
    # The step size decays as alpha_copy / ln(i + e).
    alpha = alpha_copy / np.log(i + np.e)
    # The loss is measured before this iteration's parameter update.
    residual = J(Hat(p, X), Y)
    # Rebind p (no in-place update) so each trace entry keeps its own array.
    p = p - alpha * delta(p, X, Y)
    trace.append([i, p, residual])

# Print the last record of every group of `batches` iterations.
for chunk in batched(trace, batches):
    print(*chunk[-1])

flag = 1
p = p.squeeze()
w1, w2, b = p
if flag:
    plt.scatter(X, Y)
    # Draw the fitted parabola over the sampled range [0, 2].
    curve_x = np.linspace(0, 2, 100)
    plt.plot(curve_x, w1 * curve_x**2 + w2 * curve_x + b, color="red")
    plt.show()


In [None]:
# todo list 
# 1.试用正则化
# 2.多元非线性回归
# 3.逻辑斯谛回归


# L1 正则化 Lasso Regression 
least absolute shrinkage and selection operator，又译最小绝对值收敛和选择算子\
$$ 
J(w, b) = \frac{1}{2m} \sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)})^2 \\ 
J_{L1} = J(w,b) + \lambda \|w\|_1
$$
# L2 正则化 Ridge Regression
$$
J(w, b) = \frac{1}{2m} \sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)})^2 \\ 
J_{L2} = J(w, b)+ \lambda \|w\|_2^2
$$

In [None]:
# !pip install pandas matplotlib numpy tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm as tm
from itertools import islice
# Synthetic data for the cubic model.
# Fix the seed so every run draws the same samples.
np.random.seed(0)
# Number of samples.
n_samples = 100
# Upper bound of the sampled interval.
rng = 3
# Feature X: a column of uniform random values in [0, rng).
X = rng * np.random.rand(n_samples, 1)
# Ground-truth parameters; the cubic coefficient is zero.
true_coef1 = 0
true_coef2 = 10
true_coef3 = 7
true_intercept = 10.0
# Number of model coefficients (three powers of x plus the intercept).
polynomial = 4
# Target Y: the true polynomial evaluated at X plus unit-variance Gaussian noise.
Y = (
    true_coef1 * X**3
    + true_coef2 * X**2
    + true_coef3 * X
    + true_intercept
    + np.random.randn(n_samples, 1)
)
def batched(iterable, n):
    """Group the items of ``iterable`` into tuples of up to ``n`` elements.

    Example: ``batched('ABCDEFG', 3)`` yields ``ABC``, ``DEF``, ``G``.
    As a generator, it raises the ``ValueError`` for ``n < 1`` only once
    iteration begins.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    stream = iter(iterable)
    while True:
        piece = tuple(islice(stream, n))
        if len(piece) == 0:
            # Nothing left to emit.
            break
        yield piece
def J(y_hat: np.ndarray, y: np.ndarray):
    """Return the halved mean squared error ``1/(2m) * sum((y_hat - y)**2)``.

    Both arrays are flattened before subtraction, so a row-shaped and a
    column-shaped argument are compared element by element instead of being
    broadcast into an (m, m) matrix. ``m`` is the number of elements.
    """
    residual = np.ravel(y_hat) - np.ravel(y)
    m = residual.shape[0]
    return 1/(2*m)*np.sum(residual**2)
def Hat(p: np.ndarray, x: np.ndarray):
    """Return the cubic predictions ``w1*x**3 + w2*x**2 + w3*x + b`` as a (1, m) row.

    ``p`` holds the four coefficients ordered (w1, w2, w3, b); ``x`` holds the
    m sample positions, either as a column (m, 1) or a flat (m,) array.
    """
    m = x.shape[0]
    # One feature per row: x**3, x**2, x, and the constant 1.
    features = np.vstack([
        (x**3).reshape(1, m),
        (x**2).reshape(1, m),
        x.reshape(1, m),
        np.ones((1, m)),
    ])
    # The feature stack fixes the coefficient count at four, so the row
    # length is taken from ``p`` itself rather than from a separate global.
    return p.reshape(1, -1).dot(features)


def delta(p: np.ndarray, x: np.ndarray, y: np.ndarray):
    """Return the (1, 4) gradient of the halved MSE with respect to (w1, w2, w3, b).

    ``x`` and ``y`` may be columns (m, 1) or flat (m,) arrays; ``x`` is
    reshaped to a column so the accepted shapes match those of ``Hat``.
    """
    m = y.shape[0]
    x_col = x.reshape(m, 1)
    # One sample per row, columns ordered like the coefficients in ``p``.
    design = np.hstack([x_col**3, x_col**2, x_col, np.ones((m, 1))])
    return 1/m*(Hat(p, x)-y.reshape(1, m)).dot(design)
# Start every coefficient at 1; entries are ordered (w1, w2, w3, b).
p = np.array([1, 1, 1, 1])
alpha = 0.01
alpha_copy = alpha  # base rate that the decay schedule divides
trace = []
turns = 10000
batches = 1000
for i in tm.trange(turns):
    # The step size decays as alpha_copy / ln(i + e).
    alpha = alpha_copy / np.log(i + np.e)
    # The loss is measured before this iteration's parameter update.
    residual = J(Hat(p, X), Y)
    # Rebind p (no in-place update) so each trace entry keeps its own array.
    p = p - alpha * delta(p, X, Y)
    trace.append([i, p, residual])

# Print the last record of every group of `batches` iterations.
for chunk in batched(trace, batches):
    print(*chunk[-1])

p = p.squeeze()
w1, w2, w3, b = p


fig, axs = plt.subplots(1, 1, figsize=(14, 5))
axs.scatter(X, Y)
# Red curve: the fit obtained from the plain (unregularised) loss.
curve_x = np.linspace(0, rng, rng * 40)
axs.plot(curve_x, w1 * curve_x**3 + w2 * curve_x**2 + w3 * curve_x + b,
         color="red")

## L1 Regression
# Weight of the L1 penalty in the objective.
lmd = 0.1


def J(p: np.ndarray, x: np.ndarray, y: np.ndarray):
    """Return the L1-regularised loss: halved MSE plus ``lmd * sum(|p|)``.

    The penalty covers every entry of ``p``, the intercept included.
    """
    m = y.shape[0]
    return 1/(2*m)*np.sum((Hat(p, x)-y.reshape(1, m))**2)+lmd*np.sum(np.abs(p))


def Hat(p: np.ndarray, x: np.ndarray):
    """Return the cubic predictions ``w1*x**3 + w2*x**2 + w3*x + b`` as a (1, m) row.

    ``p`` holds the four coefficients ordered (w1, w2, w3, b).
    """
    m = x.shape[0]
    # One feature per row: x**3, x**2, x, and the constant 1.
    features = np.vstack([
        (x**3).reshape(1, m),
        (x**2).reshape(1, m),
        x.reshape(1, m),
        np.ones((1, m)),
    ])
    # The feature stack fixes the coefficient count at four, so the row
    # length is taken from ``p`` itself rather than from a separate global.
    return p.reshape(1, -1).dot(features)


def delta(p: np.ndarray, x: np.ndarray, y: np.ndarray):
    """Return a (1, 4) subgradient of the L1-regularised loss ``J``.

    The result is the gradient of the halved MSE plus ``lmd * sign(p)``, the
    subgradient of the penalty ``lmd * sum(|p|)`` used in ``J``. Without the
    second term the descent would minimise the unregularised loss only.
    """
    m = y.shape[0]
    design = np.hstack([x**3, x**2, x, np.ones((m, 1))])
    data_grad = 1/m*(Hat(p, x)-y.reshape(1, m)).dot(design)
    # sign(0) is 0, which is a valid subgradient of |.| at zero.
    penalty_grad = lmd*np.sign(p).reshape(1, -1)
    return data_grad + penalty_grad
# Restart from all-ones coefficients, ordered (w1, w2, w3, b).
p = np.array([1, 1, 1, 1])
alpha = 0.01
alpha_copy = alpha  # base rate that the decay schedule divides
trace = []
turns = 10000
batches = 1000
for i in tm.trange(turns):
    # The step size decays as alpha_copy / ln(i + e).
    alpha = alpha_copy / np.log(i + np.e)
    # The loss is measured before this iteration's parameter update.
    residual = J(p, X, Y)
    # Rebind p (no in-place update) so each trace entry keeps its own array.
    p = p - alpha * delta(p, X, Y)
    trace.append([i, p, residual])

# Print the last record of every group of `batches` iterations.
for chunk in batched(trace, batches):
    print(*chunk[-1])

p = p.squeeze()
w1, w2, w3, b = p

# Green curve: this run's fit, drawn on the axes created earlier.
curve_x = np.linspace(0, rng, rng * 40)
axs.plot(curve_x, w1 * curve_x**3 + w2 * curve_x**2 + w3 * curve_x + b,
         color="green")
plt.show()