## Logistic regression
$$ Input(X,y) \mapsto Generalization(z = f(\mathbf{w}^T X)) \mapsto Prediction(\hat{y}) $$
$$ f(s) = \frac{1}{1 + e^{-s}} $$
$$ threshold = 0.5 $$
- Loss function:
$$ J(w) = -\sum_{i=1}^N(y_i \log {z}_i + (1-y_i) \log (1 - {z}_i))$$
- Find W:
> - Stochastic Gradient Descent:
$$ w = w + \eta(y_i - z_i)x_i $$
___
- Multinomial Logistic Regression (Softmax):
> $$ Input(X,y) \mapsto Generalization(P_k = f(\mathbf{w}^T X)_k) \mapsto Prediction(\hat{y}) $$
> $$ f(s)_k = \frac{\exp(s_k)}{\sum_{j=1}^C \exp(s_j)}, ~~ \forall k = 1, 2, \dots, C $$
___
- Note:
___
- Resources:
> - Book: Hands-On Machine Learning with Scikit-Learn and TensorFlow (Aurélien Géron)
> - Blog: [Machinelearningcoban](https://machinelearningcoban.com/2016/12/28/linearregression/)
> - CS229 note

In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Load the Iris dataset bundled with scikit-learn and list the fields
# exposed by the returned container (data, target, names, description).
iris = load_iris()
iris.keys()

dict_keys(['feature_names', 'target_names', 'data', 'DESCR', 'target'])

In [6]:
# Feature matrix for the binary (one class vs. the rest) problem.
iris_X = iris['data']
# Binary target: 1 where the original class label is 2, 0 otherwise.
# The built-in `int` replaces `np.int`, a deprecated alias that was removed
# in NumPy 1.24 (it was always just the Python `int`, so the dtype is unchanged).
iris_y = (iris['target'] == 2).astype(int)
# Seed both the shuffle and the split so that Restart Kernel -> Run All
# yields the same train/test partition every time.
iris_X, iris_y = shuffle(iris_X, iris_y, random_state=42)
iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(
    iris_X, iris_y, test_size=0.2, random_state=42
)

In [7]:
from sklearn.linear_model import LogisticRegression
# Train a default-configured logistic regression on the binary training split;
# `fit` hands back the estimator itself, so construction and training chain.
lgr = LogisticRegression().fit(iris_X_train, iris_y_train)
# Predicted class probabilities for every held-out sample (one row per sample).
lgr_pro = lgr.predict_proba(iris_X_test)

def convert_to_float(input_list):
    """Return a new list with every element of ``input_list`` cast to ``float``.

    Args:
        input_list: Any iterable whose items are accepted by ``float()``
            (numbers, numeric strings, NumPy scalars, ...).

    Returns:
        list[float]: The converted values, in the original order.

    Raises:
        TypeError / ValueError: Propagated from ``float()`` for items that
            cannot be converted.
    """
    return list(map(float, input_list))

# Convert each probability row into a plain list of Python floats,
# giving a nested list instead of a NumPy array.
lgr_pro1 = list(map(convert_to_float, lgr_pro))
lgr_pro1

[[0.8537034749105638, 0.14629652508943622],
 [0.999770882779512, 0.00022911722048803956],
 [0.9999499900439668, 5.000995603320993e-05],
 [0.9998894943833992, 0.00011050561660080232],
 [0.9120809305597452, 0.08791906944025481],
 [0.999812289816246, 0.00018771018375396165],
 [0.28509477476650424, 0.7149052252334958],
 [0.3892503483039884, 0.6107496516960116],
 [0.9999885825511509, 1.1417448849153753e-05],
 [0.5516759348722915, 0.44832406512770856],
 [0.610073111147478, 0.389926888852522],
 [0.7427775987281993, 0.2572224012718008],
 [0.05914874096862499, 0.940851259031375],
 [0.9999197031337124, 8.029686628762306e-05],
 [0.8823932686739491, 0.11760673132605089],
 [0.17635460813399995, 0.823645391866],
 [0.2196269041483072, 0.7803730958516928],
 [0.6797756755419236, 0.32022432445807636],
 [0.8994006448396327, 0.10059935516036732],
 [0.27795887999525015, 0.7220411200047498],
 [0.6792109770627377, 0.3207890229372623],
 [0.35677689804147406, 0.643223101958526],
 [0.9097046537973432, 0.0902953

In [8]:
# Hard class labels (0 or 1) predicted by the binary model for the test split.
lgr_predict = lgr.predict(iris_X_test)
lgr_predict

array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 1])

In [12]:
from sklearn.metrics import log_loss

# Log loss (cross-entropy) of the predicted probabilities against the
# true binary labels of the test split.
log_loss_score = log_loss(iris_y_test, lgr_pro1)
log_loss_score

0.21969275682045508

### SOFTMAX

In [15]:
# Multinomial (softmax) logistic regression over all iris classes.
# C is the inverse regularization strength, so C=10 regularizes less than
# the default of 1.0.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases
# (multinomial is what lbfgs uses for 3+ classes there); it is kept so the
# cell still selects softmax on older versions -- drop it when upgrading.
softmax = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=10)

# Seed the shuffle and the split so the predictions and the score are
# reproducible under Restart Kernel -> Run All.
iris_X1, iris_y1 = shuffle(iris.data, iris.target, random_state=42)
iris_X1_train, iris_X1_test, iris_y1_train, iris_y1_test = train_test_split(
    iris_X1, iris_y1, random_state=42
)

softmax.fit(iris_X1_train, iris_y1_train)
softmax_predict = softmax.predict(iris_X1_test)
print(softmax_predict)

[2 2 2 1 1 0 1 2 0 2 2 0 1 2 2 1 1 0 2 1 1 0 2 1 0 0 2 0 1 0 0 1 1 2 0 2 1
 1]


In [16]:
# Score of the softmax model on its held-out test split
# (for scikit-learn classifiers, `score` reports mean accuracy).
softmax.score(iris_X1_test, iris_y1_test)

0.97368421052631582