### Урок 3. Логистическая регрессия. Log Loss

In [1107]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set(style="whitegrid")
sns.set_context("paper", font_scale=2) 

In [1108]:
%matplotlib inline
plt.style.use('seaborn-ticks')
plt.rcParams.update({'font.size': 14})

### Logistic Regression

### Домашние задания

In [1109]:
from scipy.stats import mode

In [1110]:
df = pd.read_csv('./3_logisticRegression/framingham1000.csv')
X = df[['male', 'age', 'education', 'currentSmoker', 'cigsPerDay', 'glucose']]
y = df[['TenYearCHD']]
values = {'education': float(X.education.mode()), 
         'cigsPerDay': float(X.cigsPerDay.mode()),
         'glucose': float(X.glucose.mode())}
X = X.fillna(values)

In [1111]:
y.isna().sum(), X.isna().sum()

(TenYearCHD    0
 dtype: int64,
 male             0
 age              0
 education        0
 currentSmoker    0
 cigsPerDay       0
 glucose          0
 dtype: int64)

In [1112]:
def calc_std_feat(x):
    res = (x - x.mean()) / x.std()
    return res
X_st = X.copy()
cols = ['age', 'education', 'cigsPerDay', 'glucose']
for col in cols:
    X_st[col] = calc_std_feat(X_st[col])

X_st.T.values.shape

(6, 999)

In [1113]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_st, y, test_size = 0.4,random_state=42) 
X_train, X_test, y_train, y_test = X_train.T, X_test.T, y_train.T, y_test.T

#### 1. *Измените функцию calc_logloss так, чтобы нули по возможности не попадали в np.log (как вариант - np.clip).  

$$Logloss=-y \ln(p) - (1-y)\ln(1-p)$$

In [1114]:
def calc_logloss(y, y_pred):
    y_pred = np.clip(y_pred, 1e-12, 1)
    err = np.mean(- y * np.log(y_pred) - (1.0 - y) * np.log(1.0 - y_pred))
    return err

#### 2. Подберите аргументы функции eval_LR_model для логистической регрессии таким образом, чтобы log loss был минимальным.

In [1115]:
def sigmoid(z):
    res = 1 / (1 + np.exp(-z))
    return res

In [1116]:
def eval_LR_model(X, y, iterations, alpha=1e-4):
    np.random.seed(42)
    w = np.random.randn(X.shape[0])
    n = X.shape[1]
    errors = []
    
    for i in range(1, iterations + 1):
        z = np.dot(w, X)
        y_pred = sigmoid(z)
        y_pred = np.clip(y_pred, 1e-6, 0.9999)
        err = calc_logloss(y, y_pred)
        w = w - alpha * (1/n * np.dot((y_pred - y), X.T))
        
        if i % (iterations / 10) == 0:
            errors.append(err)
            print(i, w, err)
            
    return w, errors

In [1117]:
delim = '-' * 8
best_alpha = 0
err = np.inf
alphas = [1e-6, 1e-4, 1e-2, 0.1, 0.111, 8e-4, 8e-9]
# alphas = [0.1, 0.1999, 0.111, 0.5, 0.9, 1.1, 1.5, 1.9, 2.6, 10]
for a in alphas:
    print(delim + f' α = {a} ' + delim)
    w, errors = eval_LR_model(X_train.values, y_train.values, iterations=1000, alpha=a)

    if errors[-1] < err:
        err = errors[-1]
        best_alpha = a
print(f'logloss: {err}\tbest_alpha: {best_alpha}')

-------- α = 1e-06 --------
100 [[ 0.49669094 -0.13824843  0.6476775   1.52299702 -0.23415976 -0.23412426]] 1.2580146415722466
200 [[ 0.49666773 -0.13823256  0.64766647  1.52296418 -0.23416614 -0.23411156]] 1.2579927129544195
300 [[ 0.49664452 -0.13821668  0.64765543  1.52293134 -0.23417253 -0.23409886]] 1.2579707849026742
400 [[ 0.49662131 -0.13820081  0.64764439  1.5228985  -0.23417891 -0.23408616]] 1.2579488574170132
500 [[ 0.4965981  -0.13818494  0.64763336  1.52286566 -0.23418529 -0.23407347]] 1.2579269304974379
600 [[ 0.49657489 -0.13816907  0.64762232  1.52283283 -0.23419168 -0.23406077]] 1.2579050041439497
700 [[ 0.49655168 -0.1381532   0.64761129  1.52279999 -0.23419806 -0.23404807]] 1.2578830783565504
800 [[ 0.49652847 -0.13813733  0.64760025  1.52276715 -0.23420444 -0.23403538]] 1.2578611531352417
900 [[ 0.49650526 -0.13812146  0.64758921  1.52273432 -0.23421082 -0.23402268]] 1.2578392284800253
1000 [[ 0.49648206 -0.13810559  0.64757818  1.52270148 -0.23421721 -0.23400998]] 

#### 3. Создайте функцию calc_pred_proba, возвращающую предсказанную вероятность класса 1 (на вход подаются веса, которые уже посчитаны функцией eval_LR_model и X, на выходе - массив y_pred_proba).

In [1118]:
def calc_pred_proba(w, X):
    return sigmoid(np.dot(w, X))

In [1119]:
w, err = eval_LR_model(X_train.values, y_train.values, iterations=20000, alpha=best_alpha)

2000 [[-0.6154564   0.54048347  0.11242931 -2.79640843  1.42188692  0.2486447 ]] 0.43557376569849593
4000 [[-0.58207771  0.54595793  0.1122301  -2.86136329  1.44552723  0.24736286]] 0.4355276828751495
6000 [[-0.58065702  0.54619809  0.11222159 -2.86417265  1.44655338  0.2473097 ]] 0.43552759786151335
8000 [[-0.58059486  0.54620862  0.11222122 -2.86429566  1.44659832  0.24730738]] 0.43552759769861454
10000 [[-0.58059213  0.54620908  0.11222121 -2.86430105  1.44660029  0.24730728]] 0.43552759769830185
12000 [[-0.58059202  0.5462091   0.1122212  -2.86430129  1.44660038  0.24730727]] 0.4355275976983013
14000 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275976983012
16000 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275976983013
18000 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275976983012
20000 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275

#### 4. Создайте функцию calc_pred, возвращающую предсказанный класс (на вход подаются веса, которые уже посчитаны функцией eval_LR_model и X, на выходе - массив y_pred).

In [1120]:
def calc_pred(w, X):
    y_pred_proba = calc_pred_proba(w, X)
    return (y_pred_proba > 0.5).astype('int')

y_pred = calc_pred(w, X_train.values)
y_pred.shape

(1, 599)

#### 5. Посчитайте accuracy, матрицу ошибок, precision и recall, а также F1-score.

In [1121]:
def calc_accuracy(target, prediction):
    is_equal = (target == prediction).astype('int')
#     print(target.T)
#     print(prediction.T)
#     print(is_equal.T)
    return float(sum(is_equal) / len(is_equal))

In [1122]:
def make_confusion_matrix(prediction, target):
    if len(target) == len(prediction):

        models_ind = ['a(x) = 1', 'a(x) = 0']
        true_cols = ['y = 1', 'y = 0']
        df = pd.DataFrame(np.zeros((2, 2)), index=models_ind, columns=true_cols)
        
        true_positive = np.equal(prediction, 1) & np.equal(target, 1)
        true_negative = np.equal(prediction, 0) & np.equal(target, 0)
        false_positive = np.equal(prediction, 1) & np.equal(target, 0)
        false_negative = np.equal(prediction, 0) & np.equal(target, 1)
        df.iloc[[0], 0] = true_positive.sum()
        df.iloc[[1], 1] = true_negative.sum()
        df.iloc[[0], 1] = false_positive.sum()
        df.iloc[[1], 0] = false_negative.sum()
        return df
    else:
        return 'target and prediction arrays must have the same lengths'

In [1123]:
conf_matrix = make_confusion_matrix(y_pred, y_train.values)
conf_matrix

Unnamed: 0,y = 1,y = 0
a(x) = 1,9.0,17.0
a(x) = 0,81.0,492.0


In [1124]:
def calc_precision(confusion_matrix):
    if isinstance(confusion_matrix, pd.DataFrame):
        df = confusion_matrix.copy()
        TP, FP = float(df.iloc[[0], 0]), float(df.iloc[[0], 1])
        return TP / (FP + TP)
    else:
        return 'unexpected type of matrix'

In [1125]:
def calc_recall(confusion_matrix):
    if isinstance(confusion_matrix, pd.DataFrame):
        df = confusion_matrix.copy()
        TP, FN = float(df.iloc[[0], 0]), float(df.iloc[[1], 0])
        return TP / (TP + FN)
    else:
        return 'unexpected type of matrix'

In [1126]:
def calc_F_score(precision, recall):
    return (2 * precision * recall) / (precision + recall) 

In [1127]:
def calc_F_beta_score(precision, recall, beta=10):
    return (1 + beta**2) * (precision * recall) / (beta**2 * (precision + recall)) 

In [1128]:
accuracy = calc_accuracy(y_train.values.T, y_pred.T)
precision = calc_precision(conf_matrix)
recall = calc_recall(conf_matrix)
fscore = calc_F_score(precision, recall)
f_beta_score = calc_F_beta_score(precision, recall)
print(f'Accuracy:\t{accuracy}\nPrecision:\t{precision}\nRecall: \t{recall}\nF-score:\t{fscore}\nF-beta-score:\t{f_beta_score}')

Accuracy:	0.8363939899833055
Precision:	0.34615384615384615
Recall: 	0.1
F-score:	0.15517241379310348
F-beta-score:	0.07836206896551724


In [1129]:
delim = '-' * 8
best_alpha = 0
err = np.inf
alphas = [1e-6, 1e-4, 1e-2, 0.1, 0.111, 0.9, 8e-4, 8e-9]
for a in alphas:
    print(delim + f' α = {a} ' + delim)
    w, errors = eval_LR_model(X_test.values, y_test.values, iterations=1000, alpha=a)

    if errors[-1] < err:
        err = errors[-1]
        best_alpha = a
print(f'logloss: {err}\tbest_alpha: {best_alpha}')

-------- α = 1e-06 --------
100 [[ 0.49669002 -0.13824617  0.64767204  1.52299845 -0.23416184 -0.23413061]] 1.2476557876807033
200 [[ 0.49666588 -0.13822804  0.64765554  1.52296704 -0.2341703  -0.23412426]] 1.2476329712652854
300 [[ 0.49664175 -0.13820991  0.64763905  1.52293564 -0.23417876 -0.23411791]] 1.2476101554828551
400 [[ 0.49661761 -0.13819178  0.64762255  1.52290423 -0.23418722 -0.23411156]] 1.2475873403334163
500 [[ 0.49659348 -0.13817365  0.64760605  1.52287283 -0.23419568 -0.23410521]] 1.2475645258169725
600 [[ 0.49656935 -0.13815552  0.64758956  1.52284142 -0.23420414 -0.23409886]] 1.2475417119335261
700 [[ 0.49654521 -0.1381374   0.64757306  1.52281002 -0.2342126  -0.23409251]] 1.2475188986830819
800 [[ 0.49652108 -0.13811927  0.64755656  1.52277861 -0.23422106 -0.23408616]] 1.247496086065642
900 [[ 0.49649695 -0.13810114  0.64754007  1.52274721 -0.23422952 -0.23407981]] 1.2474732740812107
1000 [[ 0.49647281 -0.13808301  0.64752357  1.5227158  -0.23423798 -0.23407347]] 1

In [1130]:
w, errors = eval_LR_model(X_test.values, y_test.values, iterations=2000, alpha=best_alpha)

200 [[-0.76051665  0.43359037 -0.0109693  -1.60918267  0.92584692  0.03353707]] 0.5255613185944493
400 [[-0.75459626  0.43354253 -0.01103434 -1.61693177  0.92793224  0.0340663 ]] 0.525559902180096
600 [[-0.75455001  0.43354219 -0.01103484 -1.61699249  0.92794862  0.03407044]] 0.5255599020933802
800 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933749
1000 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933748
1200 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933748
1400 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933748
1600 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933749
1800 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933749
2000 [[-0.75454965  0.43354218 -0.01103485 -1.61699297  0.92794875  0.03407048]] 0.5255599020933749


In [1131]:
y_test_pred = calc_pred(w, X_test.values)

In [1132]:
conf_matrix = make_confusion_matrix(y_test_pred, y_test.values)
conf_matrix

Unnamed: 0,y = 1,y = 0
a(x) = 1,10.0,14.0
a(x) = 0,68.0,308.0


In [1133]:
accuracy = calc_accuracy(y_test.values.T, y_test_pred.T)
precision = calc_precision(conf_matrix)
recall = calc_recall(conf_matrix)
fscore = calc_F_score(precision, recall)
f_beta_score = calc_F_beta_score(precision, recall)
print(f'Accuracy:\t{accuracy}\nPrecision:\t{precision}\nRecall: \t{recall}\nF-score:\t{fscore}\nF-beta-score:\t{f_beta_score}')

Accuracy:	0.795
Precision:	0.4166666666666667
Recall: 	0.1282051282051282
F-score:	0.196078431372549
F-beta-score:	0.09901960784313722


#### 6. Могла ли модель переобучиться? Почему?

Модель может переобучиться из-за её сложности и избыточности количества признаков. Чтобы узнать, не переобучилась ли модель, которую я строила для датасета риска сердечной недостаточности, я разделила датасет на train и test. Точность(accuracy) на train получилась 0.836, что немного превышает точность предсказаний на test'е - 0.795. При этом точность(precision) на test'е немного выше, но всё ещё меньше 50%, полнота (recall) так же определила низкое значение. На мой взгляд проблема в несбалансированности выборки конкретно в данной ситуации.

#### 7. *Создайте функции eval_LR_model_l1 и eval_LR_model_l2 с применением L1 и L2 регуляризации соответственно.

###### L1-regularization
$$\sum^{n}_{i=1}L_i(\vec{x}_i,{y_i},\vec{w}) + \lambda \sum^{m}_{j=1}|w_{j}| \rightarrow \underset{w}{\text{min}}$$

###### L2-regularization
$$\sum^{n}_{i=1}L_i(\vec{x}_i,y_i,\vec{w}) + \lambda \sum^{m}_{j=1}w_{j}^{2} \rightarrow \underset{w}{\text{min}}$$

In [1134]:
def eval_LR_model_l1(X, y, iterations, alpha=1e-4, lambda_=1e-8):
    np.random.seed(42)
    w = np.random.randn(X.shape[0])
    n = X.shape[1]
    m = X.shape[0]
    errors = []
    for i in range(1, iterations + 1):
        z = np.dot(w, X)
        y_pred = sigmoid(z)
        y_pred = np.clip(y_pred, 0.00001, 0.99999)
        err = calc_logloss(y, y_pred) + lambda_ / m * np.linalg.norm(w, ord=1)
        w = w - alpha * (1/n * np.dot((y_pred - y), X.T) + lambda_ / m * sum(np.sign(w))) 
        if i % (iterations / 10) == 0:
            errors.append(err)
            print(i, w, err)
    return w, errors

In [1135]:
delim = '-' * 8
best_alpha = 0
best_lambda = 0
err = np.inf
alphas = [1e-8, 1e-6, 1e-3, 1e-1, 1, 0.999]
lambdas = [1e-6, 1e-4, 1e-2, 0.1, 0.111, 0.9, 8e-4, 8e-9]
for a in alphas:
    for l in lambdas:
        print(delim + f' α = {a}, λ = {l} ' + delim)
        w, errors = eval_LR_model_l1(X_train.values, y_train.values, 2000, alpha=a, lambda_=l)
        if errors[-1] < err:
            err = errors[-1]
            best_alpha = a
            best_lambda = l
print(f'logloss: {err}\tbest_alpha: {best_alpha} \tbest_lambda: {best_lambda}')

-------- α = 1e-08, λ = 1e-06 --------
200 [[ 0.49671369 -0.13826398  0.64768832  1.5230292  -0.2341535  -0.2341367 ]] 1.2580361689032797
400 [[ 0.49671322 -0.13826367  0.6476881   1.52302854 -0.23415363 -0.23413645]] 1.2580357303140137
600 [[ 0.49671276 -0.13826335  0.64768788  1.52302789 -0.23415376 -0.2341362 ]] 1.258035291724974
800 [[ 0.4967123  -0.13826303  0.64768766  1.52302723 -0.23415389 -0.23413594]] 1.2580348531361607
1000 [[ 0.49671183 -0.13826271  0.64768743  1.52302657 -0.23415401 -0.23413569]] 1.2580344145475737
1200 [[ 0.49671137 -0.1382624   0.64768721  1.52302592 -0.23415414 -0.23413543]] 1.2580339759592127
1400 [[ 0.4967109  -0.13826208  0.64768699  1.52302526 -0.23415427 -0.23413518]] 1.2580335373710783
1600 [[ 0.49671044 -0.13826176  0.64768677  1.5230246  -0.2341544  -0.23413493]] 1.2580330987831707
1800 [[ 0.49670998 -0.13826144  0.64768655  1.52302395 -0.23415452 -0.23413467]] 1.2580326601954896
2000 [[ 0.49670951 -0.13826113  0.64768633  1.52302329 -0.23415465

800 [[ 0.49652847 -0.13813733  0.64760025  1.52276715 -0.23420444 -0.23403538]] 1.257861406811102
1000 [[ 0.49648206 -0.13810559  0.64757818  1.52270148 -0.23421721 -0.23400998]] 1.257817558026068
1200 [[ 0.49643564 -0.13807385  0.64755611  1.52263581 -0.23422997 -0.23398459]] 1.2577737115054177
1400 [[ 0.49638922 -0.13804211  0.64753404  1.52257014 -0.23424273 -0.2339592 ]] 1.2577298672491646
1600 [[ 0.49634281 -0.13801038  0.64751197  1.52250447 -0.23425549 -0.23393381]] 1.257686025257321
1800 [[ 0.49629639 -0.13797865  0.6474899   1.5224388  -0.23426824 -0.23390842]] 1.2576421855299011
2000 [[ 0.49624998 -0.13794691  0.64746783  1.52237313 -0.234281   -0.23388303]] 1.2575983480669173
-------- α = 1e-06, λ = 0.0001 --------
200 [[ 0.49666773 -0.13823255  0.64766646  1.52296418 -0.23416614 -0.23411156]] 1.2580180927502085
400 [[ 0.4966213  -0.1382008   0.64764439  1.5228985  -0.2341789  -0.23408616]] 1.257974233142624
600 [[ 0.49657488 -0.13816906  0.64762231  1.52283282 -0.23419167 -

1200 [[ 0.23587713  0.02323158  0.52330634  1.15387557 -0.28452644 -0.09903928]] 1.0351857393720043
1400 [[ 0.19612561  0.04462177  0.50434584  1.09747029 -0.28785214 -0.08004551]] 1.005421621112256
1600 [[ 0.15749134  0.064526    0.48595171  1.04256537 -0.28980893 -0.06205863]] 0.9775658302128227
1800 [[ 0.11997896  0.08299896  0.46814116  0.98915404 -0.29045091 -0.04505834]] 0.9515165874880268
2000 [[ 0.08358685  0.10010163  0.45092775  0.93722162 -0.28983987 -0.02901716]] 0.9271678258981615
-------- α = 0.001, λ = 0.0001 --------
200 [[ 0.45074233 -0.10731419  0.62581501  1.45799808 -0.24620646 -0.20918723]] 1.2155376681952592
400 [[ 0.40571437 -0.07798074  0.60436416  1.39431029 -0.25680681 -0.18517103]] 1.1750604523549886
600 [[ 0.36167588 -0.05026917  0.58336062  1.33201628 -0.26593569 -0.16212413]] 1.136832554417623
800 [[ 0.31866634 -0.02417358  0.56282993  1.27115684 -0.27358774 -0.14007366]] 1.1008174490665232
1000 [[ 2.76718540e-01  3.23047332e-04  5.42797667e-01  1.21176312

800 [[-0.86447133  0.4956242   0.11114599 -2.30488121  1.23581718  0.25833414]] 0.4389283900516319
1000 [[-0.79299322  0.51118017  0.11241303 -2.4573461   1.29654982  0.2557861 ]] 0.4372918975583471
1200 [[-0.73815284  0.52099467  0.11275519 -2.56459514  1.33718867  0.25361704]] 0.4364692449819458
1400 [[-0.69730151  0.52764243  0.11276895 -2.64189735  1.36572396  0.25195087]] 0.4360393756777753
1600 [[-0.66717352  0.53236709  0.11268558 -2.69844927  1.38634709  0.25071873]] 0.4358093036591352
1800 [[-0.64498719  0.53582284  0.11258802 -2.74021345  1.40150568  0.24981914]] 0.4356841370366589
2000 [[-0.6286146   0.53839072  0.11250222 -2.771249    1.41275784  0.24916346]] 0.43561524164607923
-------- α = 0.1, λ = 0.0001 --------
200 [[-0.98153426  0.32913472  0.09254048 -1.03044495  0.55713767  0.24274385]] 0.48580975019927125
400 [[-1.02852288  0.4183868   0.09899447 -1.71666687  0.95868735  0.25893971]] 0.45131134955032437
600 [[-0.95013859  0.46851523  0.10739174 -2.07850566  1.13741

200 [[-0.62734902  0.53859615  0.11249639 -2.77368496  1.413646    0.24911381]] 0.4356128988122103
400 [[-0.58323622  0.54575908  0.11223524 -2.85906236  1.44468455  0.24740506]] 0.43552835192743705
600 [[-0.58074518  0.54617969  0.11222033 -2.86398532  1.44648247  0.24731175]] 0.4355280760287528
800 [[-0.58059935  0.54620437  0.11221946 -2.86427389  1.44658789  0.2473063 ]] 0.4355280750852697
1000 [[-0.58059079  0.54620582  0.1122194  -2.86429083  1.44659408  0.24730598]] 0.4355280750822277
1200 [[-0.58059029  0.54620591  0.1122194  -2.86429182  1.44659444  0.24730596]] 0.43552807508222924
1400 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508222996
1600 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508222996
1800 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508222996
2000 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.4355280750822299

400 [[-0.58325144  0.54575652  0.11223533 -2.85903233  1.44467359  0.24740564]] 0.4355283551108906
600 [[-0.58074651  0.54617946  0.11222034 -2.86398268  1.4464815   0.2473118 ]] 0.43552807604513766
800 [[-0.58059945  0.54620436  0.11221946 -2.86427369  1.44658782  0.24730631]] 0.4355280750853427
1000 [[-0.5805908   0.54620582  0.1122194  -2.86429081  1.44659407  0.24730598]] 0.4355280750822278
1200 [[-0.58059029  0.54620591  0.1122194  -2.86429182  1.44659444  0.24730596]] 0.43552807508222924
1400 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508222996
1600 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508223007
1800 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508222996
2000 [[-0.58059026  0.54620591  0.1122194  -2.86429188  1.44659446  0.24730596]] 0.43552807508222996
-------- α = 0.999, λ = 0.0001 --------
200 [[-0.62725024  0.5382749   0.11231976 -2.77263055  1.

2000 [[-0.580592    0.54620907  0.11222119 -2.86430122  1.44660033  0.24730726]] 0.43552760151736963
logloss: 0.43552760151736963	best_alpha: 1 	best_lambda: 8e-09


In [1136]:
w, errors = eval_LR_model_l1(X_train.values, y_train.values, 2000, alpha=best_alpha, lambda_=best_lambda)
y_train_pred = calc_pred(w, X_train.values)
conf_matrix = make_confusion_matrix(y_train_pred, y_train.values)
conf_matrix

200 [[-0.6273514   0.53859915  0.11249817 -2.77369288  1.41365133  0.24911511]] 0.4356124288576214
400 [[-0.58323803  0.54576223  0.11223703 -2.85907155  1.44469037  0.24740636]] 0.4355278784449196
600 [[-0.58074692  0.54618285  0.11222212 -2.86399465  1.44648834  0.24731305]] 0.43552760246789574
800 [[-0.58060109  0.54620753  0.11222124 -2.86428323  1.44659376  0.2473076 ]] 0.4355276015206401
1000 [[-0.58059253  0.54620898  0.11222119 -2.86430017  1.44659995  0.24730728]] 0.43552760151738074
1200 [[-0.58059203  0.54620907  0.11222119 -2.86430116  1.44660031  0.24730726]] 0.4355276015173698
1400 [[-0.580592    0.54620907  0.11222119 -2.86430122  1.44660033  0.24730726]] 0.43552760151736963
1600 [[-0.580592    0.54620907  0.11222119 -2.86430122  1.44660033  0.24730726]] 0.43552760151736963
1800 [[-0.580592    0.54620907  0.11222119 -2.86430122  1.44660033  0.24730726]] 0.43552760151736963
2000 [[-0.580592    0.54620907  0.11222119 -2.86430122  1.44660033  0.24730726]] 0.4355276015173696

Unnamed: 0,y = 1,y = 0
a(x) = 1,9.0,17.0
a(x) = 0,81.0,492.0


In [1137]:
accuracy = calc_accuracy(y_train.values.T, y_train_pred.T)
precision = calc_precision(conf_matrix)
recall = calc_recall(conf_matrix)
fscore = calc_F_score(precision, recall)
f_beta_score = calc_F_beta_score(precision, recall)
print(f'Accuracy:\t{accuracy}\nPrecision:\t{precision}\nRecall: \t{recall}\nF-score:\t{fscore}\nF-beta-score:\t{f_beta_score}')

Accuracy:	0.8363939899833055
Precision:	0.34615384615384615
Recall: 	0.1
F-score:	0.15517241379310348
F-beta-score:	0.07836206896551724


In [1138]:
def eval_LR_model_l2(X, y, iterations, alpha=1e-4, lambda_=1e-8):
    np.random.seed(42)
    w = np.random.randn(X.shape[0])
    m = X.shape[0]
    errors = []
    
    for i in range(1, iterations + 1):
        z = np.dot(w, X)
        y_pred = sigmoid(z)
        y_pred = np.clip(y_pred, 0.00001, 0.99999)
        err = calc_logloss(y, y_pred) + lambda_ / (2 * m) * np.linalg.norm(w, ord=2)
        w = w - alpha * (1/n * np.dot((y_pred - y), X.T) + lambda_ / m * np.sum(w))
        if i % (iterations / 10) == 0:
            errors.append(err)
            print(i, w, err)
    return w, errors

In [1139]:
delim = '-' * 8
best_alpha = 0
best_lambda = 0
err = np.inf
alphas = [1e-8, 1e-6, 1e-3, 1e-1, 1, 0.999]
lambdas = [1e-6, 1e-4, 1e-2, 0.1, 0.111, 0.9, 8e-4, 8e-9]
for a in alphas:
    for l in lambdas:
        print(delim + f' α = {a}, λ = {l} ' + delim)
        w, errors = eval_LR_model_l2(X_train.values, y_train.values, 2000, alpha=a, lambda_=l)
        if errors[-1] < err:
            err = errors[-1]
            best_alpha = a
            best_lambda = l
print(f'logloss: {err}\tbest_alpha: {best_alpha} \tbest_lambda: {best_lambda}')

-------- α = 1e-08, λ = 1e-06 --------
200 [[ 0.49668635 -0.13824529  0.64767532  1.52299052 -0.23416102 -0.23412174]] 1.2580103587916496
400 [[ 0.49665854 -0.13822627  0.6476621   1.52295117 -0.23416867 -0.23410653]] 1.2579840885079048
600 [[ 0.49663073 -0.13820725  0.64764887  1.52291183 -0.23417632 -0.23409132]] 1.2579578190366054
800 [[ 0.49660293 -0.13818824  0.64763565  1.52287249 -0.23418397 -0.23407611]] 1.2579315503777528
1000 [[ 0.49657512 -0.13816923  0.64762243  1.52283315 -0.23419161 -0.2340609 ]] 1.2579052825313508
1200 [[ 0.49654732 -0.13815021  0.64760921  1.52279382 -0.23419926 -0.23404569]] 1.257879015497401
1400 [[ 0.49651951 -0.1381312   0.64759599  1.52275448 -0.23420691 -0.23403047]] 1.257852749275908
1600 [[ 0.49649171 -0.13811219  0.64758277  1.52271514 -0.23421455 -0.23401526]] 1.2578264838668736
1800 [[ 0.49646391 -0.13809318  0.64756955  1.5226758  -0.2342222  -0.23400005]] 1.257800219270302
2000 [[ 0.4964361  -0.13807417  0.64755633  1.52263646 -0.23422984 -

600 [[ 0.48838663 -0.13258525  0.64372853  1.51124822 -0.23642514 -0.2295875 ]] 1.250204604663313
800 [[ 0.48561726 -0.13070375  0.64241142  1.5073303  -0.23717215 -0.22807745]] 1.2476058552215092
1000 [[ 0.48285113 -0.12882801  0.64109576  1.50341705 -0.23791403 -0.22657064]] 1.2450152394606386
1200 [[ 0.48008827 -0.12695803  0.63978155  1.49950848 -0.23865076 -0.22506707]] 1.242432759057508
1400 [[ 0.47732868 -0.12509383  0.63846881  1.49560461 -0.23938235 -0.22356676]] 1.2398584154299284
1600 [[ 0.47457238 -0.1232354   0.63715753  1.49170544 -0.24010878 -0.22206971]] 1.2372922097362926
1800 [[ 0.47181937 -0.12138275  0.63584773  1.487811   -0.24083005 -0.22057594]] 1.2347341428751906
2000 [[ 0.46906967 -0.11953587  0.6345394   1.48392129 -0.24154615 -0.21908545]] 1.2321842154850777
-------- α = 1e-06, λ = 0.0001 --------
200 [[ 0.49393509 -0.13636554  0.64636709  1.519098   -0.23491575 -0.23261727]] 1.2554410130895743
400 [[ 0.49115924 -0.13447253  0.64504708  1.51517078 -0.23567301

600 [[-1.03693865  0.40366792  0.09674277 -1.61414067  0.90335079  0.25758782]] 0.45466568016385167
800 [[-1.00193359  0.44220976  0.10316124 -1.88273747  1.04405087  0.26038225]] 0.44664030143879313
1000 [[-0.95086534  0.46843796  0.1074982  -2.07690783  1.13678349  0.26043296]] 0.44245664027728915
1200 [[-0.89817453  0.48647442  0.11007284 -2.22387023  1.20179358  0.25933013]] 0.4400231030342119
1400 [[-0.84943955  0.49921191  0.11150348 -2.33847227  1.24951192  0.25783303]] 0.43851522812918736
1600 [[-0.80655495  0.50849668  0.11225148 -2.42968399  1.28579589  0.25629844]] 0.43754464658523495
1800 [[-0.76979576  0.51548187  0.11260979 -2.50333506  1.31414304  0.25488233]] 0.4369045485280694
2000 [[-0.73874354  0.52088879  0.11275351 -2.56343008  1.33674851  0.25364094]] 0.4364753757990745
-------- α = 0.001, λ = 0.0001 --------
200 [[-0.80139979  0.29209152  0.11927744 -0.50896271  0.22917865  0.21849926]] 0.5389120122529825
400 [[-1.01643749  0.34948874  0.09136942 -1.21266997  0.6

600 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
800 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
1000 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
1200 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
1400 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
1600 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
1800 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
2000 [[-0.58059198  0.54620913  0.11222123 -2.86430126  1.44660039  0.24730729]] 0.4355278741598264
-------- α = 0.1, λ = 0.0001 --------
200 [[-0.58058912  0.54621191  0.11222411 -2.86429781  1.44660137  0.24730882]] 0.43555524383313154
400 [[-0.58058911  0.54621191  0.11222411 -2.86429784  1.4466013

800 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
1000 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
1200 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
1400 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
1600 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
1800 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
2000 [[ -5.43806103  -3.30508635   4.36037703 -12.40355303  11.13484538
    1.08111093]] 2.3134474711962545
-------- α = 1, λ = 0.0001 --------
200 [[ -5.43788895  -3.30493751   4.36060193 -12.40339588  11.13517645
    1.08153936]] 2.313637922061362
400 [[ -5.43788895  -3.30493751   4.36060193 -12.40339588  11.13517645
    1.08153936]] 2.3136379220614

1800 [[ -5.43806275  -3.30508784   4.36037477 -12.40355461  11.13484206
    1.08110664]] 2.3134455627974035
2000 [[ -5.43806275  -3.30508784   4.36037477 -12.40355461  11.13484206
    1.08110664]] 2.3134455627974035
-------- α = 0.999, λ = 1e-06 --------
200 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405969317
400 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405971
600 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405971
800 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405971005
1000 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405971
1200 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405971005
1400 [[ -5.43234809  -3.30151954   4.35588095 -12.39209188  11.1240766
    1.07989962]] 2.3125504405971
1600 [[ -5.4

600 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.3125485344518992
800 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.3125485344518992
1000 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.3125485344518992
1200 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.312548534451899
1400 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.3125485344518992
1600 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.312548534451899
1800 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.3125485344518992
2000 [[ -5.43234982  -3.30152103   4.3558787  -12.39209345  11.12407329
    1.07989534]] 2.3125485344518992
logloss: 0.4355275999099934	best_alpha: 0.1 	best_lambda: 8e-09


In [1140]:
w, errors = eval_LR_model_l2(X_train.values, y_train.values, 2000, alpha=best_alpha, lambda_=best_lambda)
y_train_pred = calc_pred(w, X_train.values)
conf_matrix = make_confusion_matrix(y_train_pred, y_train.values)
conf_matrix

200 [[-0.58059203  0.5462091   0.11222121 -2.86430126  1.44660037  0.24730727]] 0.4355275999099935
400 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
600 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
800 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
1000 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
1200 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
1400 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
1600 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
1800 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934
2000 [[-0.58059201  0.5462091   0.1122212  -2.8643013   1.44660038  0.24730727]] 0.4355275999099934


Unnamed: 0,y = 1,y = 0
a(x) = 1,9.0,17.0
a(x) = 0,81.0,492.0


In [1141]:
accuracy = calc_accuracy(y_train.values.T, y_train_pred.T)
precision = calc_precision(conf_matrix)
recall = calc_recall(conf_matrix)
fscore = calc_F_score(precision, recall)
f_beta_score = calc_F_beta_score(precision, recall)
print(f'Accuracy:\t{accuracy}\nPrecision:\t{precision}\nRecall: \t{recall}\nF-score:\t{fscore}\nF-beta-score:\t{f_beta_score}')

Accuracy:	0.8363939899833055
Precision:	0.34615384615384615
Recall: 	0.1
F-score:	0.15517241379310348
F-beta-score:	0.07836206896551724
