## Testing importing TF(Keras) logistic regression

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
%matplotlib inline

In [40]:
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [41]:
def NN_pred(yhat):
    if yhat >= 0.5:
        return 1
    else:
        return 0

In [42]:
def eval_err(y, yhat):
    m = y.shape[0]
    incorrect = 0
    y = y.tolist()
    for i in range(m):
        if yhat[i] != y[i]:
            incorrect += 1
            
    incorrect = incorrect / m
    
    return incorrect 

In [43]:
def pred_output(prediction):
    ex = prediction.shape[0]
    output = []
    for i in range(ex):
        output.append(NN_pred(prediction[i]))
    
    return output

In [3]:
train_set = pd.read_csv(("../TrainTestSet/TrainSet1.csv"))
train_set.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Q,S,AgeFare,SibPar
0,0,3,1,22.0,1,0,7.25,0,1,159.5,1
1,1,1,0,38.0,1,0,71.2833,0,0,2708.7654,1
2,1,3,0,26.0,0,0,7.925,0,1,206.05,0
3,1,1,0,35.0,1,0,53.1,0,1,1858.5,1
4,0,3,1,35.0,0,0,8.05,0,1,281.75,0


### Creating x_train, y_train set

In [172]:
y = train_set['Survived']
x = train_set.drop(['Survived'], axis=1)

In [173]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=101)

### Fitting Sklearn logistic Reg

In [174]:
model_sk_log = LogisticRegression(max_iter=1000, C=0.1)

In [175]:
model_sk_log.fit(X_train, y_train)

LogisticRegression(C=0.1, max_iter=1000)

### Predicting

In [176]:
pred_test = model_sk_log.predict(X_test)
pred_train = model_sk_log.predict(X_train)

## Model Results

In [177]:
print('Train Accuracy: %f'%(np.mean(pred_train == y_train) * 100))

Train Accuracy: 80.731364


In [178]:
error_cv = eval_err(y_test, pred_test)
error_train = eval_err(y_train, pred_train)
print(f"error test:  {error_cv :0.3f}")
print(f"error train: {error_train :0.3f}")

error test:  0.174
error train: 0.193


In [179]:
print(classification_report(y_test,pred_test))

              precision    recall  f1-score   support

           0       0.80      0.94      0.87       107
           1       0.88      0.65      0.75        71

    accuracy                           0.83       178
   macro avg       0.84      0.80      0.81       178
weighted avg       0.83      0.83      0.82       178



In [180]:
print(confusion_matrix(y_test,pred_test))

[[101   6]
 [ 25  46]]


# Building Log Reg model

In [181]:
X_train = X_train.to_numpy()
y_train = y_train.to_numpy()

### Scaling the train set

In [182]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train, y_train)
X_test = scaler.fit_transform(X_test, y_test)

## To create a Logistic Regression :
##### 1. Create sigmoid function
##### 2. Compute loss function, sum to get cost (add regularization)
##### 3. Add gradient descent calculation 
##### 4. Updating the weights
##### 5. Check predictions

### 1. Sigmoid Function

In [183]:
def sigmoid(z):

    calc = math.e**-z
    g = 1 / (1 + calc)

    return g

#### Cost Function Reference (used Andrew Ng ML Specialization):

<a name="2.4"></a>
### 2.4 Cost function for logistic regression

In this section, you will implement the cost function for logistic regression.

<a name='ex-02'></a>
### Exercise 2

Please complete the `compute_cost` function using the equations below.

Recall that for logistic regression, the cost function is of the form 

$$ J(\mathbf{w},b) = \frac{1}{m}\sum_{i=0}^{m-1} \left[ loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)}) \right] \tag{1}$$

where
* m is the number of training examples in the dataset


* $loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)})$ is the cost for a single data point, which is - 

    $$loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)}) = (-y^{(i)} \log\left(f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) - \left( 1 - y^{(i)}\right) \log \left( 1 - f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) \tag{2}$$
    
    
*  $f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction, while $y^{(i)}$, which is the actual label

*  $f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = g(\mathbf{w} \cdot \mathbf{x^{(i)}} + b)$ where function $g$ is the sigmoid function.
    * It might be helpful to first calculate an intermediate variable $z_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x^{(i)}} + b = w_0x^{(i)}_0 + ... + w_{n-1}x^{(i)}_{n-1} + b$ where $n$ is the number of features, before calculating $f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = g(z_{\mathbf{w},b}(\mathbf{x}^{(i)}))$


### 2. Cost function

In [184]:
def loss_calc(X, y, w, b, lambda_):
    
    train_ex, featur = X.shape
    total_cost = 0 
    
    for i in range(train_ex):
        
        #Uses the activation function (sigmoid) 
        sig_i = sigmoid(np.dot(X[i],w) + b)
        #Uses the loss function calculation
        check = (- y[i] * np.log(sig_i)) - ((1 - y[i]) * np.log(1 - sig_i))
        total_cost += check
    
    #Avgs to train_examples
    total_cost = total_cost / train_ex
    
    #Regularization term 
    reg = 0
    
    for j in range(featur):
        #Squares the weights
        reg += w[j]**2
    
    #Uses the lambda to "punish" large weights
    reg = lambda_/(2*featur * reg)
    
    #Cost function with the regularization added 
    final_cost = total_cost + reg
    
    return final_cost

### 3. Gradient Descent

In [185]:
def gradient_descent(X, y, w, b, lambda_):
    train_ex, featur = X.shape
    
    dj_dw = np.zeros(w.shape)
    dj_db = 0.
    
    for i in range(train_ex):
        pred = sigmoid(np.dot(X[i], w) + b)
        #For each train example: takes the predction - the truth
        dj_db += pred - y[i]
        
        for j in range(featur):
            #For each train example and each feature within it: predction - the truth * value of [example] [feature]
            dj_dw[j] += (pred - y[i]) * X[i][j]
    
    #Divides the w and b's derivatives per the number of features
    dj_db = dj_db / featur
    dj_dw = dj_dw / featur
    
    #Adds a regularization term to W's derivative (punish large weights)
    for i in range(featur):
        reg = lambda_ / featur * w[j]
        dj_dw[j] += reg
    
    return dj_dw, dj_db

### 4. Updates the weights and biases

In [208]:
def compute_gradient(X, y, w_in, b_in, cost_func, gradient_func, alpha, iterat, lambda_):
    examples = X.shape[0]
    #List to ilustrate the cost function history
    Cost_Journey = []
    
    for i in range(iterat):
        #For each iteration: call the gradient function and updates the weights and biases (mind the alpha hehe)
        dj_dw, dj_db = gradient_func(X, y, w_in, b_in, lambda_)
        
        w_in = w_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db
        
        #Just a code to print the cost throughout the training
        if i % (iterat/10) == 0 or i == (iterat-1):
            cost = cost_func(X, y, w_in, b_in, lambda_)
            Cost_Journey.append(cost)
            print(f"Iteration {i:4}: Cost {float(Cost_Journey[-1]):8.3f}   ")
    
    return w_in, b_in, Cost_Journey

### Running the Model

In [243]:
#Model parameters
alpha = 0.01
iterations = 5000      #After about 1K iter the cost function does not drop
lambda_ = 0.001

#Creates a random matrix for W to start diff from 0
np.random.seed(1)
i_w = np.random.rand(X_train.shape[1])-0.5
i_b = 1.
        
w, b, cost = compute_gradient(X_train, y_train, i_w, i_b, loss_calc, gradient_descent, alpha, iterations, lambda_)

Iteration    0: Cost    0.854   
Iteration  500: Cost    0.446   
Iteration 1000: Cost    0.446   
Iteration 1500: Cost    0.446   
Iteration 2000: Cost    0.446   
Iteration 2500: Cost    0.446   
Iteration 3000: Cost    0.446   
Iteration 3500: Cost    0.446   
Iteration 4000: Cost    0.446   
Iteration 4500: Cost    0.446   
Iteration 4999: Cost    0.446   


In [241]:
#Something like a GridSearchCV 
#needs to create a list for alpha and lambda_
answers = []
for i in range(len(alpha)):
    for j in range(len(lambda_)):
        w, b, cost = compute_gradient(X_train, y_train, i_w, i_b, loss_calc, gradient_descent, alpha[i], iterations, lambda_[j])
        answers.append(cost[-1])
        answers.append(alpha[i])
        answers.append(lambda_[j])

[0.4470502037403722, 0.001, 0.001, 0.4472136616977054, 0.001, 0.01, 0.44884902254131775, 0.001, 0.1, 0.44647776173080006, 0.01, 0.001, 0.4466166457081425, 0.01, 0.01, 0.44800884883207254, 0.01, 0.1, 0.5188852241600901, 0.1, 0.001, 0.5189938257380772, 0.1, 0.01, 0.5200503059960984, 0.1, 0.1]


### Function to get the prediction given a threshold

In [244]:
def predict(X, w, b, threshold):
    m,n = X.shape
    p = np.zeros(m)
    
    for i in range(m):
        #Takes the predction given the weights and biases after adjustment
        kick = np.dot(X[i], w) + b
        
        #Prediction threshold
        if kick >= threshold:
            p[i] = 1
        else:
            p[i] = 0
    
    return p

## Model Results

In [250]:
p_train = predict(X_train, w, b, 0.5)
print('Train Accuracy: %f'%(np.mean(p_train == y_train) * 100))
p = predict(X_test, w,b, 0.5)
print('Test Accuracy: %f'%(np.mean(p == y_test) * 100))

Train Accuracy: 81.012658
Test Accuracy: 81.460674


In [251]:
error_cv = eval_err(y_test, p)
error_train = eval_err(y_train, p_train)
print(f"error cv:    {error_cv :0.3f}")
print(f"error train: {error_train :0.3f}")

error cv:    0.185
error train: 0.190


## Result of Self-Made logistic Reg


In [252]:
print(classification_report(y_test,p))

              precision    recall  f1-score   support

           0       0.78      0.96      0.86       107
           1       0.91      0.59      0.72        71

    accuracy                           0.81       178
   macro avg       0.85      0.78      0.79       178
weighted avg       0.83      0.81      0.80       178



In [253]:
print(confusion_matrix(y_test,p))

[[103   4]
 [ 29  42]]
