In [None]:
import numpy as np # linear algebra
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
# Load the voice dataset; the path is relative to the notebook's working directory.
voice_csv_path = '../input/voice.csv'
data = pd.read_csv(voice_csv_path)

# What is Logistic Regression ? 

Logistic regression is the appropriate regression analysis to conduct when the dependent variable is dichotomous (binary). Like all regression analyses, logistic regression is a predictive analysis. It is used to describe data and to explain the relationship between one binary dependent variable and one or more nominal, ordinal, interval or ratio-level independent variables.

#### Logistic regression steps are :
- Initialize Weight and Bias values,
- Function Sigmoid,
- Forward Propagation,  ( In loop with backward to find minimum cost value )
- Backward Propagation,  ( In loop with forward to find minimum cost value )
- Update Weight and Bias,
- Then predict

Let's check what we have.

In [None]:
# Preview the first five rows to see which columns are available.
data.head(n=5)

## Do you see the "label" column? It is our result (target) column. Let's convert it to binary (0/1), because the model needs numeric values to compare its predictions against.

In [None]:
# Encode the target column: "male" -> 1, "female" -> 0.
# The mapping also accepts the already-encoded values 0 and 1, so re-running this
# cell leaves the column unchanged instead of turning every label into 0.
label_codes = {"male": 1, "female": 0, 1: 1, 0: 0}
encoded_label = data["label"].map(label_codes)

# Series.map yields NaN for values that are not in the mapping; fail loudly rather
# than silently counting an unexpected or missing label as "female".
if encoded_label.isna().any():
    raise ValueError("unexpected value in 'label' column; expected 'male' or 'female'")

data["label"] = encoded_label.astype(int)
data["label"].unique()

In [None]:
# Count how many samples carry each label value.
data["label"].value_counts()

As you see above, 1 represents "male" and 0 represents "female".
- Now we need to split data

In [None]:
# Target vector: the label column as a NumPy array.
y = data.label.values
# Feature matrix: every column except the target.
x_data = data.drop(columns=["label"])

## And normalization
# Min-max scaling per feature (column-wise): x = (x - min) / (max - min).
# DataFrame.min()/max() are used instead of np.min()/np.max(): on pandas >= 2.0 the
# NumPy wrappers reduce over the whole frame and return one scalar, which breaks
# both the per-column scaling and the former `.values` access.
feature_min = x_data.min()
feature_range = x_data.max() - feature_min
# A constant column has zero range; divide by 1 so it maps to 0 instead of NaN.
feature_range = feature_range.where(feature_range != 0, 1)
x = (x_data - feature_min) / feature_range

In [None]:
# Preview the first five rows of the normalized feature matrix.
x.head(n=5)

#### Why is normalization needed?
 - The answer is simple: "domination". If one feature has small values and another feature has large values, the large values will dominate the small ones during the calculations. For example:
  * In the row with index 2, the kurt feature value is 1024 while sp.ent is 0.84. If we use both of them in the same calculation, the kurt feature will dominate sp.ent. See?

*Then, let's split our data into train and test sets.*

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20 % of the samples for testing; the fixed random_state keeps the split stable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Transpose the feature matrices so that rows are features and columns are samples.
x_train, x_test = x_train.T, x_test.T
# The label arrays are transposed as well (for a one-dimensional array .T changes nothing).
y_train, y_test = y_train.T, y_test.T

## Initialize Weight and Bias:

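- This function is where we initialize the weight and bias.
- Our formula is $z = w^T x + b$ (weight * feature + bias) and $\hat{y} = \mathrm{sigmoid}(z)$ (called `y_head` in the code).
- The sigmoid squashes the result into the range between 0 and 1, so it can be read as a probability.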
                    

In [None]:
def init_weights_bias(dimension):
    """Create the starting parameters for logistic regression.

    Args:
        dimension: number of rows of the weight vector (one weight per feature).

    Returns:
        Tuple ``(w, b)``: ``w`` is a ``(dimension, 1)`` array whose entries are
        all 0.01, and ``b`` is the float 0.0.
    """
    initial_weight = 0.01
    weights = np.full(shape=(dimension, 1), fill_value=initial_weight)
    bias = 0.0
    return weights, bias
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise.

    Args:
        z: scalar or NumPy array.

    Returns:
        Value(s) strictly between 0 and 1 for finite, moderate ``z``; same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

The forward_backward_propagation function implements the forward and backward steps of the logistic regression algorithm.

NOTE: This algorithm is described in detail https://www.kaggle.com/kanncaa1/deep-learning-tutorial-for-beginners

In [None]:
def forward_backward_propagation(w,b,x_train,y_train):
    """Run one forward and one backward pass of logistic regression.

    Args:
        w: weight array of shape (n_features, 1).
        b: scalar bias.
        x_train: feature matrix laid out as (n_features, n_samples).
        y_train: labels (0 or 1), one per sample.

    Returns:
        Tuple ``(cost, gradients)``: ``cost`` is the mean cross-entropy loss over
        the samples; ``gradients`` is a dict with the keys ``"derivative_weight"``
        and ``"derivative_bias"``.
    """
    sample_count = x_train.shape[1]

    # Forward pass: linear score, then sigmoid.
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)

    # Clip only inside the logarithms: a saturated prediction of exactly 0 or 1
    # would otherwise give log(0) = -inf and turn the cost into inf/NaN.
    # Values away from the boundaries are unchanged by the clip.
    eps = 1e-15
    y_safe = np.clip(y_head, eps, 1 - eps)
    loss = -y_train * np.log(y_safe) - (1 - y_train) * np.log(1 - y_safe)
    cost = np.sum(loss) / sample_count

    # Backward pass: gradients use the unclipped predictions.
    error = y_head - y_train
    derivative_weight = np.dot(x_train, error.T) / sample_count
    derivative_bias = np.sum(error) / sample_count
    gradients = {"derivative_weight": derivative_weight,"derivative_bias": derivative_bias}
    return cost, gradients

### After forward and backward propagation we have the cost and the gradients, so we need to use them to update the weight and bias values to fit the data and get the best predictions
- Update function is where we update bias and weight values

In [None]:
def update_weights_bias(w,b,x_train,y_train,learning_rate,number_of_iterarion):
    """Train the weights and bias with plain gradient descent.

    Each iteration calls forward_backward_propagation and moves ``w`` and ``b``
    against their gradients. Every 10th iteration the cost is printed and
    recorded; the recorded costs are plotted once training ends.

    Args:
        w: initial weight array.
        b: initial bias.
        x_train: training features.
        y_train: training labels.
        learning_rate: step size applied to the gradients.
        number_of_iterarion: number of gradient-descent iterations.

    Returns:
        Tuple ``(parameters, gradients, cost_list)``: ``parameters`` is a dict with
        the keys ``"weight"`` and ``"bias"``, ``gradients`` are those of the last
        iteration, and ``cost_list`` holds the cost of every iteration.
    """
    cost_list = []
    logged_costs = []
    logged_steps = []

    for step in range(number_of_iterarion):
        cost, gradients = forward_backward_propagation(w, b, x_train, y_train)
        cost_list.append(cost)

        # Report and record progress on every 10th iteration only.
        if step % 10 == 0:
            logged_costs.append(cost)
            logged_steps.append(step)
            print ("Cost after iteration %i: %f" %(step, cost))

        # Gradient-descent step; new objects are created, the caller's arrays are not modified.
        w = w - learning_rate * gradients["derivative_weight"]
        b = b - learning_rate * gradients["derivative_bias"]

    # Learning curve built from the recorded iterations.
    plt.plot(logged_steps, logged_costs)
    plt.xticks(logged_steps, rotation='vertical')
    plt.xlabel("Number of iteration")
    plt.ylabel("Cost")
    plt.show()

    parameters = {"weight": w, "bias": b}
    return parameters, gradients, cost_list

In [None]:
def predict(w,b,x_test):
    """Predict binary labels with a trained weight vector and bias.

    Args:
        w: weight array of shape (n_features, 1).
        b: scalar bias.
        x_test: feature matrix laid out as (n_features, n_samples).

    Returns:
        Float array of shape (1, n_samples): 0 where the sigmoid output is
        <= 0.5 and 1 otherwise.
    """
    probabilities = sigmoid(np.dot(w.T, x_test) + b)
    # Threshold the first row only, which keeps the (1, n_samples) result shape.
    # The "<= 0.5 -> 0, anything else -> 1" form is kept on purpose so that
    # every value that is not <= 0.5 is labelled 1.
    return np.where(probabilities[:1] <= 0.5, 0.0, 1.0)

In [None]:
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate, num_iterations):
    """Train logistic regression on the training data and evaluate it on the test data.

    Initializes the parameters, runs gradient descent, predicts the test
    labels and prints the test accuracy in percent.

    Args:
        x_train: training features laid out as (n_features, n_samples).
        y_train: training labels.
        x_test: test features in the same layout as ``x_train``.
        y_test: test labels.
        learning_rate: gradient-descent step size.
        num_iterations: number of gradient-descent iterations.

    Returns:
        The test-set predictions produced by ``predict``.
    """
    # One weight per feature row.
    n_features = x_train.shape[0]
    w, b = init_weights_bias(n_features)

    # Only the trained parameters are needed here; the other return values are ignored.
    parameters, _, _ = update_weights_bias(w, b, x_train, y_train, learning_rate, num_iterations)

    y_prediction_test = predict(parameters["weight"], parameters["bias"], x_test)

    # Labels are 0/1, so the mean absolute difference is the share of wrong predictions.
    error_rate = np.mean(np.abs(y_prediction_test - y_test))
    print("test accuracy: {} %".format(100 - error_rate * 100))

    # Returned so the predictions can be inspected afterwards (e.g. in a confusion matrix).
    return y_prediction_test

y_predict = logistic_regression(x_train, y_train, x_test, y_test, learning_rate=1, num_iterations=300)

## After logistic regression our predictions are 97% correct. It is a good result.
- As you can see, as the number of iterations increases, the cost value decreases. Please remember this, I will mention it below.
- Now let's use a confusion matrix to show what went wrong.

In [None]:
# Turn the (1, n_samples) prediction matrix into a flat vector of integer labels.
# The result is deliberately NOT stored under the name `predict`: that would
# overwrite the predict() function and break any later call to logistic_regression().
predicted_labels = y_predict[0].astype(int)

# confusion_matrix(y_true, y_pred): rows are the true labels, columns the predicted labels.
conf_matrix = confusion_matrix(y_test, predicted_labels)

f, ax = plt.subplots(figsize=(7, 7))
sns.heatmap(conf_matrix, annot=True, linewidths=0.5, linecolor="white", fmt=".0f", ax=ax)
# The heatmap draws matrix columns along x and rows along y, so x is the
# prediction and y is the ground truth.
ax.set_xlabel("predicted label")
ax.set_ylabel("true label (y_test)")
plt.show()

*According to our confusion matrix, the model predicted 7 and 11 samples wrongly: 7 samples are actually 0 but the model predicted 1, and 11 samples are actually 1 but the model predicted 0.*

*In logistic regression the learning rate is important. If you pick it too high, you can skip over the minimum cost value; if you pick it too low, the model will learn slowly and take a long time to find the minimum.*


*The number of iterations is another important setting. If you pick a high value, it takes longer to finish but the model learns better; if you pick a low value, it finishes faster but the model learns less.*

# But don't worry, this was for your learning. Sklearn does this whole process for you.

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit scikit-learn's LogisticRegression on the same split. The feature matrices
# were transposed earlier in the notebook, so they are transposed back here
# before being passed to fit/score.
lr = LogisticRegression()
lr.fit(x_train.T, y_train.T)

sklearn_accuracy = lr.score(x_test.T, y_test.T)
print("test accuracy {}".format(sklearn_accuracy))

### *See it is so easy with sklearn :)*

## Conclusion
- We initialize weight and bias, these are our learning parameters.
- z = (weight * feature) +b 
- y = sigmoid(z)
- Update weight and bias for minimum cost value with forward and backward propagation
- Predict and run all function
- With 97% it is good score :)

- So our purpose is to update the weight and bias so that the cost value is minimized; then our model learns well and gives us a good score.

## Thank you for your time. If you liked it, please upvote to motivate me :)