In [92]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline

In [93]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

In [94]:
from sklearn.preprocessing import LabelEncoder

In [95]:
loans = pd.read_csv("feature_selected_voice_data.csv")

In [96]:
loans.head()

Unnamed: 0,meanfreq,sd,median,Q25,Q75,IQR,kurt,sp.ent,sfm,mode,meanfun,minfun,modindx,label
0,0.059781,0.064241,0.032027,0.015071,0.090193,0.075122,274.402906,0.893369,0.491918,0.0,0.084279,0.015702,0.0,male
1,0.066009,0.06731,0.040229,0.019414,0.092666,0.073252,634.613855,0.892193,0.513724,0.0,0.107937,0.015826,0.052632,male
2,0.077316,0.083829,0.036718,0.008701,0.131908,0.123207,1024.927705,0.846389,0.478905,0.0,0.098706,0.015656,0.046512,male
3,0.151228,0.072111,0.158011,0.096582,0.207955,0.111374,4.177296,0.963322,0.727232,0.083878,0.088965,0.017798,0.247119,male
4,0.13512,0.079146,0.124656,0.07872,0.206045,0.127325,4.333713,0.971955,0.783568,0.104261,0.106398,0.016931,0.208274,male


In [97]:
loans.shape

(3168, 14)

In [2]:
import numpy as np
a = np.array([1,2])
b = np.array([1,2])
print(a*b)
print(np.dot(a,b))

[1 4]
5


In [98]:
X = loans.iloc[:,:-1]
y = loans.iloc[:,13]

In [99]:
le = LabelEncoder()
y = le.fit_transform(y)

Logistic Regression Model

In [102]:
def sigmoid(z):
    return 1.0/(1 + np.exp(-z))

In [103]:
def loss(y, y_hat):
    loss = -np.mean(y*(np.log(y_hat)) - (1-y)*np.log(1-y_hat))
    return loss

In [104]:
def gradients(X, y, y_hat):
    m = X.shape[0]
    dw = (1/m)*np.dot(X.T, (y_hat - y))
    db = (1/m)*np.sum((y_hat - y)) 
    return dw, db

In [106]:
def normalize(X):
    m, n = X.shape
    for i in range(n):
        X = (X - X.mean(axis=0))/X.std(axis=0)
        
    return X

In [107]:
def train(X, y, bs, epochs, lr):
    m, n = X.shape
    w = np.zeros((n,1))
    b = 0
    
    # Reshaping y.
    y = y.reshape(m,1)
    
    # Normalizing the inputs.
    x = normalize(X)
    
    # Empty list to store losses.
    losses = []
    
    # Training loop.
    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            
            # Defining batches. SGD.
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            y_hat = sigmoid(np.dot(xb, w) + b)
            dw, db = gradients(xb, yb, y_hat)
            w -= lr*dw
            b -= lr*db
        l = loss(y, sigmoid(np.dot(X, w) + b))
        losses.append(l)
    return w, b, losses

In [108]:
def predict(X):
    X = normalize(X)
    preds = sigmoid(np.dot(X, w) + b)
    pred_class = []
    pred_class = [1 if i > 0.5 else 0 for i in preds]
    return np.array(pred_class)

In [109]:
# Training 
w, b, l = train(X, y, bs=100, epochs=1000, lr=0.01)

In [110]:
print(w)
print(b)


[[-0.3350003 ]
 [ 0.11288533]
 [-0.53154811]
 [-0.8896054 ]
 [ 0.09201761]
 [ 0.98162301]
 [-0.66484908]
 [-0.06827895]
 [ 1.12396372]
 [ 0.07266711]
 [-1.91111452]
 [-0.06647066]
 [-1.05485461]]
-1.161452409035434


In [112]:
w.shape

(13, 1)

In [None]:
import csv
headers = ['W0','W1','W2','W3','W4','W5','W6','W7','W8','W9','W10','W11','W10','Bias']
with open('parameters.csv', 'wb') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    
    filewriter.writerow(headers)
    filewriter.writerow(w)
    filewriter.writerow(b)