# Logistic Regression

### This dataset contains information about users from a company’s database: User ID, Gender, Age, Estimated Salary, and Purchased. We use it to predict whether a user will purchase the company’s newly launched product.

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
from sklearn.metrics import accuracy_score #Using sklearn only to check accuracy

In [2]:
# Load the user records; the path is relative, so the CSV must be reachable
# from the notebook's working directory.
user_data = pd.read_csv(filepath_or_buffer='User_Data.csv')

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
user_data.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [4]:
# Features: everything except the identifier, gender and the label, with each
# remaining column standardised to zero mean and unit (population) std.
# NOTE(review): the mean/std are computed over all rows, so rows that are later
# held out for testing also influence the scaling.
X = (
    user_data
    .drop(columns=['Gender', 'User ID', 'Purchased'])
    .assign(
        Age=lambda df: (df['Age'] - np.mean(df['Age'])) / np.std(df['Age']),
        EstimatedSalary=lambda df: (
            (df['EstimatedSalary'] - np.mean(df['EstimatedSalary']))
            / np.std(df['EstimatedSalary'])
        ),
    )
)

# Target: what remains once the identifier, gender and feature columns are
# removed (kept as a one-column DataFrame).
y = user_data.drop(columns=['Gender', 'User ID', 'Age', 'EstimatedSalary'])

In [5]:
# Switch from pandas objects to plain NumPy arrays for the matrix maths below.
X, y = np.array(X), np.array(y)

In [6]:
# Hold out the tail of the dataset for testing.
#   k -- fraction of rows used for training
#   p -- index of the first test row
# The row count is read from X instead of being hard-coded, so the split stays
# proportional if the dataset grows or shrinks.
# NOTE: rows are split in their existing order; nothing is shuffled here.
k = 0.6
p = int(k * X.shape[0])
X_train, X_test = X[:p], X[p:]
y_train, y_test = y[:p], y[p:]

In [7]:
def weightInitialization(n_features):
    """Create the starting parameters for the logistic-regression model.

    Args:
        n_features: Number of weights to create (one per input feature).

    Returns:
        A ``(w, b)`` tuple: ``w`` is a ``(1, n_features)`` array of zeros and
        ``b`` is the integer ``0``.
    """
    return np.zeros(shape=(1, n_features)), 0

In [8]:
def sigmoid_activation(result):
    """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-z)))`` using
    ``np.logaddexp``. This is the same function mathematically, but it never
    forms ``exp(-z)`` directly, so large negative inputs no longer overflow
    (and no longer emit a RuntimeWarning).

    Args:
        result: Scalar or array of logits.

    Returns:
        Sigmoid of ``result``, with the same shape as the input.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0, -result))

In [9]:
def opt(w, b, X, Y):
    """Compute the mean cross-entropy cost and its gradients for one full pass.

    Args:
        w: Weights; used as ``np.dot(w, X.T)``, so expected to be a
            ``(1, n_features)`` row vector.
        b: Bias added to every logit.
        X: Design matrix with one row per sample (``m = X.shape[0]``).
        Y: 0/1 labels; expected to be ``(m, 1)`` so that ``Y.T`` lines up with
            the ``(1, m)`` predictions.

    Returns:
        ``(grads, cost)`` where ``grads`` is ``{"dw": ..., "db": ...}`` (the
        gradients of the cost with respect to ``w`` and ``b``) and ``cost`` is
        the mean cross-entropy over the ``m`` samples.
    """
    m = X.shape[0]
    Y_T = Y.T
    final_result = sigmoid_activation(np.dot(w, X.T) + b)

    # Clip only for the logarithms: a sigmoid that saturates to exactly 0 or 1
    # would otherwise give log(0) = -inf, and 0 * -inf turns the cost into nan.
    # Values already inside [eps, 1 - eps] pass through unchanged.
    eps = 1e-15
    safe_result = np.clip(final_result, eps, 1 - eps)
    cost = (-1/m) * np.sum(
        Y_T * np.log(safe_result) + (1 - Y_T) * np.log(1 - safe_result)
    )

    # Gradients use the unclipped predictions.
    error = final_result - Y_T
    dw = (1/m) * np.dot(X.T, error.T)
    db = (1/m) * np.sum(error)
    grads = {"dw": dw, "db": db}

    return grads, cost

In [10]:
def pred_model(w, b, X, Y, learning_rate, no_iterations):
    """Fit the weights and bias with batch gradient descent.

    Each iteration calls ``opt`` on the full data, then steps ``w`` and ``b``
    against the gradient. The cost is recorded and printed every 100
    iterations (starting with iteration 0).

    Args:
        w: Initial weights.
        b: Initial bias.
        X: Training features, passed through to ``opt``.
        Y: Training labels, passed through to ``opt``.
        learning_rate: Step size applied to both gradients.
        no_iterations: Number of gradient-descent steps to run.

    Returns:
        ``(coeff, gradient, costs)``:
        ``coeff`` is ``{"w": ..., "b": ...}`` with the final parameters;
        ``gradient`` is ``{"dw": ..., "db": ...}`` from the last iteration
        (evaluated before that iteration's update), or ``None`` values when
        ``no_iterations`` is 0;
        ``costs`` is the list of costs sampled every 100 iterations.
    """
    costs = []
    # Bound up front so the function still returns cleanly when the loop body
    # never runs (no_iterations == 0 used to raise UnboundLocalError).
    dw, db = None, None

    for i in range(no_iterations):
        grads, cost = opt(w, b, X, Y)
        dw, db = grads["dw"], grads["db"]

        # dw is transposed so that it lines up with w before the update.
        w = w - (learning_rate * dw.T)
        b = b - (learning_rate * db)

        if i % 100 == 0:
            costs.append(cost)
            print("Cost after %i iteration is %f" % (i, cost))

    # Final parameters and the most recent gradients.
    coeff = {"w": w, "b": b}
    gradient = {"dw": dw, "db": db}

    return coeff, gradient, costs

In [11]:
def predict(final_pred, m):
    """Turn predicted probabilities into hard 0/1 labels with a 0.5 cut-off.

    Args:
        final_pred: 2-D array of probabilities; only row 0 is read.
        m: Length of the returned label row.

    Returns:
        A ``(1, m)`` float array holding 1 wherever the probability in row 0
        is strictly greater than 0.5 and 0 everywhere else.
    """
    y_pred = np.zeros((1, m))
    n_cols = final_pred.shape[1]
    # Positions whose probability clears the threshold.
    positives = np.flatnonzero(final_pred[0][:n_cols] > 0.5)
    y_pred[0, positives] = 1
    return y_pred

In [12]:
# One weight per column of the training matrix, all starting at zero.
n_features = np.shape(X_train)[1]
w, b = weightInitialization(n_features)  # Initializing model parameters

In [13]:
# Gradient descent: fit the parameters on the training split.
coeff, gradient, costs = pred_model(
    w,
    b,
    X_train,
    y_train,
    learning_rate=0.005,
    no_iterations=4500,
)

Cost after 0 iteration is 0.693147
Cost after 100 iteration is 0.594624
Cost after 200 iteration is 0.527106
Cost after 300 iteration is 0.479576
Cost after 400 iteration is 0.445061
Cost after 500 iteration is 0.419243
Cost after 600 iteration is 0.399413
Cost after 700 iteration is 0.383830
Cost after 800 iteration is 0.371341
Cost after 900 iteration is 0.361161
Cost after 1000 iteration is 0.352741
Cost after 1100 iteration is 0.345689
Cost after 1200 iteration is 0.339716
Cost after 1300 iteration is 0.334608
Cost after 1400 iteration is 0.330202
Cost after 1500 iteration is 0.326372
Cost after 1600 iteration is 0.323020
Cost after 1700 iteration is 0.320069
Cost after 1800 iteration is 0.317456
Cost after 1900 iteration is 0.315131
Cost after 2000 iteration is 0.313052
Cost after 2100 iteration is 0.311186
Cost after 2200 iteration is 0.309505
Cost after 2300 iteration is 0.307985
Cost after 2400 iteration is 0.306605
Cost after 2500 iteration is 0.305350
Cost after 2600 iteratio

In [14]:
# Final prediction: pull the fitted parameters out of the result and report them.
# NOTE: this rebinds w and b, so re-running the training cell afterwards starts
# from the fitted values rather than from zeros.
w, b = coeff["w"], coeff["b"]
print('Optimized weights', w)    # Weights
print('Optimized intercept', b)  # Bias

Optimized weights [[1.343175   1.05893885]]
Optimized intercept -1.23698439912055


In [15]:
# Predicted probabilities for the training and test splits, in that order.
final_train_pred, final_test_pred = (
    sigmoid_activation(np.dot(w, features.T) + b)
    for features in (X_train, X_test)
)

In [18]:
# Threshold the probabilities into 0/1 labels and score both splits.
m_tr = X_train.shape[0]
m_ts = X_test.shape[0]
y_train_pred = predict(final_train_pred, m_tr)
y_test_pred = predict(final_test_pred, m_ts)

# accuracy_score is declared as (y_true, y_pred), so the ground truth goes
# first. Predictions are transposed to match the column layout of the labels.
# Accuracy is symmetric in its two arguments, so the printed values are the
# same as with the swapped order.
print('Test Accuracy', accuracy_score(y_test, y_test_pred.T))
print('Training Accuracy', accuracy_score(y_train, y_train_pred.T))

Test Accuracy 0.75625
Training Accuracy 0.8666666666666667
