In [2]:
import numpy as np
import pandas as pd
import io
import matplotlib.pyplot as plt

data = pd.read_csv("/root/data/BuyComputer.csv")
data.drop(columns=['User ID',], axis=1, inplace = True)
data.head()

label = data[["Purchased"]]
print(label)

X = data[["Age", "EstimatedSalary"]]
print(X)

     Purchased
0            0
1            0
2            0
3            0
4            0
..         ...
395          1
396          1
397          1
398          0
399          1

[400 rows x 1 columns]
     Age  EstimatedSalary
0     19            19000
1     35            20000
2     26            43000
3     27            57000
4     19            76000
..   ...              ...
395   46            41000
396   51            23000
397   50            20000
398   36            33000
399   49            36000

[400 rows x 2 columns]


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X,label,test_size = 0.2, random_state=34)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

y_pred = []
len_x = len(X_train[0])
w = []
b = 0.2
print(len_x)

def sigmoid(z):
 return 1/(1 + np.exp(-z))

def log_loss(predicted,actual):
   epsilon = 1e-15
   predicted = [max(epsilon,i) for i in predicted] 
   predicted = [min(i,1-epsilon) for i in predicted]
   predicted_new = np.array(predicted)
   x = -np.mean(actual * np.log(predicted_new) + (1-actual) * np.log(1-predicted_new))
   return x

def gradients(X, y, y_hat):
    m = X.shape[0]
    dw = (1/m)*np.dot(X.T, (y_hat - y))
    db = (1/m)*np.sum((y_hat - y)) 
    return dw, db

def normalize(X):
  m, n = X.shape
  for i in range(n):
    X = (X - X.mean(axis = 0)) / X.std(axis = 0)
  return X


def train(X, y, bs, epochs, lr):
    m, n = X.shape
    w = np.zeros((n,1))
    b = 0

    y = y.values.reshape(m,1)
    x = normalize(X)

    losses = []

    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            y_hat = sigmoid(np.dot(xb, w) + b)
            dw, db = gradients(xb, yb, y_hat)
            w -= lr*dw
            b -= lr*db
        l = log_loss(y, sigmoid(np.dot(X, w) + b))
        losses.append(l)
    return w, b, losses


def prediction_function(inputs):
  x = normalize(inputs)
  preds = sigmoid(np.dot(inputs, w) + b)
  pred_class = []    
  pred_class = [1 if i > 0.5 else 0 for i in preds]  
  return np.array(pred_class)


w, b, l = train(X_train, y_train, bs=100, epochs=1000, lr=0.01)


2


In [8]:
from sklearn.metrics import accuracy_score

y_predict = prediction_function(X_test)
print(accuracy_score(y_test, y_predict))

0.825


In [13]:
# Using Logistic Regression Model

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

LR = LogisticRegression(random_state = 0)

X_train1, X_test1, y_train1, y_test1 = train_test_split(X,label,test_size = 0.2, random_state=34)

sc = StandardScaler()
X_train1 = sc.fit_transform(X_train1)
X_test1 = sc.fit_transform(X_test1)

clf = LR.fit(X_train1,y_train1)

y_pred1 = clf.predict(X_test1)
print(y_pred1)

print(accuracy_score(y_test1,y_pred1))

[0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0
 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 1 1 1 0 1 0 0 0 0
 0 1 0 0 0 0]
0.8


  y = column_or_1d(y, warn=True)


In [14]:
import sklearn.metrics as metrics
print(metrics.classification_report(y_test1, y_pred1))

              precision    recall  f1-score   support

           0       0.81      0.88      0.84        49
           1       0.78      0.68      0.72        31

    accuracy                           0.80        80
   macro avg       0.79      0.78      0.78        80
weighted avg       0.80      0.80      0.80        80



In [15]:
output = clf.predict([[28,7600]])
print(output)

[1]
