# SALE PREDICTION USING LOGISTIC REGRESSION

## Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot
%matplotlib inline

## Loading Dataset

In [3]:
# Location of the advertising dataset, resolved relative to the working directory.
DATASET_PATH = "DigitalAd_dataset.csv"
df = pd.read_csv(DATASET_PATH)

# Preview the first five rows.
df.head()

Unnamed: 0,Age,Salary,Status
0,18,82000,0
1,29,80000,0
2,47,25000,1
3,45,26000,1
4,46,28000,1


## Summarising Dataset

In [13]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(400, 3)

In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the two input columns.
df[['Age', 'Salary']].describe()

Unnamed: 0,Age,Salary
count,400.0,400.0
mean,37.655,69742.5
std,10.482877,34096.960282
min,18.0,15000.0
25%,29.75,43000.0
50%,37.0,70000.0
75%,46.0,88000.0
max,60.0,150000.0


In [8]:
# Number of distinct values held by each column.
df.nunique()

Age        43
Salary    117
Status      2
dtype: int64

In [10]:
# Per-column flag: True when the column contains at least one missing value.
df.isna().any()

Age       False
Salary    False
Status    False
dtype: bool

## Segregate Dataset into X (Input / Independent Variables) & Y (Output / Dependent Variable)

In [33]:
# Every column except the last is a model input; the last column is the label.
*feature_cols, target_col = df.columns
X = df[feature_cols]
Y = df[target_col]

In [34]:
# Show the raw values behind the feature frame and the label series.
print(X.to_numpy())
print(Y.to_numpy())

[[    18  82000]
 [    29  80000]
 [    47  25000]
 [    45  26000]
 [    46  28000]
 [    48  29000]
 [    45  22000]
 [    47  49000]
 [    48  41000]
 [    45  22000]
 [    46  23000]
 [    47  20000]
 [    49  28000]
 [    47  30000]
 [    29  43000]
 [    31  18000]
 [    31  74000]
 [    27 137000]
 [    21  16000]
 [    28  44000]
 [    27  90000]
 [    35  27000]
 [    33  28000]
 [    30  49000]
 [    26  72000]
 [    27  31000]
 [    27  17000]
 [    33  51000]
 [    35 108000]
 [    30  15000]
 [    28  84000]
 [    23  20000]
 [    25  79000]
 [    27  54000]
 [    30 135000]
 [    31  89000]
 [    24  32000]
 [    18  44000]
 [    29  83000]
 [    35  23000]
 [    27  58000]
 [    24  55000]
 [    23  48000]
 [    28  79000]
 [    22  18000]
 [    32 117000]
 [    27  20000]
 [    25  87000]
 [    23  66000]
 [    32 120000]
 [    59  83000]
 [    24  58000]
 [    24  19000]
 [    23  82000]
 [    22  63000]
 [    31  68000]
 [    25  80000]
 [    24  27000]
 [    20  2300

## Splitting Dataset into Train & Test

In [35]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
split_parts = train_test_split(X, Y, test_size=0.25, random_state=0)
X_Train, X_Test, Y_Train, Y_Test = split_parts

## Feature Scaling (Method: Standardisation)

### Xnorm = (X - mean) / SD

In [36]:
# NOTE: X_Train and X_Test are overwritten with their scaled versions below, so
# re-run the train/test split cell before re-running this one; otherwise the
# scaler would be re-fitted on data that is already standardised.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_Train = sc.fit_transform(X_Train)  # Learn each column's mean and standard deviation from the training rows, then standardise them
X_Test = sc.transform(X_Test)        # Standardise the test rows with the statistics learned from the training rows (the scaler is not re-fitted on test data)
X_Train[:5]

array([[ 0.97446763,  0.56400811],
       [-1.52088611, -1.28151705],
       [ 0.30264162,  0.03671521],
       [-0.36918439, -0.81281224],
       [ 0.49459191,  1.20847722]])

## Training the Model

In [37]:
from sklearn.linear_model import LogisticRegression

# Build the classifier and fit it on the standardised training split in one step.
model = LogisticRegression(random_state=0).fit(X_Train, Y_Train)

# Display the fitted estimator.
model

LogisticRegression(random_state=0)

## Prediction for Test Data

In [55]:
# Predict a label for every row of the held-out test split.
Y_Pred = model.predict(X_Test)

# Work with the true labels as a plain array from here on.
Y_Test = np.array(Y_Test)

# Put actual and predicted labels side by side as two columns.
Pred_Data = np.column_stack((Y_Test, Y_Pred))
df_pred = pd.DataFrame(Pred_Data, columns=['Actual', 'Prediction'])
df_pred.head(10)

Unnamed: 0,Actual,Prediction
0,1,0
1,1,0
2,1,1
3,1,1
4,0,0
5,0,0
6,0,0
7,1,1
8,0,0
9,0,0


## Evaluating Model - CONFUSION MATRIX

In [59]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Rows of the matrix are the actual classes, columns are the predicted classes.
Confusion_Matrix = confusion_matrix(Y_Test, Y_Pred)
print("Confusion_Matrix :\n", Confusion_Matrix)

# Share of test rows whose predicted label matches the actual label, as a percentage.
accuracy_pct = accuracy_score(Y_Test, Y_Pred) * 100
print("Accuracy of the Model: {0}%".format(accuracy_pct))

Confusion_Matrix :
 [[61  0]
 [20 19]]
Accuracy of the Model: 80.0%


## Prediction for new Data Input

In [62]:
# Collect the two raw feature values for the customer to score.
age = int(input("Enter New Customer Age: "))
sal = int(input("Enter New Customer Salary: "))

# Label the values with the same column names as the training features, so the
# fitted scaler receives input in the same form it was fitted on.
newCust = pd.DataFrame([[age, sal]], columns=X.columns)

# Standardise with the statistics learned from the training split, then classify.
result = model.predict(sc.transform(newCust))
print(result)

# `result` is a one-element array; test its single label explicitly instead of
# relying on the truthiness of the array.
if result[0] == 1:
  print("Customer will Buy")
else:
  print("Customer won't Buy")

Enter New Customer Age: 25
Enter New Customer Salary: 400000
[1]
Customer will Buy


__Thank You__