### Project Title - Sales Prediction using Logistic Regression

#### Objective: Predict whether a new customer will buy a product, based on parameters (age and salary) recorded for previous customers.


#### *Importing Libraries*

In [1]:
import pandas as pd #This is useful for loading the dataset
import numpy as np # This is used to perform numerical opertion on the dataset


#### *Loading Dataset file from Local Directory*

In [2]:
# Read the CSV; the relative path is resolved against the current working
# directory.
dataset = pd.read_csv(filepath_or_buffer="DigitalAd_dataset.csv")

# Bare last expression so the notebook renders the loaded frame.
dataset

Unnamed: 0,Age,Salary,Status
0,18,82000,0
1,29,80000,0
2,47,25000,1
3,45,26000,1
4,46,28000,1
...,...,...,...
395,35,65000,0
396,26,80000,0
397,26,52000,0
398,20,86000,0


#### *Summarizing Dataset*

In [3]:
# Dimensions of the loaded frame as (rows, columns).
print(dataset.shape)

# Preview the first and last five rows. print() joins its arguments with a
# single space by default, which glues the tail's header onto the last row of
# the head; an explicit blank-line separator keeps the two tables apart.
print(dataset.head(5), dataset.tail(5), sep="\n\n")


(400, 3)
   Age  Salary  Status
0   18   82000       0
1   29   80000       0
2   47   25000       1
3   45   26000       1
4   46   28000       1      Age  Salary  Status
395   35   65000       0
396   26   80000       0
397   26   52000       0
398   20   86000       0
399   32   18000       0


#### *Segregating Dataset into X(Input) & Y(Output)*

In [4]:
# Inputs: every column except the last one, taken by position with iloc.
X = dataset.iloc[:, :-1].values
# Output: the last column only.
Y = dataset.iloc[:, -1].values

# Show both arrays with one empty line between them.
print(X, "", Y, sep="\n")

[[    18  82000]
 [    29  80000]
 [    47  25000]
 [    45  26000]
 [    46  28000]
 [    48  29000]
 [    45  22000]
 [    47  49000]
 [    48  41000]
 [    45  22000]
 [    46  23000]
 [    47  20000]
 [    49  28000]
 [    47  30000]
 [    29  43000]
 [    31  18000]
 [    31  74000]
 [    27 137000]
 [    21  16000]
 [    28  44000]
 [    27  90000]
 [    35  27000]
 [    33  28000]
 [    30  49000]
 [    26  72000]
 [    27  31000]
 [    27  17000]
 [    33  51000]
 [    35 108000]
 [    30  15000]
 [    28  84000]
 [    23  20000]
 [    25  79000]
 [    27  54000]
 [    30 135000]
 [    31  89000]
 [    24  32000]
 [    18  44000]
 [    29  83000]
 [    35  23000]
 [    27  58000]
 [    24  55000]
 [    23  48000]
 [    28  79000]
 [    22  18000]
 [    32 117000]
 [    27  20000]
 [    25  87000]
 [    23  66000]
 [    32 120000]
 [    59  83000]
 [    24  58000]
 [    24  19000]
 [    23  82000]
 [    22  63000]
 [    31  68000]
 [    25  80000]
 [    24  27000]
 [    20  2300

#### *Splitting Dataset into Train & Test*

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
# NOTE(review): a 5% hold-out leaves very few rows for evaluation, so the
# reported accuracy will be coarse - consider a larger test_size.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.05,
    random_state=10,
)

#### *Feature Scaling* 

Feature scaling puts Age and Salary on a comparable scale, so that the feature with the much larger numeric range (Salary) does not dominate the model.

In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
# NOTE(review): X_train / X_test are overwritten with their scaled versions,
# so re-running this cell on its own would refit `sc` on already-scaled data.
# Re-run the split cell first.

#### *Training the Model*

In [7]:
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training can be
# chained into one assignment.
model = LogisticRegression(random_state=0).fit(X_train, Y_train)

# Bare last expression so the notebook still shows the fitted estimator.
model

LogisticRegression(random_state=0)

### *Predicting whether or not a New customer will buy, considering the Age and Salary*

In [8]:
def _read_int_at_least(prompt, minimum):
    """Prompt for a whole number and check it against a lower bound.

    Args:
        prompt: Text shown to the user by input().
        minimum: Smallest value that is accepted.

    Returns:
        The entered value as an int.

    Raises:
        ValueError: if the entry is not an integer or is below `minimum`.
    """
    value = int(input(prompt))
    if value < minimum:
        raise ValueError(
            "Expected a whole number >= {0}, got {1}".format(minimum, value)
        )
    return value


age = _read_int_at_least("Enter New Customer Age: ", 1)
sal = _read_int_at_least("Enter New Customer Salary: ", 0)

# One row with the features in the order passed here: age, then salary.
newCust = [[age, sal]]
# Apply the scaler fitted on the training data before asking the model.
result = model.predict(sc.transform(newCust))
print(result)

# predict() returns one label per input row; read the single label explicitly
# instead of truth-testing the whole array.
if result[0] == 1:
    print("Customer  will buy")
else:
    print("Customer will not buy")

Enter New Customer Age: 59
Enter New Customer Salary: 30
[1]
Customer  will buy


#### *Prediction for all Test Data*

In [9]:
# Predicted labels for the held-out (already scaled) test rows.
Y_pred = model.predict(X_test)

# Side-by-side view: first column is the prediction, second is the true label.
print(np.column_stack((Y_pred, Y_test)))

[[1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]]


#### *Evaluating Model - CONFUSION MATRIX*

In [10]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Called as confusion_matrix(true labels, predicted labels).
cm = confusion_matrix(Y_test, Y_pred)

# Heading on one line, the matrix on the following lines.
print("Confusion Matrix: ", cm, sep="\n")

# accuracy_score returns a fraction; scale it to a percentage for display.
print(
    "Accuracy of the Model: {0}%".format(
        accuracy_score(Y_test, Y_pred) * 100
    )
)

Confusion Matrix: 
[[14  1]
 [ 0  5]]
Accuracy of the Model: 95.0%
