In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Download the dataset CSV from Google Drive by its file ID (shell command).
# NOTE(review): newer gdown releases appear to deprecate the `--id` flag — confirm before upgrading gdown.
!gdown --id 1sFVv1Q1OvysqRGD0IRCoosbDSHg_w4-p

Downloading...
From: https://drive.google.com/uc?id=1sFVv1Q1OvysqRGD0IRCoosbDSHg_w4-p
To: /content/iphone_purchase_records.csv
100% 7.32k/7.32k [00:00<00:00, 22.6MB/s]


In [None]:
# Step 1 - Load the purchase records into a DataFrame and preview five random rows
dataset = pd.read_csv(filepath_or_buffer="iphone_purchase_records.csv")
dataset.sample(n=5)

Unnamed: 0,Gender,Age,Salary,Purchase Iphone
70,Male,25,80000,0
170,Male,21,88000,0
67,Female,23,82000,0
121,Male,37,72000,0
180,Male,26,16000,0


In [None]:
# Check whether the dataset includes null values (count of nulls per column)
dataset.isna().sum()

Unnamed: 0,0
Gender,0
Age,0
Salary,0
Purchase Iphone,0


In [None]:
# Summarize the frame: column names, non-null counts, dtypes and memory usage
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Gender           400 non-null    object
 1   Age              400 non-null    int64 
 2   Salary           400 non-null    int64 
 3   Purchase Iphone  400 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 12.6+ KB


In [None]:
# Features are every column except the last; the target is the last column.
# Both are pulled out as NumPy arrays.
x, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values
# Peek at the first four feature rows
print(x[:4])

[['Male' 19 19000]
 ['Male' 35 20000]
 ['Female' 26 43000]
 ['Female' 27 57000]]


In [None]:
# Number of distinct values in the target array y
len(set(y))

2

In [None]:
# Step 2 - Encode the gender column (first feature column) as integers
# Fit the sklearn LabelEncoder on the column, then write the codes back in place
labelEncoder_gender = LabelEncoder().fit(x[:, 0])
x[:, 0] = labelEncoder_gender.transform(x[:, 0])
print(x[:4])

[[1 19 19000]
 [1 35 20000]
 [0 26 43000]
 [0 27 57000]]


In [None]:
# Optional - convert x to a float array.
# x is already a 2-D array, so a direct cast is enough; re-stacking its rows with
# np.vstack first added nothing and raised on an empty array.
# The gender column must already be numeric here, otherwise the cast raises ValueError.
x = x.astype(float)

In [None]:
# Step 3 - Split data into training and testing sets (25% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,  # fixed seed so the split is reproducible
)

In [None]:
# Step 4 - Feature scaling
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [None]:
# Step 5 - Train a logistic regression classifier on the scaled training data
classifier = LogisticRegression(
    solver="liblinear",
    random_state=0,
)
classifier.fit(X=X_train, y=y_train)


In [None]:
# Step 6 - Predict class labels for the (already scaled) test features
y_pred = classifier.predict(X_test)


In [None]:
# Step 7 - Evaluate the test-set predictions: confusion matrix plus summary metrics
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Compute every metric first ...
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
fscore = f1_score(y_test, y_pred)

# ... then report them in a fixed order
print(cm)
print("Accuracy score: ", accuracy)
print("Precision score: ", precision)
print("Recall score: ", recall)
print("F1_score score: ", fscore)


[[65  3]
 [ 6 26]]
Accuracy score:  0.91
Precision score:  0.896551724137931
Recall score:  0.8125
F1_score score:  0.8524590163934426


In [None]:
# Step 8 - Make new predictions
# Each profile is [gender_code, age, salary]. The gender codes match the label
# encoding printed earlier in the notebook: 1 = Male, 0 = Female.
GENDER_NAMES = {1: "Male", 0: "Female"}
new_profiles = [
    [1, 21, 40000],
    [1, 21, 80000],
    [0, 21, 40000],
    [0, 21, 80000],
    [1, 41, 40000],
    [1, 41, 80000],
    [0, 41, 40000],
    [0, 68, 80000],
]

# Scale with the scaler fitted on the training data (do not refit on new rows),
# then classify every profile in a single call.
new_predictions = classifier.predict(sc.transform(new_profiles))

for row, (gender_code, age, salary) in enumerate(new_profiles):
    description = "{} aged {} making ${}k will buy iphone :".format(
        GENDER_NAMES[gender_code], age, salary // 1000
    )
    # Slice rather than index so each result prints as a one-element array, e.g. [0]
    print(description, new_predictions[row:row + 1])


Male aged 21 making $40k will buy iphone : [0]
Male aged 21 making $80k will buy iphone : [0]
Female aged 21 making $40k will buy iphone : [0]
Female aged 21 making $80k will buy iphone : [0]
Male aged 41 making $40k will buy iphone : [0]
Male aged 41 making $80k will buy iphone : [1]
Female aged 41 making $40k will buy iphone : [0]
Female aged 68 making $80k will buy iphone : [1]
