# Logistic Regression
---
For an in depth explanation of data preprocessing, please review [Data Preprocessing](../../Data_Preprocessing/data_preprocessing.ipynb)

## Import Libraries

In [114]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

## Import Dataset

In [115]:
dataset = pd.read_csv('../ANN/dataset.csv')
print(dataset.loc[0])

RowNumber                  1
CustomerId          15634602
Surname             Hargrave
CreditScore              619
Geography             France
Gender                Female
Age                       42
Tenure                     2
Balance                  0.0
NumOfProducts              1
HasCrCard                  1
IsActiveMember             1
EstimatedSalary    101348.88
Exited                     1
Name: 0, dtype: object


In [116]:
x = dataset.iloc[:,3:-1].values
y = dataset.iloc[:,-1].values
print(f'X\n{x[0:5]}')
print(f'Y\n{y[0:5]}')

X
[[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 'Female' 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 'Female' 43 2 125510.82 1 1 1 79084.1]]
Y
[1 0 1 0 0]


## Encode The Dataset

In [117]:
le = LabelEncoder()
x[:,2] = le.fit_transform(x[:,2])

ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(), [1])], remainder='passthrough')
x = np.array(ct.fit_transform(x))

print(x[0:12:2])

[[1.0 0.0 0.0 619 0 42 2 0.0 1 1 1 101348.88]
 [1.0 0.0 0.0 502 0 42 8 159660.8 3 1 0 113931.57]
 [0.0 0.0 1.0 850 0 43 2 125510.82 1 1 1 79084.1]
 [1.0 0.0 0.0 822 1 50 7 0.0 2 1 1 10062.8]
 [1.0 0.0 0.0 501 1 44 4 142051.07 2 0 1 74940.5]
 [1.0 0.0 0.0 528 1 31 6 102016.72 2 0 0 80181.12]]


## Split Data Set

In [118]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=.02)
print(f"X Train:\n{x_train[0]}")
print(f"Y Train:\n{y_train[0]}")
print(f"X Test:\n{x_test[0]}")
print(f"Y Test:\n{y_test[0]}")

X Train:
[0.0 0.0 1.0 588 1 32 3 109109.33 1 0 1 4993.94]
Y Train:
0
X Test:
[1.0 0.0 0.0 785 0 38 1 0.0 1 1 0 134964.85]
Y Test:
1


## Feature Scaling

In [119]:
sc = StandardScaler()
# Scaling only needed cols for Train Set
x_train[:,3:4] = sc.fit_transform(x_train[:,3:4])
x_train[:,5:8] = sc.fit_transform(x_train[:,5:8])
x_train[:,-1:] = sc.fit_transform(x_train[:,-1:])
# Scaling only needed cols for Test Set
x_test[:,4:5] = sc.fit_transform(x_test[:,4:5])
x_test[:,5:8] = sc.fit_transform(x_test[:,5:8])
x_test[:,-1:] = sc.fit_transform(x_test[:,-1:])

print(f'Train: {x_train[0]}')
print(f'Test: {x_test[0]}')

Train: [0.0 0.0 1.0 -0.6462814544394493 1 -0.6606168891102312 -0.6985686807706091
 0.5193583591438575 1 0 1 -1.6526238647408258]
Test: [1.0 0.0 0.0 785 -0.9704367948586523 -0.0796510460250523
 -1.2682126227771606 -1.0109077246715523 1 1 0 0.6647054897818259]


## Training The Model

## Predict New Result

## Predicting The Test Set

## Making The Confusion Matrix

## Visualize The Test Results