# Artificial Neural Network

## Importing the dataset

In [1]:
# Read the churn data and keep only columns 4 through 14
# (CreditScore ... Exited); the first three columns are dropped.
dataset <- read.csv("Churn_Modelling.csv")[4:14]
head(dataset, n = 6L)

CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
619,France,Female,42,2,0.0,1,1,1,101348.88,1
608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
502,France,Female,42,8,159660.8,3,1,0,113931.57,1
699,France,Female,39,1,0.0,2,0,0,93826.63,0
850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0
645,Spain,Male,44,8,113755.78,2,1,0,149756.71,1


## Encoding the categorical variables as factors

In [2]:
# Replace each category string with the (numeric) position of its level.
# Values that are not among the listed levels become NA.
dataset$Geography <- as.numeric(
  match(dataset$Geography, c("France", "Spain", "Germany"))
)

dataset$Gender <- as.numeric(
  match(dataset$Gender, c("Female", "Male"))
)

In [3]:
# Preview the first rows after encoding Geography and Gender as numbers.
head(dataset, n = 6L)

CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
619,1,1,42,2,0.0,1,1,1,101348.88,1
608,2,1,41,1,83807.86,1,0,1,112542.58,0
502,1,1,42,8,159660.8,3,1,0,113931.57,1
699,1,1,39,1,0.0,2,0,0,93826.63,0
850,2,1,43,2,125510.82,1,1,1,79084.1,0
645,2,2,44,8,113755.78,2,1,0,149756.71,1


## Splitting the dataset into the Training set and Test set

In [4]:
library(caTools)

# Fix the RNG seed so the same rows are selected on every run.
set.seed(123)

# Logical mask over the rows: TRUE -> training set, FALSE -> test set.
split <- sample.split(dataset$Exited, SplitRatio = 0.8)
training_set <- dataset[split, ]
test_set <- dataset[!split, ]

"package 'caTools' was built under R version 3.6.3"

## Feature Scaling

In [5]:
# Standardise every predictor (all columns except column 11, Exited).
#
# The centers and scales are estimated on the training set only and then
# re-used for the test set. Scaling the test set with its own mean/sd would
# put the two sets on different scales and leak test-set statistics into
# the evaluation.
scaled_train <- scale(training_set[-11])
test_set[-11] <- scale(test_set[-11],
                       center = attr(scaled_train, "scaled:center"),
                       scale = attr(scaled_train, "scaled:scale"))
training_set[-11] <- scaled_train

In [6]:
# Preview the first rows of the scaled training set.
head(training_set, n = 6L)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
1,-0.32756459,-0.8895031,-1.092995,0.2962052,-1.0313943,-1.2190517,-0.911112,0.6518863,0.970619,0.0216855,1
3,-1.53810353,-0.8895031,-1.092995,0.2962052,1.043237,1.3330389,2.5466186,0.6518863,-1.030142,0.2400424,1
5,2.06247381,0.3202937,-1.092995,0.3914787,-1.0313943,0.7871702,-0.911112,0.6518863,0.970619,-0.364692,0
6,-0.05855594,0.3202937,0.9148029,0.4867522,1.043237,0.5992723,0.8177533,0.6518863,-1.030142,0.861743,1
7,1.77277219,-0.8895031,0.9148029,1.058393,0.6974651,-1.2190517,0.8177533,0.6518863,0.970619,-1.5624706,0
9,-1.54845001,-0.8895031,0.9148029,0.4867522,-0.3398505,1.051557,0.8177533,-1.533818,0.970619,-0.436599,0


## Fitting ANN to the Training set

In [9]:
# install.packages('h2o')

In [10]:
library(h2o)

# Start (or connect to) a local H2O cluster; nthreads = -1 is passed through
# unchanged from the original call.
h2o.init(nthreads = -1)

# Fit a feed-forward network with two hidden layers of 5 units each.
# NOTE: Exited is left as a numeric 0/1 column, so H2O fits a regression
# model (see the warning it prints); the predictions are thresholded at 0.5
# in the prediction step.
model <- h2o.deeplearning(
  y = "Exited",
  training_frame = as.h2o(training_set),
  hidden = rep(5, 2),
  activation = "Rectifier",
  epochs = 100,
  train_samples_per_iteration = -2
)


----------------------------------------------------------------------

Your next step is to start H2O:
    > h2o.init()

For H2O package documentation, ask for help:
    > ??h2o

After starting H2O, you can use the Web UI at http://localhost:54321
For more information visit https://docs.h2o.ai

----------------------------------------------------------------------


Attaching package: 'h2o'

The following objects are masked from 'package:stats':

    cor, sd, var

The following objects are masked from 'package:base':

    %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
    colnames<-, ifelse, is.character, is.factor, is.numeric, log,
    log10, log1p, log2, round, signif, trunc




H2O is not running yet, starting it now...

Note:  In case of errors look at the following log files:
    C:\Users\luisa\AppData\Local\Temp\Rtmpm48E7K\file204c6df746d9/h2o_luisa_started_from_r.out
    C:\Users\luisa\AppData\Local\Temp\Rtmpm48E7K\file204c670b1a5f/h2o_luisa_started_from_r.err


Starting H2O JVM and connecting:  Connection successful!

R is connected to the H2O cluster: 
    H2O cluster uptime:         5 seconds 286 milliseconds 
    H2O cluster timezone:       America/Mexico_City 
    H2O data parsing timezone:  UTC 
    H2O cluster version:        3.38.0.1 
    H2O cluster version age:    2 months and 4 days  
    H2O cluster name:           H2O_started_from_R_luisa_cti312 
    H2O cluster total nodes:    1 
    H2O cluster total memory:   3.98 GB 
    H2O cluster total cores:    8 
    H2O cluster allowed cores:  8 
    H2O cluster healthy:        TRUE 
    H2O Connection ip:          localhost 
    H2O Connection port:        54321 
    H2O Connection proxy:       NA

"data.table cannot be used without R package bit64 version 0.9.7 or higher.  Please upgrade to take advangage of data.table speedups."



"We have detected that your response column has only 2 unique values (0/1). If you wish to train a binary model instead of a regression model, convert your target column to categorical before training..
"



## Predicting the Test set results

In [13]:
# Score the test predictors (column 11, Exited, removed), threshold the
# numeric output at 0.5 and pull the result back into a plain R vector.
y_pred <- as.vector(
  h2o.predict(model, newdata = as.h2o(test_set[-11])) > 0.5
)

# Show actual vs. predicted side by side for the first few test rows.
head(data.frame(y_test = test_set[11], y_pred = y_pred), n = 6L)

"data.table cannot be used without R package bit64 version 0.9.7 or higher.  Please upgrade to take advangage of data.table speedups."



Unnamed: 0,Exited,y_pred
2,0,0
4,0,0
8,1,1
12,0,0
13,0,0
14,0,0


## Making the Confusion Matrix

In [14]:
# Cross-tabulate actual labels (rows) against predictions (columns).
cm <- table(test_set[, 11], y_pred)
cm

   y_pred
       0    1
  0 1546   47
  1  225  182

In [None]:
# Stop the H2O cluster started above; asks for confirmation first.
h2o.shutdown(prompt = TRUE)