# Artificial Neural Network

This dataset is a collection of basic health and biological-signal measurements.
The goal is to predict whether a person smokes (a binary label) from these bio-signals using a deep-learning model.

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

## Part 1 - Data Preprocessing

### Importing the dataset

In [53]:
# Read the CSV file into a DataFrame; the path is relative to the working directory.
DATA_PATH = 'smoking.csv'
df = pd.read_csv(DATA_PATH)

In [54]:
# Preview the first ten rows of the loaded frame.
df.head(n=10)

Unnamed: 0,ID,gender,age,height(cm),weight(kg),waist(cm),eyesight(left),eyesight(right),hearing(left),hearing(right),...,hemoglobin,Urine protein,serum creatinine,AST,ALT,Gtp,oral,dental caries,tartar,smoking
0,0,F,40,155,60,81.3,1.2,1.0,1.0,1.0,...,12.9,1.0,0.7,18.0,19.0,27.0,Y,0,Y,0
1,1,F,40,160,60,81.0,0.8,0.6,1.0,1.0,...,12.7,1.0,0.6,22.0,19.0,18.0,Y,0,Y,0
2,2,M,55,170,60,80.0,0.8,0.8,1.0,1.0,...,15.8,1.0,1.0,21.0,16.0,22.0,Y,0,N,1
3,3,M,40,165,70,88.0,1.5,1.5,1.0,1.0,...,14.7,1.0,1.0,19.0,26.0,18.0,Y,0,Y,0
4,4,F,40,155,60,86.0,1.0,1.0,1.0,1.0,...,12.5,1.0,0.6,16.0,14.0,22.0,Y,0,N,0
5,5,M,30,180,75,85.0,1.2,1.2,1.0,1.0,...,16.2,1.0,1.2,18.0,27.0,33.0,Y,0,Y,0
6,6,M,40,160,60,85.5,1.0,1.0,1.0,1.0,...,17.0,1.0,0.7,21.0,27.0,39.0,Y,1,Y,1
7,7,M,45,165,90,96.0,1.2,1.0,1.0,1.0,...,15.0,1.0,1.3,38.0,71.0,111.0,Y,0,Y,0
8,9,F,50,150,60,85.0,0.7,0.8,1.0,1.0,...,13.7,1.0,0.8,31.0,31.0,14.0,Y,0,N,0
9,10,M,45,175,75,89.0,1.0,1.0,1.0,1.0,...,16.0,1.0,0.8,26.0,24.0,63.0,Y,0,N,0


In [55]:
# Recode yes/no flags as integers: every cell equal to 'Y' becomes 1 and
# every cell equal to 'N' becomes 0, across the whole frame.
yes_no_codes = {'Y': 1, 'N': 0}
df = df.replace(to_replace=yes_no_codes)
df.head()

Unnamed: 0,ID,gender,age,height(cm),weight(kg),waist(cm),eyesight(left),eyesight(right),hearing(left),hearing(right),...,hemoglobin,Urine protein,serum creatinine,AST,ALT,Gtp,oral,dental caries,tartar,smoking
0,0,F,40,155,60,81.3,1.2,1.0,1.0,1.0,...,12.9,1.0,0.7,18.0,19.0,27.0,1,0,1,0
1,1,F,40,160,60,81.0,0.8,0.6,1.0,1.0,...,12.7,1.0,0.6,22.0,19.0,18.0,1,0,1,0
2,2,M,55,170,60,80.0,0.8,0.8,1.0,1.0,...,15.8,1.0,1.0,21.0,16.0,22.0,1,0,0,1
3,3,M,40,165,70,88.0,1.5,1.5,1.0,1.0,...,14.7,1.0,1.0,19.0,26.0,18.0,1,0,1,0
4,4,F,40,155,60,86.0,1.0,1.0,1.0,1.0,...,12.5,1.0,0.6,16.0,14.0,22.0,1,0,0,0


In [75]:
# List the column labels of df in order; the positional slices used to build
# X and y (iloc[:, 1:-1] and iloc[:, -1]) depend on this ordering.
df.columns

Index(['ID', 'gender', 'age', 'height(cm)', 'weight(kg)', 'waist(cm)',
       'eyesight(left)', 'eyesight(right)', 'hearing(left)', 'hearing(right)',
       'systolic', 'relaxation', 'fasting blood sugar', 'Cholesterol',
       'triglyceride', 'HDL', 'LDL', 'hemoglobin', 'Urine protein',
       'serum creatinine', 'AST', 'ALT', 'Gtp', 'oral', 'dental caries',
       'tartar', 'smoking'],
      dtype='object')

In [56]:
# Features: every column except the first and the last
# (ID and smoking, per the df.columns listing).
X = df.iloc[:, 1:-1].to_numpy()
# Target: the last column.
y = df.iloc[:, -1].to_numpy()

In [43]:
# Display the feature matrix X (NumPy abbreviates large arrays with "...").
X

array([['F', 40, 155, ..., 'Y', 0, 'Y'],
       ['F', 40, 160, ..., 'Y', 0, 'Y'],
       ['M', 55, 170, ..., 'Y', 0, 'N'],
       ...,
       ['F', 55, 160, ..., 'Y', 0, 'N'],
       ['M', 60, 165, ..., 'Y', 0, 'N'],
       ['M', 55, 160, ..., 'Y', 0, 'Y']], dtype=object)

### Encoding categorical data

Label-encode the "gender" column (the first column of `X`) into integer codes.

In [57]:
from sklearn.preprocessing import LabelEncoder

# Replace the string labels in the first feature column with integer codes.
# The fitted encoder stays available as `le`.
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, 0])
X[:, 0] = gender_codes

In [74]:
# Shape of the feature matrix as (rows, columns).
X.shape

(55692, 25)

### Splitting the dataset into the Training set and Test set

In [59]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is fixed at 0.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

In [60]:
# Display the held-out test features returned by train_test_split.
X_test

array([[1, 60, 155, ..., 1, 0, 1],
       [1, 40, 175, ..., 1, 0, 1],
       [1, 25, 170, ..., 1, 0, 0],
       ...,
       [1, 30, 165, ..., 1, 0, 0],
       [1, 45, 175, ..., 1, 1, 1],
       [1, 40, 160, ..., 1, 0, 1]], dtype=object)

### Feature Scaling

In [61]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same fitted
# scaler to both the training and the test features.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [73]:
# Shape of the scaled training features as (rows, columns).
X_train.shape

(44553, 25)

## Part 2 - Building the ANN

### Initializing the ANN

In [63]:
# Start from an empty sequential model; layers are appended in the cells below.
ann = tf.keras.Sequential()

### Adding the input layer and the first hidden layer

In [64]:
# First hidden layer: 6 fully connected units with ReLU activation.
# No input shape is specified here.
hidden_1 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_1)

### Adding the second hidden layer

In [65]:
# Second hidden layer: 6 fully connected units with ReLU activation.
hidden_2 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_2)

### Adding the output layer

In [66]:
# Output layer: a single sigmoid unit, so the model emits one value in (0, 1)
# per sample.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

## Part 3 - Training the ANN

### Compiling the ANN

In [67]:
# Adam optimizer, binary cross-entropy loss, accuracy tracked as a metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Training the ANN on the Training set

In [68]:
# Train on the scaled training set: 100 epochs, mini-batches of 32 samples.
ann.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f9c100ebeb0>

## Part 4 - Making the predictions and evaluating the model

### Predicting the result of a single observation

Let's predict the smoking status of the person in the third row of our dataset (row index 2).

In [87]:
# Print the row at position 2 with every column visible.
# The display option is scoped with a context manager so it is restored when
# the block exits, instead of permanently changing pandas' global display
# settings (max_columns / max_rows) for every later cell.
with pd.option_context('display.max_columns', None):
    print(df.iloc[[2]])

   ID gender  age  height(cm)  weight(kg)  waist(cm)  eyesight(left)  \
2   2      M   55         170          60       80.0             0.8   

   eyesight(right)  hearing(left)  hearing(right)  systolic  relaxation  \
2              0.8            1.0             1.0     138.0        86.0   

   fasting blood sugar  Cholesterol  triglyceride   HDL    LDL  hemoglobin  \
2                 89.0        242.0         182.0  55.0  151.0        15.8   

   Urine protein  serum creatinine   AST   ALT   Gtp  oral  dental caries  \
2            1.0               1.0  21.0  16.0  22.0     1              0   

   tartar  smoking  
2       0        1  


In [81]:
# Hand-entered feature values for one person, in the same column order as X
# (the first entry is the already-encoded gender value).
observation = [[1,55,170,60,80.0,0.8,0.8,1.0,1.0,138.0,86.0,89.0,242.0,182.0,55.0,151.0,15.8,1.0,1.0,21.0,16.0,22.0,1,0,0]]
# Apply the scaler fitted on the training data before predicting.
observation_scaled = sc.transform(observation)
print(ann.predict(observation_scaled))

[[0.5982854]]


In [84]:
# Same hand-entered observation, scaled with the fitted scaler; the sigmoid
# output is compared against 0.5 to print a boolean decision.
observation = [[1,55,170,60,80.0,0.8,0.8,1.0,1.0,138.0,86.0,89.0,242.0,182.0,55.0,151.0,15.8,1.0,1.0,21.0,16.0,22.0,1,0,0]]
probability = ann.predict(sc.transform(observation))
print(probability > 0.5)

[[ True]]


The model predicts a smoker, which matches the actual label (smoking = 1): this person really does smoke cigarettes.

### Predicting the Test set results

In [86]:
# Threshold the model's sigmoid outputs at 0.5 to get boolean predictions.
y_pred = ann.predict(X_test) > 0.5
# Print predictions (left column) next to the true labels (right column).
pred_col = y_pred.reshape(-1, 1)
true_col = y_test.reshape(-1, 1)
print(np.concatenate((pred_col, true_col), axis=1))

[[0 0]
 [1 1]
 [0 0]
 ...
 [1 0]
 [1 1]
 [1 1]]


### Making the Confusion Matrix

In [70]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of the true test labels against the thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Fraction of test rows classified correctly (shown as the cell's output).
accuracy_score(y_true=y_test, y_pred=y_pred)

[[5394 1682]
 [1008 3055]]


0.7585061495645928