In [1]:
# importacao de bibliotecas necessarias

# bibliotecas de redes neurais
import tensorflow as tf
from tensorflow import keras
from keras.utils.vis_utils import plot_model

# bibliotecas de manipulacao de variaveis de ambiente e acesso a diretorios
import os, warnings
import glob

# bibliotecas de manipulacao e visualizacao de dados
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Preparação dos Dados

Retomamos a mesma preparação dos dados abordada na [primeira entrega](https://github.com/GFerrazzo/INE5644/blob/main/Primeira%20Entrega%20-%20Trabalho%20Final.ipynb). Dessa vez, retiramos os blocos de código com propósito de visualização da preparação.

In [2]:
# Load the appointments table from the local CSV file (comma-separated, which
# is the pandas default). The file can presumably also be read straight from:
# https://raw.githubusercontent.com/GFerrazzo/INE5644/main/KaggleV2-May-2016.csv
df = pd.read_csv('KaggleV2-May-2016.csv')

In [3]:
# Parse both timestamp columns into pandas datetimes so that date arithmetic
# and the `.dt` accessor can be used on them.
for date_col in ('ScheduledDay', 'AppointmentDay'):
    df[date_col] = pd.to_datetime(df[date_col])

In [4]:
# Encode the two binary text columns as 0/1 integers.
gender_codes = {'M' : 1, 'F' : 0}
no_show_codes = {'Yes' : 1, 'No' : 0}

df['Gender'] = df['Gender'].map(gender_codes)
df['No-show'] = df['No-show'].map(no_show_codes)

# esse bloco foi alterado para nao utilizarmos mais o dtype boolean. o Keras teve algum problema em trabalhar com esse dtype.

In [5]:
# Waiting time between booking and appointment, in whole days.
# `.dt.days` yields the floored day component of each timedelta. It replaces
# `.astype('timedelta64[D]')`, which pandas 2.x rejects because only the
# s/ms/us/ns timedelta resolutions are supported there.
# The +1 offset is kept from the original preparation; presumably it
# compensates for AppointmentDay carrying no time of day - confirm against the data.
df['ScheduleToAppointment'] = (df['AppointmentDay'] - df['ScheduledDay']).dt.days + 1

In [6]:
# Day of the week on which the appointment was booked (Monday=0 ... Sunday=6).
df['ScheduleWeekDay'] = df['ScheduledDay'].dt.dayofweek

In [7]:
# Remove the identifier columns and the raw timestamps; the timestamps have
# already been turned into ScheduleToAppointment and ScheduleWeekDay.
df.drop(
    labels=['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay'],
    axis='columns',
    inplace=True,
)

In [8]:
# Discard rows whose age is negative.
df.drop(index=df.index[df['Age'] < 0], inplace=True)

In [9]:
# Discard rows whose waiting time is negative.
df.drop(index=df.index[df['ScheduleToAppointment'] < 0], inplace=True)

In [10]:
# Fraction of appointments flagged as no-show (positive-class share).
len(df.loc[df['No-show'] == 1]) / len(df)

0.2018982817745044

In [11]:
# The Neighbourhood column is not kept as a model feature.
del df['Neighbourhood']

In [12]:
# Discretise Age into 29 equal-width intervals; precision=0 keeps the
# interval labels at integer precision.
df['Age'] = pd.cut(x=df['Age'], bins=29, precision=0)

In [13]:
# Disabled step: would collapse Handcap levels 1-4 into True and level 0 into
# False. While it stays commented out, Handcap is left unchanged by this cell.
# df['Handcap'] = df['Handcap'].map({1 : True, 2 : True, 3 : True, 4 : True, 0 : False})

In [14]:
# Discretise the waiting time into 30 equal-width intervals; precision=0 keeps
# the interval labels at integer precision.
df['ScheduleToAppointment'] = pd.cut(x=df['ScheduleToAppointment'], bins=30, precision=0)

In [15]:
# Show five random rows to eyeball the prepared columns.
df.sample(n=5)

Unnamed: 0,Gender,Age,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,No-show,ScheduleToAppointment,ScheduleWeekDay
39157,1,"(8.0, 12.0]",0,0,0,0,0,0,0,"(-0.0, 6.0]",3
69323,0,"(44.0, 48.0]",0,0,0,0,0,0,0,"(-0.0, 6.0]",2
30019,1,"(-0.0, 4.0]",0,0,0,0,0,0,0,"(-0.0, 6.0]",4
19948,1,"(52.0, 56.0]",0,1,1,0,0,1,0,"(6.0, 12.0]",0
75185,0,"(16.0, 20.0]",1,0,0,0,0,1,1,"(6.0, 12.0]",0


In [16]:
# Print the column dtypes, non-null counts and memory usage of the prepared frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 110521 entries, 0 to 110526
Data columns (total 11 columns):
 #   Column                 Non-Null Count   Dtype   
---  ------                 --------------   -----   
 0   Gender                 110521 non-null  int64   
 1   Age                    110521 non-null  category
 2   Scholarship            110521 non-null  int64   
 3   Hipertension           110521 non-null  int64   
 4   Diabetes               110521 non-null  int64   
 5   Alcoholism             110521 non-null  int64   
 6   Handcap                110521 non-null  int64   
 7   SMS_received           110521 non-null  int64   
 8   No-show                110521 non-null  int64   
 9   ScheduleToAppointment  110521 non-null  category
 10  ScheduleWeekDay        110521 non-null  int64   
dtypes: category(2), int64(9)
memory usage: 8.6 MB


In [17]:
# Replace each binned (categorical) column with the integer code of its
# interval so the values can be fed to the network as numbers.
for binned_col in ('Age', 'ScheduleToAppointment'):
    df[binned_col] = df[binned_col].cat.codes

# Leitura do Dataset pelo Tensorflow

In [26]:
# Work on an independent (deep) copy so the prepared frame stays intact.
df_model = df.copy(deep=True)

In [27]:
# Split the label column off the feature frame: keep it as `target` and
# remove it from `df_model`.
target = df_model['No-show']
del df_model['No-show']

In [28]:
# Build a tf.data pipeline that yields one (feature row, label) pair per record.
dataset = tf.data.Dataset.from_tensor_slices(
    (df_model.to_numpy(), target.to_numpy())
)

In [29]:
# Print the first ten (features, target) pairs as a sanity check.
line_template = 'Features: {}, Target: {}'
for feat, targ in dataset.take(10):
    print(line_template.format(feat, targ))

Features: [ 0 15  0  1  0  0  0  0  0  4], Target: 0
Features: [ 1 14  0  0  0  0  0  0  0  4], Target: 0
Features: [ 0 15  0  0  0  0  0  0  0  4], Target: 0
Features: [0 2 0 0 0 0 0 0 0 4], Target: 0
Features: [ 0 14  0  1  1  0  0  0  0  4], Target: 0
Features: [ 0 19  0  1  0  0  0  0  0  2], Target: 0
Features: [0 5 0 0 0 0 0 0 0 2], Target: 1
Features: [0 9 0 0 0 0 0 0 0 2], Target: 1
Features: [0 5 0 0 0 0 0 0 0 4], Target: 0
Features: [0 4 0 0 0 0 0 0 0 2], Target: 0


In [30]:
# Shuffle with a buffer as large as the whole table, then group the records
# into mini-batches of 32.
train_dataset = (
    dataset
    .shuffle(buffer_size=len(df_model))
    .batch(batch_size=32)
)

In [31]:
# Small fully connected network: two hidden ReLU layers of 11 units and a
# single linear output unit (no activation, so it emits a raw logit).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(11, activation='relu'))
model.add(tf.keras.layers.Dense(11, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [32]:
# The output layer has no activation, so the model emits raw logits. The loss
# is told so via from_logits=True, and the accuracy metric must use the same
# convention: a logit of 0.0 corresponds to a probability of 0.5. The plain
# 'accuracy' string would compare the logits against 0.5 and misreport
# accuracy. The metric keeps the name 'accuracy' so logs and history keys are
# unchanged.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

In [33]:
# Train for five passes over the shuffled, batched dataset and keep the
# returned History object for later inspection.
model_performance = model.fit(x=train_dataset, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
