In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Atividade para trabalhar o pré-processamento dos dados.

Criação de modelo preditivo para diabetes e envio para verificação de peformance
no servidor.

@author: Aydano Machado <aydano.machado@gmail.com>
"""

import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
import requests

In [2]:
print('\n - Lendo o arquivo com o dataset sobre diabetes')
data = pd.read_csv('diabetes_dataset.csv')


 - Lendo o arquivo com o dataset sobre diabetes


<h1>Atributos do dataset:</h1>
<p><b>Pregnancies</b>: número de vezes grávida</p>
<p><b>Glucose</b>: concentração plasmática de glicose a 2 horas em um teste oral de tolerância à glicose</p>
<p><b>BloodPressure</b>: pressão arterial diastólica (mm Hg)</p>
<p><b>SkinThickness</b>: espessura da dobra da pele do tríceps (mm)</p>
<p><b>Insulin</b>: insulina sérica de 2 horas (mu U/ml)</p>
<p><b>BMI</b>: índice de massa corporal (peso em kg / (altura em m) ^ 2)</p>
<p><b>DiabetesPedigreeFunction</b>: função de pedigree do diabetes</p>
<p><b>Age</b>: idade (anos)</p>
<p><b>Outcome</b>: variável de classe (0 ou 1) para diabetes</p>

In [111]:
cols = [
 'Glucose',
 'Insulin',
 'Age']

In [3]:
data.corr(method='spearman')

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
Pregnancies,1.0,0.075334,0.189058,0.08236,0.037557,-0.00314,-0.046298,0.583523,0.172776
Glucose,0.075334,1.0,0.232266,0.206046,0.686334,0.226564,0.091681,0.215671,0.469662
BloodPressure,0.189058,0.232266,1.0,0.192117,0.087237,0.247141,0.033308,0.356888,0.11065
SkinThickness,0.08236,0.206046,0.192117,1.0,0.198847,0.687931,0.071795,0.206778,0.261526
Insulin,0.037557,0.686334,0.087237,0.198847,1.0,0.300452,0.149876,0.151474,0.393506
BMI,-0.00314,0.226564,0.247141,0.687931,0.300452,1.0,0.155963,0.081912,0.309852
DiabetesPedigreeFunction,-0.046298,0.091681,0.033308,0.071795,0.149876,0.155963,1.0,0.032686,0.188404
Age,0.583523,0.215671,0.356888,0.206778,0.151474,0.081912,0.032686,1.0,0.248776
Outcome,0.172776,0.469662,0.11065,0.261526,0.393506,0.309852,0.188404,0.248776,1.0


In [53]:
data.isna().sum()

Pregnancies                   0
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                       0
dtype: int64

In [110]:
#data['BloodPressure'] = data['BloodPressure'].fillna(0)
data['Insulin'] = data['Insulin'].fillna(data['Insulin'].mean())
data['Glucose'] = data['Glucose'].fillna(data['Glucose'].mean())
#data['SkinThickness'] = data['SkinThickness'].fillna(data['SkinThickness'].mean())
#data['BMI'] = data['BMI'].fillna(0)

In [112]:
#Tratamento de dados faltantes
# Criando X and y par ao algorítmo de aprendizagem de máquina.\
print(' - Criando X e y para o algoritmo de aprendizagem a partir do arquivo diabetes_dataset')
# Caso queira modificar as colunas consideradas basta algera o array a seguir.
#feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 
#                'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']

X = data[cols]
y = data.Outcome

 - Criando X e y para o algoritmo de aprendizagem a partir do arquivo diabetes_dataset


In [113]:
# padronizando a escala dos valores

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X = scaler.fit_transform(X,y)

In [106]:
# Ciando o modelo preditivo para a base trabalhada
print(' - Criando modelo preditivo')
neigh = KNeighborsClassifier(n_neighbors=3)
#neigh.fit(X, y)

 - Criando modelo preditivo


In [107]:
from sklearn.model_selection import cross_val_score
print(cross_val_score(neigh,X,y,cv=5).mean())

0.6766132723112127


In [99]:
print(' - Aplicando modelo e enviando para o servidor')

data_app = pd.read_csv('diabetes_app.csv',usecols=cols)
data_app = scaler.transform(data_app)
y_pred = neigh.predict(data_app)

 - Aplicando modelo e enviando para o servidor


In [100]:
# Enviando previsões realizadas com o modelo para o servidor

URL = "https://aydanomachado.com/mlclass/01_Preprocessing.php"

#TODO Substituir pela sua chave aqui
DEV_KEY = "Andre Santos"

# json para ser enviado para o servidor
data = {'dev_key':DEV_KEY,
        'predictions':pd.Series(y_pred).to_json(orient='values')}

# Enviando requisição e salvando o objeto resposta
r = requests.post(url = URL, data = data)

# Extraindo e imprimindo o texto da resposta
pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")

 - Resposta do servidor:
 {"status":"success","dev_key":"Andre Santos","accuracy":0.6020408163265306,"old_accuracy":0.58163265306122} 

