In [38]:
import pandas as pd

# Path of the CSV file to load.
caminho = '/content/diabetes_dataset.csv'

# Load the dataset and preview its first rows. On failure a readable message
# is printed and the exception is re-raised: swallowing it would leave `df`
# undefined (or stale from a previous run), so later cells would either fail
# with an unrelated NameError or silently operate on old data.
try:
  df = pd.read_csv(caminho)

  print(df.head())
except FileNotFoundError:
  print(f"O arquivo '{caminho}' nao foi encontrado")
  raise
except Exception as e:
  print(f"Ocorreu um erro ao ler o arquivo: {e}")
  raise

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6    148.0           72.0           35.0      NaN  33.6   
1            1     85.0           66.0           29.0      NaN  26.6   
2            8    183.0           64.0            NaN      NaN  23.3   
3            0    137.0           40.0           35.0    168.0  43.1   
4            5    116.0           74.0            NaN      NaN  25.6   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     2.288   33        1  
4                     0.201   30        0  


In [39]:
# Preview the first ten rows of the loaded frame as plain text.
primeiras_linhas = df.head(10)
print(primeiras_linhas)

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6    148.0           72.0           35.0      NaN  33.6   
1            1     85.0           66.0           29.0      NaN  26.6   
2            8    183.0           64.0            NaN      NaN  23.3   
3            0    137.0           40.0           35.0    168.0  43.1   
4            5    116.0           74.0            NaN      NaN  25.6   
5           10    115.0            NaN            NaN      NaN  35.3   
6            2    197.0           70.0           45.0    543.0  30.5   
7            8    125.0           96.0            NaN      NaN   NaN   
8            4    110.0           92.0            NaN      NaN  37.6   
9           10    168.0           74.0            NaN      NaN  38.0   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     2.288   3

In [40]:
# Shell escape: write the output of `pip freeze` (installed packages and
# their versions) to requirements.txt in the current working directory.
!pip freeze > requirements.txt

In [32]:
# Count the missing entries in each column of `df` and print the totals.
dados_faltantes_por_coluna = df.isna().sum()
print(f"Dados faltantes por coluna:\n{dados_faltantes_por_coluna}")

Dados faltantes por coluna:
Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [33]:
# Percentage of missing values per column.
total_linhas = len(df)
# Multiply the missing fraction by 100 so the value matches its "Porcentagem"
# label; the raw 0-1 fraction was being reported as a percentage before.
porcentagem_faltantes_por_coluna = (dados_faltantes_por_coluna / total_linhas) * 100
print(f"Número total de linhas: {total_linhas}")
print(f"\nPorcentagem de dados faltantes por coluna:\n{porcentagem_faltantes_por_coluna}")

Número total de linhas: 572

Porcentagem de dados faltantes por coluna:
Pregnancies                 0.0
Glucose                     0.0
BloodPressure               0.0
SkinThickness               0.0
Insulin                     0.0
BMI                         0.0
DiabetesPedigreeFunction    0.0
Age                         0.0
Outcome                     0.0
dtype: float64


In [21]:
# Compute the median of each of the three columns once, then expose each
# value under its own notebook-level name.
medianas = {
    coluna: df[coluna].median()
    for coluna in ('Glucose', 'BloodPressure', 'BMI')
}
mediana_glucose = medianas['Glucose']
mediana_bloodpressure = medianas['BloodPressure']
mediana_bmi = medianas['BMI']

# Report the three medians.
print(f"Mediana da coluna 'glucose': {mediana_glucose}")
print(f"Mediana da coluna 'bloodpressure': {mediana_bloodpressure}")
print(f"Mediana da coluna 'BMI': {mediana_bmi}")

Mediana da coluna 'glucose': 117.0
Mediana da coluna 'bloodpressure': 72.0
Mediana da coluna 'BMI': 32.0


In [23]:
# Fill missing values in these three columns with the previously computed
# medians. `Series.fillna` returns a new Series instead of modifying the
# column, so each result must be assigned back; the original calls discarded
# the result and left `df` unchanged.
df['Glucose'] = df['Glucose'].fillna(mediana_glucose)
df['BloodPressure'] = df['BloodPressure'].fillna(mediana_bloodpressure)
df['BMI'] = df['BMI'].fillna(mediana_bmi)

# Last expression of the cell: display the (now imputed) BMI column.
df['BMI']

Unnamed: 0,BMI
0,33.6
1,26.6
2,23.3
3,43.1
4,25.6
...,...
567,22.5
568,36.8
569,26.2
570,30.1


In [29]:
# Report how many null values remain in the three columns listed below.
colunas_verificadas = ['Glucose', 'BloodPressure', 'BMI']
print("Verificando valores nulos após imputação:")
print(df[colunas_verificadas].isna().sum())

Verificando valores nulos após imputação:
Glucose          0
BloodPressure    0
BMI              0
dtype: int64


In [31]:
# KNN imputation

from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler

# Columns handed to the imputer. Only 'SkinThickness' and 'Insulin' are
# written back to `df` at the end of this cell.
colunas_para_imputar_knn = [
    'SkinThickness', 'Insulin', 'Pregnancies', 'Glucose',
    'BloodPressure', 'BMI', 'DiabetesPedigreeFunction', 'Age',
]

# Work on a copy so the scaling/imputation steps do not touch `df` directly.
df_knn_imputacao = df[colunas_para_imputar_knn].copy()

# Step 1: scale the features with MinMaxScaler.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(
    scaler.fit_transform(df_knn_imputacao),
    columns=colunas_para_imputar_knn,
    index=df.index,
)

# Step 2: impute on the scaled values using 5 neighbours.
imputer = KNNImputer(n_neighbors=5)
df_imputed_scaled = pd.DataFrame(
    imputer.fit_transform(df_scaled),
    columns=colunas_para_imputar_knn,
    index=df.index,
)

# Step 3: map the imputed values back to the original scale.
df_imputed_original_scale = pd.DataFrame(
    scaler.inverse_transform(df_imputed_scaled),
    columns=colunas_para_imputar_knn,
    index=df.index,
)

# Step 4: copy the two imputed columns back into `df`.
for coluna in ('SkinThickness', 'Insulin'):
  df[coluna] = df_imputed_original_scale[coluna]

In [35]:
from sklearn.neighbors import KNeighborsClassifier

# `data` is a second name for the same frame object as `df` (no copy is made).
data = df

# Feature columns used for training; the target is the 'Outcome' column.
feature_cols = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
    'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age',
]
X = data[feature_cols]
y = data['Outcome']

# Fit a 3-nearest-neighbours classifier. `fit` returns the estimator itself,
# so the trailing expression displays the same object the call would have.
neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y)
neigh

In [36]:
# Make predictions with the test file: read it, keep only the feature
# columns (in `feature_cols` order), and run the fitted classifier on them.
data_app = pd.read_csv('/content/diabetes_app.csv')[feature_cols]
y_pred = neigh.predict(data_app)
print(y_pred)

[0 1 0 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 1
 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 0 1 0
 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 1 0 1 0 1 0 0
 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 0
 0 1 0 0 0 1 0 0 1 0 0]


In [37]:
import requests

# Endpoint that receives the predictions, and the key sent along with them.
URL = "https://aydanomachado.com/mlclass/01_Preprocessing.php"
DEV_KEY = "Vasco da Gama"

# Form payload: the dev key plus the predictions serialised as a JSON array.
# NOTE: this rebinds the notebook-level name `data`, which the training cell
# also uses for the DataFrame.
data = {'dev_key': DEV_KEY,
        'predictions': pd.Series(y_pred).to_json(orient='values')}

# requests applies no timeout by default, so without `timeout` this cell
# could block forever if the server never answers.
r = requests.post(url=URL, data=data, timeout=30)

pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")

# Surface HTTP 4xx/5xx responses as an exception instead of letting them pass
# silently; called after the print so the response body is still shown.
r.raise_for_status()

 - Resposta do servidor:
 {"status":"success","dev_key":"Vasco da Gama","accuracy":0.5714285714285714,"old_accuracy":0} 

