# Diabetes Prediction

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler

In [3]:
data = pd.read_csv('KTH_research_data.csv')
data.head()

Unnamed: 0,HbA1c_level,blood_glucose_level_fasting,age,bmi,diabetes
0,6.6,140,80.0,25.19,0
1,6.6,80,54.0,27.32,0
2,5.7,158,28.0,27.32,0
3,5.0,155,36.0,23.45,0
4,4.8,155,76.0,20.14,0


In [4]:
data.shape

(100000, 5)

In [5]:
data.isna().any()

HbA1c_level                    False
blood_glucose_level_fasting    False
age                            False
bmi                            False
diabetes                       False
dtype: bool

In [6]:
data.dtypes

HbA1c_level                    float64
blood_glucose_level_fasting      int64
age                            float64
bmi                            float64
diabetes                         int64
dtype: object

In [7]:
X = data.drop(columns=['diabetes'], axis=1).values
y = data['diabetes'].values
y

array([0, 0, 0, ..., 0, 0, 0])

In [8]:
# scaler = MinMaxScaler()
# X_Scaled = scaler.fit_transform(X[X.columns])

In [9]:
X

array([[  6.6 , 140.  ,  80.  ,  25.19],
       [  6.6 ,  80.  ,  54.  ,  27.32],
       [  5.7 , 158.  ,  28.  ,  27.32],
       ...,
       [  5.7 , 155.  ,  66.  ,  27.83],
       [  4.  , 100.  ,  24.  ,  35.42],
       [  6.6 ,  90.  ,  57.  ,  22.43]])

In [10]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
model = RandomForestClassifier(n_estimators=200)

In [12]:
model.fit(X_train, y_train)

In [13]:
prediction = model.predict(X_valid)
print("Accuracy Score: ", accuracy_score(prediction, y_valid))

Accuracy Score:  0.96745


In [14]:
confusion_matrix(prediction, y_valid)

array([[18151,   510],
       [  141,  1198]])

In [15]:
prediction[0:100]

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0])

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Normalization, Dropout

2023-05-23 14:31:34.210228: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
model_tensorflow = Sequential([
    Dense(32, activation='relu'),
    Dense(64, activation='relu'),
#     Dropout(0.05),
    
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
#     Normalization(),
    
    Dense(64, activation='relu'),
    Dense(16, activation='relu'),
    
    Dense(1, activation='sigmoid')
])

In [18]:
model_tensorflow.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy']
)

In [19]:
X_train

array([[  3.5 ,  80.  ,  73.  ,  24.77],
       [  5.7 , 145.  ,  80.  ,  24.6 ],
       [  4.  , 158.  ,  38.  ,  24.33],
       ...,
       [  5.8 ,  85.  ,  42.  ,  26.14],
       [  6.2 , 158.  ,  37.  ,  24.96],
       [  5.  , 159.  ,  23.  ,  27.99]])

In [20]:
model_tensorflow.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x15edbdba0>

In [21]:
model_tensorflow.evaluate(X_valid, y_valid)



[0.11058790236711502, 0.9604499936103821]

In [33]:
# Convert the model.
converter = tf.lite.TFLiteConverter.from_keras_model(model_tensorflow)
tflite_model = converter.convert()



INFO:tensorflow:Assets written to: /var/folders/_m/45rzr5c52sl0xy3rr96lz9rr0000gp/T/tmpkjswhh6v/assets


INFO:tensorflow:Assets written to: /var/folders/_m/45rzr5c52sl0xy3rr96lz9rr0000gp/T/tmpkjswhh6v/assets
2023-05-23 14:41:07.598503: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-05-23 14:41:07.598541: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-05-23 14:41:07.603910: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/_m/45rzr5c52sl0xy3rr96lz9rr0000gp/T/tmpkjswhh6v
2023-05-23 14:41:07.607155: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-05-23 14:41:07.607183: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /var/folders/_m/45rzr5c52sl0xy3rr96lz9rr0000gp/T/tmpkjswhh6v
2023-05-23 14:41:07.623688: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-05-23 14:41:07.740756: I tensorflow/cc/saved_model/loader.cc:215] Running initialization

In [34]:
# Save the model.
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)

In [140]:
model_tensorflow()

array([[  6.6 , 140.  ,  80.  ,  25.19],
       [  6.6 ,  80.  ,  54.  ,  27.32],
       [  5.7 , 158.  ,  28.  ,  27.32],
       ...,
       [  5.7 , 155.  ,  66.  ,  27.83],
       [  4.  , 100.  ,  24.  ,  35.42],
       [  6.6 ,  90.  ,  57.  ,  22.43]])

In [31]:
data[data['diabetes'] == 1].head(10)

Unnamed: 0,HbA1c_level,blood_glucose_level_fasting,age,bmi,diabetes
6,6.5,200,44.0,19.31,1
26,6.5,200,67.0,27.32,1
38,5.7,260,50.0,27.32,1
40,9.0,160,73.0,25.91,1
53,7.0,159,53.0,27.32,1
55,9.0,159,50.0,37.16,1
59,8.8,155,67.0,63.48,1
81,8.2,126,57.0,27.32,1
87,6.2,220,36.0,32.27,1
94,7.5,300,60.0,27.32,1


In [32]:
model_tensorflow.predict([[8.2, 126, 57, 27.32]])



array([[0.999607]], dtype=float32)

In [30]:
model_tensorflow.predict([[7.0, 159, 53, 27.31]])



array([[0.10650457]], dtype=float32)

In [28]:
model_tensorflow.predict([[9.0, 160, 73, 25.91]])



array([[0.999922]], dtype=float32)

In [26]:
model_tensorflow.predict([[5.0, 155, 36.0, 23.45]])



array([[0.00403656]], dtype=float32)

In [148]:
model_tensorflow.input_shape

(None, 4)

In [147]:
a = np.array([[6.5, 200, 44.0, 13.31]])
a.shape

(1, 4)

In [178]:
model_tensorflow.predict(a)



array([[0.44628316]], dtype=float32)

In [179]:
preds = model_tensorflow.predict(X_valid)



In [188]:
model.predict(a)

array([0])

In [194]:
!pip3 install tensorflow_decision_forests

Collecting tensorflow_decision_forests
  Downloading tensorflow_decision_forests-1.3.0-cp310-cp310-macosx_10_15_x86_64.whl (12.0 MB)
[K     |████████████████████████████████| 12.0 MB 638 kB/s eta 0:00:01    |███████████████████████         | 8.6 MB 2.3 MB/s eta 0:00:02
[?25hCollecting wheel
  Downloading wheel-0.40.0-py3-none-any.whl (64 kB)
[K     |████████████████████████████████| 64 kB 2.0 MB/s eta 0:00:01
[?25hCollecting wurlitzer
  Downloading wurlitzer-3.0.3-py3-none-any.whl (7.3 kB)
Collecting absl-py
  Downloading absl_py-1.4.0-py3-none-any.whl (126 kB)
[K     |████████████████████████████████| 126 kB 7.9 MB/s eta 0:00:01
[?25hCollecting tensorflow~=2.12.0
  Downloading tensorflow-2.12.0-cp310-cp310-macosx_10_15_x86_64.whl (230.1 MB)
[K     |████████████████████████████████| 230.1 MB 56 kB/s  eta 0:00:01   |                                | 491 kB 4.8 MB/s eta 0:00:48     |                                | 706 kB 4.8 MB/s eta 0:00:48     |█                               

Collecting pytz>=2020.1
  Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)
[K     |████████████████████████████████| 502 kB 1.7 MB/s eta 0:00:01
[?25hCollecting tzdata>=2022.1
  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)
[K     |████████████████████████████████| 341 kB 2.8 MB/s eta 0:00:01
[?25hUsing legacy 'setup.py install' for grpcio, since package 'wheel' is not installed.
Building wheels for collected packages: jax
  Building wheel for jax (PEP 517) ... [?25ldone
[?25h  Created wheel for jax: filename=jax-0.4.10-py3-none-any.whl size=1480503 sha256=6dffe5be7a03b56a4e229abb3edf98480e3f0c22a4b6b7466e6d0068eaec3076
  Stored in directory: /Users/sudaisalam/Library/Caches/pip/wheels/2f/04/51/ebc9c5225f0a0df1e56c231c1f4c9b7afd3e024ebb492eed99
Successfully built jax
Installing collected packages: urllib3, pyasn1, idna, charset-normalizer, certifi, six, rsa, requests, pyasn1-modules, oauthlib, cachetools, requests-oauthlib, numpy, MarkupSafe, google-auth, wheel, we

In [195]:
!python3 -c "import tensorflow_decision_forests as tfdf; print('Found TF-DF v' + tfdf.__version__)"

  Referenced from: /Users/sudaisalam/Sudais/WORK/fiverr/kth_diabetes model/env/lib/python3.10/site-packages/tensorflow_decision_forests/tensorflow/ops/inference/inference.so (which was built for Mac OS X 12.3)
  Expected in: /usr/lib/libc++.1.dylib

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/sudaisalam/Sudais/WORK/fiverr/kth_diabetes model/env/lib/python3.10/site-packages/tensorflow_decision_forests/__init__.py", line 64, in <module>
    from tensorflow_decision_forests import keras
  File "/Users/sudaisalam/Sudais/WORK/fiverr/kth_diabetes model/env/lib/python3.10/site-packages/tensorflow_decision_forests/keras/__init__.py", line 53, in <module>
    from tensorflow_decision_forests.keras import core
  File "/Users/sudaisalam/Sudais/WORK/fiverr/kth_diabetes model/env/lib/python3.10/site-packages/tensorflow_decision_forests/keras/core.py", line 61, in <module>
    from tensorflow_decision_forests.keras import core_inference
  File 

In [196]:
pip install wurlitzer


Collecting wurlitzer
  Using cached wurlitzer-3.0.3-py3-none-any.whl (7.3 kB)
Installing collected packages: wurlitzer
Successfully installed wurlitzer-3.0.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [197]:
import tensorflow_decision_forests as tfdf


ModuleNotFoundError: No module named 'tensorflow_decision_forests'