In [1]:
import numpy as np # arrays & loading data
import tensorflow as tf # deep-learning framework (Keras models, losses, optimizers)
from tensorflow.keras.models import Sequential  # model type that we will use
from tensorflow.keras.layers import Dense # we will use Dense layers

# Reduce console noise: only let TensorFlow's logger emit messages at
# ERROR level, and set AutoGraph's verbosity to 0.
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

In [2]:
# Read the complete dataset from the CSV file into one 2D float array.
# The first line of the file is skipped (presumably a header row - confirm).
data = np.loadtxt(fname='./diabetes_data.csv', skiprows=1, delimiter=',')
print(data)

[[40.  1.  0. ...  1.  1.  1.]
 [58.  1.  0. ...  1.  0.  1.]
 [41.  1.  1. ...  1.  0.  1.]
 ...
 [58.  0.  1. ...  0.  1.  1.]
 [32.  0.  0. ...  1.  0.  0.]
 [42.  1.  0. ...  0.  0.  0.]]


In [3]:
# Inputs: every column except the last one.
X = data[:, :-1]
# Output: the last column. Slicing with `-1:` (instead of indexing with `-1`)
# keeps the column axis, so y is already 2D with shape (n_samples, 1),
# which the later element-wise comparisons against predictions rely on.
y = data[:, -1:]

# Show the first five rows of each as a quick sanity check.
print(X[:5], y[:5], sep="\n")

[[40.  1.  0.  1.  0.  1.  0.  0.  0.  1.  0.  1.  0.  1.  1.  1.]
 [58.  1.  0.  0.  0.  1.  0.  0.  1.  0.  0.  0.  1.  0.  1.  0.]
 [41.  1.  1.  0.  0.  1.  1.  0.  0.  1.  0.  1.  0.  1.  1.  0.]
 [45.  1.  0.  0.  1.  1.  1.  1.  0.  1.  0.  1.  0.  0.  0.  0.]
 [60.  1.  1.  1.  1.  1.  1.  0.  1.  1.  1.  1.  1.  1.  1.  1.]]
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [4]:
# Split the data into training (60%), cross-validation (20%) and test (20%) sets.
from sklearn.model_selection import train_test_split

# Step 1: keep 60% for training, hold out the remaining 40%.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.40, random_state=1
)

# Step 2: cut the held-out 40% in half -> CV set (20%) and test set (20%).
X_cv, X_test, y_cv, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.50, random_state=1
)
# The intermediate hold-out arrays are not needed any more.
del X_holdout, y_holdout

# Report the shape of every split, one per line.
print(
    "\n".join(
        f"{label} shape:{array.shape}"
        for label, array in (
            ("training input", X_train),
            ("training output", y_train),
            ("cv input", X_cv),
            ("cv output", y_cv),
            ("test input", X_test),
            ("test output", y_test),
        )
    )
)

training input shape:(312, 16)
training output shape:(312, 1)
cv input shape:(104, 16)
cv output shape:(104, 1)
test input shape:(104, 16)
test output shape:(104, 1)


In [5]:
# sequential model structure

# Seed the Python, NumPy and TensorFlow random generators before the model
# is built, so the initial weights (and therefore the errors reported
# further down) are reproducible on a fresh Restart & Run All.
# The value 1 matches the random_state used for the data splits.
tf.keras.utils.set_random_seed(1)

# TODO (author's note): CHANGE TO CONVOLUTIONAL
model = Sequential(
    [
        tf.keras.Input(shape=(16,)),  # input size (each patient has 16 features)
        Dense(10, activation="sigmoid", name="layer1"),  # hidden layer, 10 units
        Dense(5, activation="sigmoid", name="layer2"),   # hidden layer, 5 units
        # single sigmoid unit: output lies in (0, 1) and is thresholded at 0.5 later
        Dense(1, activation="sigmoid", name="layer3"),
    ],
    name="binary_model",
)

In [6]:
# Print a per-layer overview of the model: layer names, output shapes and
# the number of trainable parameters (weights w and biases b) in each layer.
model.summary()

In [7]:

# Configure training: choose the loss function and the optimizer.
model.compile(
    # Adam: a pre-implemented optimizer; similar in spirit to gradient
    # descent but a much improved version. 0.02 is the learning rate.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.02),
    # Binary cross-entropy: the loss for a two-class (0/1) output.
    loss=tf.keras.losses.BinaryCrossentropy(),
)

In [8]:
# Train the model: 54 full passes (epochs) over the training set.
# Left as the cell's last expression so the returned History object is shown.
model.fit(x=X_train, y=y_train, epochs=54)

Epoch 1/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6809  
Epoch 2/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6371 
Epoch 3/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - loss: 0.6014  
Epoch 4/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - loss: 0.5369  
Epoch 5/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4698 
Epoch 6/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - loss: 0.3682  
Epoch 7/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3133 
Epoch 8/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2806 
Epoch 9/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2205
Epoch 10/54
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2154 

<keras.src.callbacks.history.History at 0x204ab2bd6a0>

In [9]:
def classification_error(model, X, y, threshold=0.5):
    """Compute the fraction of misclassified samples for a binary classifier.

    Parameters
    ----------
    model : object with a ``predict(X)`` method returning one probability
        per sample.
    X : array of input features, one row per sample.
    y : array of true 0/1 labels, one per sample (1D or column vector).
    threshold : probabilities greater than or equal to this value are
        labelled 1, all others 0.

    Returns
    -------
    (error, labels) : the misclassification rate (wrong predictions divided
        by the number of samples) and the thresholded 0/1 predictions in the
        shape returned by ``model.predict``.

    Raises
    ------
    ValueError : if the number of predictions differs from the number of labels.
    """
    labels = np.where(model.predict(X) >= threshold, 1, 0)

    # Compare flattened arrays: comparing an (n, 1) prediction with an (n,)
    # label vector would silently broadcast to an (n, n) matrix and yield a
    # meaningless "error".
    predicted_flat = labels.reshape(-1)
    expected_flat = np.asarray(y).reshape(-1)
    if predicted_flat.shape != expected_flat.shape:
        raise ValueError(
            f"got {predicted_flat.size} predictions for {expected_flat.size} labels"
        )

    # arithmetic mean of the mismatch mask = count of wrong predictions / n
    return np.mean(predicted_flat != expected_flat), labels


# fraction of misclassified outputs in the TRAINING SET
error, y_predicted = classification_error(model, X_train, y_train)
print(f"Training Set Classification Error: {error}")

# do the same for CV set
error2, y_predicted = classification_error(model, X_cv, y_cv)
print(f"CV Set Classification Error: {error2}")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Training Set Classification Error: 0.060897435897435896
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
CV Set Classification Error: 0.07692307692307693


In [10]:
# Misclassification rate on the TEST set - this is the error to publish.
# Probabilities of at least 0.5 become class 1, everything else class 0.
y_predicted = (model.predict(X_test) >= 0.5).astype(int)
# Mean of the boolean mismatch mask = wrong predictions / number of samples.
error3 = (y_predicted != y_test).mean()
print(f"TEST Set Classification Error: {error3}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
TEST Set Classification Error: 0.07692307692307693


In [11]:
# user interaction
def _ask_int(prompt, low, high):
    """Prompt until the user types a whole number between low and high (inclusive).

    prompt -- text passed unchanged to input().
    low, high -- inclusive bounds of the accepted range.
    Returns the accepted value as an int.
    """
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            # a typo must not abort the whole cell - ask again instead
            print(f"'{raw.strip()}' is not a whole number, please try again.")
            continue
        if low <= value <= high:
            return value
        print(f"Please enter a value between {low} and {high}.")


def _ask_flag(prompt):
    """Prompt until the user answers a yes/no question with 1 or 0."""
    return _ask_int(prompt, 0, 1)


# 0-130 is only a plausibility bound for a human age, not a model requirement.
age = _ask_int("age :", 0, 130)
male = _ask_flag("male (1/0) :")
polyuria = _ask_flag("polyuria (1/0) :")
polydipsia = _ask_flag("polydipsia (1/0) :")
sudden_weight_loss = _ask_flag("sudden_weight_loss (1/0) :")

weakness = _ask_flag("weakness (1/0) :")
polyphagia = _ask_flag("polyphagia (1/0) :")
genital_thrush = _ask_flag("genital_thrush (1/0) :")
visual_blurring = _ask_flag("visual_blurring (1/0) :")
itching = _ask_flag("itching (1/0) :")

irritability = _ask_flag("irritability (1/0) :")
delayed_healing = _ask_flag("delayed_healing (1/0) :")
partial_paresis = _ask_flag("partial_paresis (1/0) :")
muscle_stiffness = _ask_flag("muscle_stiffness (1/0) :")
alopecia = _ask_flag("alopecia (1/0) :")
obesity = _ask_flag("obesity (1/0) :")

# One row with the 16 features, i.e. a 2D array of shape (1, 16).
# NOTE(review): this order is assumed to match the CSV column order used
# for training - confirm against the file header.
x_user = np.array([[
    age, male, polyuria, polydipsia, sudden_weight_loss,
    weakness, polyphagia, genital_thrush, visual_blurring, itching,
    irritability, delayed_healing, partial_paresis, muscle_stiffness,
    alopecia, obesity,
]])

# Predicted probability -> hard 0/1 label using the same 0.5 threshold
# as in the evaluation cells.
y_predicted = model.predict(x_user)
y_predicted = np.where(y_predicted >= 0.5, 1, 0)

# y_predicted has shape (1, 1); read the single label as a scalar instead
# of relying on the truthiness of a one-element array.
if y_predicted[0, 0] == 1:
    print("Diabetes is likely to appear.")
else:
    print("Diabetes is unlikely to appear.")

age : 80
male (1/0) : 1
polyuria (1/0) : 0
polydipsia (1/0) : 0
sudden_weight_loss (1/0) : 0
weakness (1/0) : 0
polyphagia (1/0) : 0
genital_thrush (1/0) : 0
visual_blurring (1/0) : 0
itching (1/0) : 0
irritability (1/0) : 0
delayed_healing (1/0) : 0
partial_paresis (1/0) : 1
muscle_stiffness (1/0) : 1
alopecia (1/0) : 1
obesity (1/0) : 0


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Diabetes is unlikely to appear.
