In [2]:
!git clone https://github.com/Sugar-Care/ML_Stuff.git

fatal: destination path 'ML_Stuff' already exists and is not an empty directory.


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the raw dataset from the cloned repository. The path is relative to the
# working directory, which is where the clone cell places ML_Stuff, so it does
# not depend on the Colab-specific /content prefix.
df = pd.read_csv('ML_Stuff/Dataset/dataset_diabetes_2.csv')

In [3]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [22]:
# Write the current frame to CSV without the index column.
# NOTE(review): in document order this cell runs before 'Target_encoded' is
# added, but its execution count (22) shows it was last run after that step;
# a top-to-bottom run therefore exports a different frame. Confirm which
# version of the file is intended.
df.to_csv(path_or_buf='dataset_cleaned.csv', index=False)

In [4]:
# Order the rows by class name, renumber the index from zero, and append an
# integer-encoded copy of the class label as 'Target_encoded'.
df = (
    df.sort_values(by='Target')
      .reset_index(drop=True)
      .assign(Target_encoded=lambda frame: LabelEncoder().fit_transform(frame['Target']))
)

In [5]:
# Spot-check the label encoding on ten randomly drawn rows.
print(df.loc[:, ['Target', 'Target_encoded']].sample(n=10))

                                           Target  Target_encoded
14499                             Type 2 Diabetes               9
12069                    Steroid-Induced Diabetes               7
11379                    Steroid-Induced Diabetes               7
13380                             Type 1 Diabetes               8
16515  Type 3c Diabetes (Pancreatogenic Diabetes)              10
3730                                         LADA               2
19763                            Wolfram Syndrome              12
17960                   Wolcott-Rallison Syndrome              11
9993                           Secondary Diabetes               6
10847                          Secondary Diabetes               6


In [6]:
# Columns fed to the model as input features.
feature_columns = [
    'Age',
    'Blood Glucose Levels',
    'Blood Pressure',
    'Weight Gain During Pregnancy',
    'Waist Circumference',
    'BMI',
    'Insulin Levels',
    'Cholesterol Levels',
    'Digestive Enzyme Levels',
    'Pulmonary Function',
]
X = df[feature_columns]

# The integer class id is the prediction target.
y = df['Target_encoded']

In [7]:
# Hold out 20% of the rows; the fixed random_state keeps the split reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Divide the held-out rows evenly into a validation set and a test set.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Report the size of each split as "<feature rows> <label rows>".
for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test)):
    print(len(features), len(labels))

16354 16354
2044 2044
2045 2045


In [8]:
# 1D-CNN classifier: each row's feature vector is treated as a length-n
# sequence with a single channel, passed through two conv/pool stages and a
# dense head that outputs one softmax probability per class.
n_features = X_train.shape[1]
n_classes = df['Target'].nunique()

sulthan = tf.keras.Sequential([
    # Declare the input shape with an explicit Input object instead of passing
    # `input_shape` to the first layer, which Keras warns against.
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Reshape((n_features, 1)),
    tf.keras.layers.Conv1D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(n_classes, activation='softmax')
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
# NOTE(review): 'mae' and 'mse' relate integer class ids to softmax outputs and
# do not look meaningful for this classifier; they are kept only because the
# evaluation cell unpacks four values from evaluate().
sulthan.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mae', 'mse'])

  super().__init__(**kwargs)


In [9]:
# Print a summary of the model's layers.
sulthan.summary()

In [10]:
# Fit the network on the training split and score the validation split after
# every epoch; the returned History object is kept in `history`.
history = sulthan.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=200,
    verbose=1,
    validation_data=(X_val, y_val),
)

Epoch 1/200
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.1978 - loss: 4.7278 - mae: 5.9431 - mse: 49.3998 - val_accuracy: 0.5470 - val_loss: 1.2581 - val_mae: 6.0269 - val_mse: 50.2553
Epoch 2/200
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5201 - loss: 1.2784 - mae: 5.9135 - mse: 49.0468 - val_accuracy: 0.6414 - val_loss: 0.9431 - val_mae: 6.0269 - val_mse: 50.2663
Epoch 3/200
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5972 - loss: 1.0115 - mae: 5.9199 - mse: 49.0812 - val_accuracy: 0.6595 - val_loss: 0.8522 - val_mae: 6.0269 - val_mse: 50.2714
Epoch 4/200
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6382 - loss: 0.9156 - mae: 5.8873 - mse: 48.7509 - val_accuracy: 0.6546 - val_loss: 0.8787 - val_mae: 6.0269 - val_mse: 50.2738
Epoch 5/200
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/st

In [19]:
# Distinct class names in sorted order.
unique_targets = sorted(df['Target'].unique().tolist())

# Header line followed by one class name per line.
print("Unique Targets:", *unique_targets, sep="\n")

Unique Targets:
Cystic Fibrosis-Related Diabetes (CFRD)
Gestational Diabetes
LADA
MODY
Neonatal Diabetes Mellitus (NDM)
Prediabetic
Secondary Diabetes
Steroid-Induced Diabetes
Type 1 Diabetes
Type 2 Diabetes
Type 3c Diabetes (Pancreatogenic Diabetes)
Wolcott-Rallison Syndrome
Wolfram Syndrome


In [21]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metrics in order (accuracy, mae, mse).
loss, accuracy, mae, mse = sulthan.evaluate(X_test, y_test, verbose=0)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")

# Per-class probabilities for every test row: the arg-max is the predicted
# class id and the max is the probability assigned to that class. NumPy is
# used for both reductions so the two results are the same kind of array.
y_pred = sulthan.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_pred_probs = np.max(y_pred, axis=1)

print("\nExample Predictions:")
# Show at most 20 examples, without indexing past the end of a small test set.
n_examples = min(20, len(y_test))
for i in range(n_examples):
    # Plain ints index `unique_targets`, which maps class id -> class name.
    predicted_id = int(y_pred_classes[i])
    actual_id = int(y_test.iloc[i])
    predicted_class_name = unique_targets[predicted_id]
    actual_class_name = unique_targets[actual_id]
    print(f"Sample {i+1}: Predicted Class - {predicted_class_name} ({predicted_id}), Actual Class - {actual_class_name} ({actual_id}), Probability - {y_pred_probs[i]:.4f}")

Test Loss: 0.4120
Test Accuracy: 0.8318
Test MAE: 5.9542
Test MSE: 49.0030
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

Example Predictions:
Sample 1: Predicted Class - Wolfram Syndrome (12), Actual Class - Wolfram Syndrome (12), Probability - 0.8343
Sample 2: Predicted Class - Prediabetic (5), Actual Class - Prediabetic (5), Probability - 0.9650
Sample 3: Predicted Class - Wolcott-Rallison Syndrome (11), Actual Class - Wolcott-Rallison Syndrome (11), Probability - 0.9999
Sample 4: Predicted Class - Type 3c Diabetes (Pancreatogenic Diabetes) (10), Actual Class - Type 3c Diabetes (Pancreatogenic Diabetes) (10), Probability - 0.8269
Sample 5: Predicted Class - Type 2 Diabetes (9), Actual Class - Type 2 Diabetes (9), Probability - 0.9986
Sample 6: Predicted Class - Type 1 Diabetes (8), Actual Class - Type 1 Diabetes (8), Probability - 1.0000
Sample 7: Predicted Class - Secondary Diabetes (6), Actual Class - Secondary Diabetes (6), Probability - 0.9998
Sample 8

In [23]:
# Save the trained model to a .keras file in the working directory.
sulthan.save("sulthan_cnn_models.keras")

In [24]:
# Export the class names, one per line, in the same order as `unique_targets`
# (the list the prediction cell indexes by class id). The encoding is pinned
# to UTF-8 so the file does not depend on the platform's locale default.
with open('unique_targets.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{target}\n" for target in unique_targets)