In [1]:
!pip install imbalanced-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks


In [3]:
#importing dataset from google drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
diabetis =  pd.read_csv('/content/drive/My Drive/Colab Notebooks/diabetis.csv',header = None)

In [5]:
diabetis.head

<bound method NDFrame.head of       0    1   2   3    4     5      6   7  8
0     6  148  72  35    0  33.6  0.627  50  1
1     1   85  66  29    0  26.6  0.351  31  0
2     8  183  64   0    0  23.3  0.672  32  1
3     1   89  66  23   94  28.1  0.167  21  0
4     0  137  40  35  168  43.1  2.288  33  1
..   ..  ...  ..  ..  ...   ...    ...  .. ..
763  10  101  76  48  180  32.9  0.171  63  0
764   2  122  70  27    0  36.8  0.340  27  0
765   5  121  72  23  112  26.2  0.245  30  0
766   1  126  60   0    0  30.1  0.349  47  1
767   1   93  70  31    0  30.4  0.315  23  0

[768 rows x 9 columns]>

In [6]:
X = diabetis.iloc[:,:-1]
y = diabetis.iloc[:,-1]


In [7]:
#scaling the training data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


In [8]:
X

Unnamed: 0,0,1,2,3,4,5,6,7
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [9]:

y.value_counts()


0    500
1    268
Name: 8, dtype: int64

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled,y,random_state = 2,test_size = 0.25,stratify = y)

In [11]:
X_train

array([[0.        , 0.63316583, 0.70491803, ..., 0.40834575, 0.18659266,
        0.        ],
       [0.17647059, 0.69849246, 0.44262295, ..., 0.38152012, 0.1383433 ,
        0.01666667],
       [0.41176471, 0.9798995 , 0.57377049, ..., 0.37406855, 0.03629377,
        0.56666667],
       ...,
       [0.64705882, 0.42713568, 0.60655738, ..., 0.4485842 , 0.09479078,
        0.23333333],
       [0.23529412, 0.56281407, 0.63934426, ..., 0.58718331, 0.06746371,
        0.28333333],
       [0.        , 0.4321608 , 0.55737705, ..., 0.53353204, 0.06831768,
        0.06666667]])

In [12]:
#to get rid of the unbalanced class
oversample = SMOTE()
X_res, y_res = oversample.fit_resample(X_train, y_train)
#tl = TomekLinks(sampling_strategy=0.5)
#X_res, y_res = tl.fit_resample(X,y)

In [13]:
y_res.value_counts()

0    375
1    375
Name: 8, dtype: int64

In [14]:
y_train.shape

(576,)

In [15]:
y_res = to_categorical(y_res)
y_test = to_categorical(y_test)
num_classes = 2

In [16]:
X_train.shape

(576, 8)

In [17]:
def build_model():
  model = Sequential()
  model.add(Dense(6,input_dim=8,activation='relu'))
  model.add(Dense(8,activation='relu'))
  model.add(Dense(12,activation='relu'))
  model.add(Dense(10,activation='relu'))
  model.add(Dense(num_classes,activation='sigmoid'))
  model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
  return model

In [18]:
model = build_model()

In [19]:
y_train.shape

(576,)

In [20]:
y_train

736    0
242    1
498    1
572    0
664    1
      ..
212    0
533    0
559    0
725    0
253    0
Name: 8, Length: 576, dtype: int64

In [22]:
model.summary()
sample_weight = np.ones(shape=(len(y_res),2))
#sample_weight.shape
sample_weight[y_res==[0.,1.] ] = 0.5

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 54        
                                                                 
 dense_1 (Dense)             (None, 8)                 56        
                                                                 
 dense_2 (Dense)             (None, 12)                108       
                                                                 
 dense_3 (Dense)             (None, 10)                130       
                                                                 
 dense_4 (Dense)             (None, 2)                 22        
                                                                 
Total params: 370
Trainable params: 370
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.fit(X_res,y_res,epochs=400,verbose=2)

In [24]:
y_pred = model.predict(X_test)

In [25]:
y_pred

array([[0.6757058 , 0.3996528 ],
       [0.49408197, 0.6140089 ],
       [0.42619896, 0.6955565 ],
       [0.35129017, 0.77893776],
       [0.32928377, 0.8035573 ],
       [0.5449336 , 0.5651586 ],
       [0.3096491 , 0.8275326 ],
       [0.322672  , 0.8105812 ],
       [0.36752662, 0.76189375],
       [0.84105825, 0.17683122],
       [0.48256055, 0.6304091 ],
       [0.7177992 , 0.33994237],
       [0.8987068 , 0.10729671],
       [0.548937  , 0.5675062 ],
       [0.34065127, 0.7895622 ],
       [0.5459013 , 0.5759804 ],
       [0.6178694 , 0.47144666],
       [0.65511036, 0.43366146],
       [0.82288885, 0.19520691],
       [0.82651484, 0.19680601],
       [0.7054847 , 0.3468523 ],
       [0.95529324, 0.03777248],
       [0.41058624, 0.7109484 ],
       [0.82803714, 0.18756938],
       [0.36097258, 0.7689656 ],
       [0.83492756, 0.18833706],
       [0.6649111 , 0.39267457],
       [0.5254778 , 0.58232397],
       [0.7771859 , 0.2480602 ],
       [0.31831276, 0.81542   ],
       [0.

In [26]:
y_test

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.

In [27]:
y_test = np.argmax(y_test,axis = 1)
y_predict = np.argmax(y_pred,axis=1)

In [28]:
y_test.shape

(192,)

In [29]:
y_predict

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1])

In [30]:
confumat = metrics.confusion_matrix(y_test,y_predict)

In [31]:
confumat

array([[94, 31],
       [20, 47]])

In [32]:
result = metrics.classification_report(y_test,y_predict)
print()
print('Classification Results')
print(result)


Classification Results
              precision    recall  f1-score   support

           0       0.82      0.75      0.79       125
           1       0.60      0.70      0.65        67

    accuracy                           0.73       192
   macro avg       0.71      0.73      0.72       192
weighted avg       0.75      0.73      0.74       192

