In [19]:
import tensorflow as tf
import pandas as pd 
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

In [20]:
data = pd.read_csv("diabetes.csv")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [21]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [22]:
X = data.iloc[:,:8]
y = data.iloc[:,8]

In [23]:
np.bincount(y)

array([500, 268])

In [24]:
X.shape, y.shape

((768, 8), (768,))

In [25]:
X_train,X_test,y_train,y_test = train_test_split(
    X,y,
    test_size=0.2,
    random_state = 42,
    stratify= y
)

In [26]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [27]:
np.random.seed(42)
tf.random.set_seed(42)

In [28]:
model = keras.Sequential([
     keras.layers.Input(shape=(X_train.shape[1],)),
     keras.layers.Dense(5, activation='relu', kernel_regularizer = tf.keras.regularizers.L2(0.1)),
     keras.layers.BatchNormalization(),
     keras.layers.Dropout(0.3),
     keras.layers.Dense(3, activation='relu', kernel_regularizer = tf.keras.regularizers.L2(0.1)),
     keras.layers.BatchNormalization(),
     keras.layers.Dropout(0.3),
     keras.layers.Dense(1, activation='sigmoid')
 ])

In [31]:
model.compile(
    optimizer = 'adam',
    loss = "binary_crossentropy",
    metrics = ['accuracy']
)

In [32]:
model.summary()

In [34]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_loss",
    patience = 10,
    restore_best_weights = True
)

In [35]:
history = model.fit(
    X_train,y_train,
    epochs = 100,
    batch_size = 32,
    validation_split = 0.2,
    verbose = 1,
    callbacks =[early_stopping]
)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.4460 - loss: 1.8033 - val_accuracy: 0.6098 - val_loss: 1.6205
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4623 - loss: 1.6847 - val_accuracy: 0.6260 - val_loss: 1.5713
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4827 - loss: 1.6706 - val_accuracy: 0.5691 - val_loss: 1.5228
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5153 - loss: 1.5845 - val_accuracy: 0.5935 - val_loss: 1.4763
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5153 - loss: 1.5117 - val_accuracy: 0.5935 - val_loss: 1.4331
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5234 - loss: 1.4792 - val_accuracy: 0.5935 - val_loss: 1.3903
Epoch 7/100
[1m16/16[0m [32m━━

In [38]:
loss,acc = model.evaluate(
    X_test,y_test
)
loss,acc

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7468 - loss: 0.5011 


(0.5011002421379089, 0.7467532753944397)

In [42]:
y_pred_prob = model.predict(X_test).ravel()
y_pred_prob

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


array([0.5802986 , 0.23643686, 0.27575326, 0.38821313, 0.04704934,
       0.2560077 , 0.3814308 , 0.80221915, 0.09947903, 0.72645044,
       0.1717366 , 0.5489545 , 0.09772994, 0.12687856, 0.2577407 ,
       0.3930824 , 0.5661806 , 0.09037449, 0.7271756 , 0.2465395 ,
       0.29507712, 0.71963257, 0.2188121 , 0.78698725, 0.4622981 ,
       0.11933888, 0.6355375 , 0.08211433, 0.34713513, 0.06822731,
       0.07674888, 0.07382142, 0.5196268 , 0.49538115, 0.75980693,
       0.14771408, 0.26832855, 0.08937384, 0.68063253, 0.4853148 ,
       0.31608975, 0.28809693, 0.18031448, 0.32101342, 0.16346337,
       0.37148425, 0.15960537, 0.13429458, 0.5699864 , 0.4496253 ,
       0.46760258, 0.7263398 , 0.29861355, 0.091213  , 0.46040812,
       0.40172145, 0.64492005, 0.2547635 , 0.6447117 , 0.11179188,
       0.69529176, 0.18805984, 0.05257344, 0.7714034 , 0.07366166,
       0.33800718, 0.7538299 , 0.07635782, 0.32197282, 0.647864  ,
       0.1481749 , 0.09953902, 0.35355428, 0.51096094, 0.07918

In [43]:
y_pred = (y_pred_prob >= 0.5).astype(int)
y_pred

array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

In [44]:
mean_absolute_error(y_test,y_pred_prob)

0.33541765809059143

In [46]:
from sklearn.metrics import root_mean_squared_error, mean_squared_error

In [48]:
mean_squared_error(y_test,y_pred_prob)

0.1613183617591858

In [49]:
root_mean_squared_error(y_test,y_pred_prob)

0.4016445577144623

# Part B

In [3]:
import tensorflow as tf 
import pandas as pd 
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [4]:
data = pd.read_csv("Boston.csv")
data.head()

Unnamed: 0,crim,zn,indus,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   nox      506 non-null    float64
 4   rm       506 non-null    float64
 5   age      506 non-null    float64
 6   dis      506 non-null    float64
 7   rad      506 non-null    int64  
 8   tax      506 non-null    int64  
 9   ptratio  506 non-null    float64
 10  black    506 non-null    float64
 11  lstat    506 non-null    float64
 12  medv     506 non-null    float64
dtypes: float64(11), int64(2)
memory usage: 51.5 KB


In [6]:
X = data.iloc[:,:12]
y = data.iloc[:,12]

In [7]:
X.shape, y.shape

((506, 12), (506,))

In [8]:
np.bincount(y)

  np.bincount(y)


array([ 0,  0,  0,  0,  0,  3,  1,  7, 10,  3, 10,  9,  9, 24, 18, 16, 16,
       22, 25, 37, 36, 31, 35, 37, 25, 11,  8,  9, 10, 10,  7,  8,  7,  9,
        5,  4,  6,  5,  1,  1,  0,  2,  2,  3,  2,  1,  2,  0,  3,  0, 16])

In [9]:
X_train,X_test,y_train,y_test = train_test_split(
    X,y,
    random_state = 42,
    test_size = 0.2
)

In [10]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
from tensorflow.keras import regularizers

model = keras.Sequential(
    [
        keras.layers.Input(shape=(X_train.shape[1],)),
        keras.layers.Dense(
            3, activation=None, kernel_regularizer=regularizers.L2(0.01)
        ),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(
            2, activation=None, kernel_regularizer=regularizers.L2(0.01)
        ),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation=None),
    ]
)

In [None]:
from keras.metrics import RootMeanSquaredError

In [16]:
model.compile(
    optimizer = keras.optimizers.Adam(learning_rate=0.01),
    loss = "mse",
    metrics = [keras.metrics.RootMeanSquaredError()]
    )

In [17]:
model.summary()

In [19]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_loss",
    patience = 10,
    restore_best_weights = True 
)

In [20]:
history = model.fit(
    X_train,y_train,
    epochs = 100,
    batch_size = 32,
    validation_split = 0.2,
    verbose = 1,
    callbacks = [early_stopping]
)

Epoch 1/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 615.4842 - root_mean_squared_error: 24.8079 - val_loss: 552.1129 - val_root_mean_squared_error: 23.4960
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 604.9148 - root_mean_squared_error: 24.5939 - val_loss: 542.9128 - val_root_mean_squared_error: 23.2994
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 593.5216 - root_mean_squared_error: 24.3612 - val_loss: 530.5380 - val_root_mean_squared_error: 23.0323
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 579.3836 - root_mean_squared_error: 24.0693 - val_loss: 514.5491 - val_root_mean_squared_error: 22.6825
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 555.5889 - root_mean_squared_error: 23.5698 - val_loss: 495.1848 - val_root_mean_squared_error: 22.2516
Epoch 6/100
[

In [21]:
model.summary()

In [22]:
model.evaluate(
    X_test,y_test
)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 36.6647 - root_mean_squared_error: 6.0496


[36.66472244262695, 6.049586772918701]

In [28]:
y_pred = model.predict(X_test).ravel()

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


In [29]:
y_pred

array([25.08019  , 27.225359 , 14.070219 , 20.3683   , 16.68026  ,
       19.841469 , 15.672447 , 12.660824 , 19.283836 , 18.251467 ,
       18.994753 , 17.106518 , -5.3200564, 18.448116 , 16.890146 ,
       22.043722 , 17.224363 ,  4.947438 , 33.4078   , 15.676867 ,
       22.182703 , 24.41693  , 10.533539 , 21.030424 , 16.022068 ,
       13.329239 , 18.954851 , 13.801767 , 16.981213 , 17.002241 ,
       19.406137 , 21.629446 , 22.085634 , 16.515375 , 14.103056 ,
       17.222216 , 27.04925  , 17.380804 , 17.725769 , 19.94692  ,
       12.324301 , 26.742405 , 34.652958 , 14.638563 , 23.485064 ,
       15.016142 , 12.881725 , 21.044216 , 17.067156 , 26.532776 ,
       19.304213 , 28.683294 , 14.45186  , 22.792961 , 33.806225 ,
       18.953428 , 16.983192 , 26.286955 , 21.24018  , 11.490976 ,
       19.487413 , 25.12282  , 26.140327 , 13.751307 , 18.499262 ,
       12.074436 , 17.879803 , 21.542936 , 24.83816  , 11.272459 ,
       16.898174 , 22.994955 , 10.196117 , 13.807495 , 19.8229

In [30]:
from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error

In [31]:
mean_squared_error(y_test,y_pred)

36.5975002635851

In [32]:
root_mean_squared_error(y_test,y_pred)

6.049586784532072

In [33]:
mean_absolute_error(y_test,y_pred)

4.0929549815607995