In [128]:
import tensorflow as tf 
import numpy as np
import pandas as pd
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [129]:
data = pd.read_csv("diabetes.csv")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [130]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [131]:
X = data.iloc[:,:8]
y = data.iloc[:,8]

In [132]:
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [133]:
y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [134]:
X.shape, y.shape

((768, 8), (768,))

In [135]:
X_train,X_test,y_train,y_test = train_test_split(
    X,y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [136]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [137]:
np.random.seed(42)
tf.random.set_seed(42)

In [138]:

model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(5, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(3, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [139]:
model.compile(
    optimizer='adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

In [140]:
model.summary()

In [141]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_loss",
    patience = 10,
    restore_best_weights = True,
)

In [142]:
history = model.fit(
    X_train,y_train,
    epochs = 100,
    batch_size = 32,
    validation_split = 0.2,
    verbose = 1,
    callbacks= [early_stopping]
)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.4257 - loss: 0.8034 - val_accuracy: 0.3740 - val_loss: 0.7322
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4053 - loss: 0.7856 - val_accuracy: 0.4065 - val_loss: 0.7332
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4196 - loss: 0.7712 - val_accuracy: 0.4065 - val_loss: 0.7330
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4420 - loss: 0.7540 - val_accuracy: 0.4390 - val_loss: 0.7307
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4521 - loss: 0.7565 - val_accuracy: 0.4797 - val_loss: 0.7276
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5051 - loss: 0.7241 - val_accuracy: 0.4878 - val_loss: 0.7242
Epoch 7/100
[1m16/16[0m [32m━━

In [143]:
# validate
loss,acc = model.evaluate(X_test,y_test)
loss,acc

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7532 - loss: 0.5300 


(0.5299904346466064, 0.7532467246055603)

In [144]:
# predict
y_pred_prob = model.predict(X_test).ravel()
y_pred_prob

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


array([0.49080142, 0.44705927, 0.34666714, 0.28145358, 0.06637602,
       0.34807813, 0.5774913 , 0.59337515, 0.1047846 , 0.56565   ,
       0.16666901, 0.67394364, 0.138598  , 0.25035864, 0.19484283,
       0.39707887, 0.45359313, 0.11457954, 0.6339428 , 0.3024257 ,
       0.25936297, 0.5630802 , 0.2513228 , 0.6479202 , 0.47879136,
       0.14591989, 0.50237405, 0.08708533, 0.34456417, 0.07888432,
       0.09302855, 0.08304423, 0.3262818 , 0.38620806, 0.54587513,
       0.14993328, 0.3182667 , 0.09247228, 0.5350541 , 0.55438876,
       0.42740577, 0.47020873, 0.32668012, 0.28996912, 0.23337737,
       0.4299988 , 0.15600197, 0.13528213, 0.4389833 , 0.5070505 ,
       0.5414902 , 0.7540249 , 0.4142413 , 0.10710195, 0.541046  ,
       0.44848353, 0.5858116 , 0.3156486 , 0.51811373, 0.19442162,
       0.67176807, 0.24075536, 0.07914492, 0.5717621 , 0.08849755,
       0.39307767, 0.828844  , 0.13713938, 0.46266487, 0.4617333 ,
       0.17552367, 0.12579465, 0.31066513, 0.3371022 , 0.13335

In [145]:
y_pred = (y_pred_prob >= 0.5).astype(int)

In [146]:
y_pred

array([0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0])

In [147]:
mean_squared_error(y_test,y_pred_prob)

0.17741446197032928

In [148]:
np.sqrt(mean_squared_error(y_test,y_pred_prob))

np.float64(0.42120596146105205)

# Part B

In [149]:
data = pd.read_csv("Boston.csv")
data.head()

Unnamed: 0,crim,zn,indus,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [150]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   nox      506 non-null    float64
 4   rm       506 non-null    float64
 5   age      506 non-null    float64
 6   dis      506 non-null    float64
 7   rad      506 non-null    int64  
 8   tax      506 non-null    int64  
 9   ptratio  506 non-null    float64
 10  black    506 non-null    float64
 11  lstat    506 non-null    float64
 12  medv     506 non-null    float64
dtypes: float64(11), int64(2)
memory usage: 51.5 KB


In [151]:
X = data.iloc[:,:12]
y= data.iloc[:,12]

In [152]:
X.shape, y.shape

((506, 12), (506,))

In [153]:
X_train,X_test,y_train,y_test = train_test_split(
    X,y,
    random_state=42,
    test_size=0.2
)

In [154]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [155]:
np.random.seed(42)
tf.random.set_seed(42)

In [156]:
model = keras.Sequential(
    [
        keras.layers.Input(shape=(X_train.shape[1],)),
        keras.layers.Dense(3, activation=None),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(2, activation=None),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation=None),
    ]
)

In [157]:
model.summary()

In [158]:
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError
model.compile(
    optimizer= Adam(learning_rate=0.01),
    loss = 'mse',
    metrics = [RootMeanSquaredError()]
)

In [159]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 10,
    restore_best_weights = True
)

In [160]:
history = model.fit(
    X_train,y_train,
    epochs = 300,
    batch_size = 32,
    validation_split = 0.2,
    verbose = 1,
    callbacks = [early_stopping]
)

Epoch 1/300
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 608.6987 - root_mean_squared_error: 24.6718 - val_loss: 542.4240 - val_root_mean_squared_error: 23.2900
Epoch 2/300
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 585.3491 - root_mean_squared_error: 24.1940 - val_loss: 521.0058 - val_root_mean_squared_error: 22.8256
Epoch 3/300
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 569.1430 - root_mean_squared_error: 23.8567 - val_loss: 500.7821 - val_root_mean_squared_error: 22.3782
Epoch 4/300
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 546.5208 - root_mean_squared_error: 23.3778 - val_loss: 479.6889 - val_root_mean_squared_error: 21.9018
Epoch 5/300
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 516.7105 - root_mean_squared_error: 22.7313 - val_loss: 461.8419 - val_root_mean_squared_error: 21.4905
Epoch 6/300
[