<a href="https://colab.research.google.com/github/QadqadK/MachineLearning/blob/main/keras_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook goes along with the Deep Learning using Keras tutorial on Medium.

In [14]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical

In this first model, we are going to predict the hourly wages of employees.

In [15]:
#read in training data
train_df = pd.read_csv('/content/hourly_wages_data.csv')

#view data structure
train_df.head()

Unnamed: 0,wage_per_hour,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,5.1,0,8,21,35,1,1,0,1,0
1,4.95,0,9,42,57,1,1,0,1,0
2,6.67,0,12,1,19,0,0,0,1,0
3,4.0,0,12,4,22,0,0,0,0,0
4,7.5,0,12,17,35,0,1,0,0,0


In [16]:
#create a dataframe with all training data except the target column
train_X = train_df.drop(columns=['wage_per_hour'])

#check that the target variable has been removed
train_X.head()


Unnamed: 0,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,0,8,21,35,1,1,0,1,0
1,0,9,42,57,1,1,0,1,0
2,0,12,1,19,0,0,0,1,0
3,0,12,4,22,0,0,0,0,0
4,0,12,17,35,0,1,0,0,0


In [17]:
#create a dataframe with only the target column
train_y = train_df[['wage_per_hour']]

#view dataframe
train_y.head()

Unnamed: 0,wage_per_hour
0,5.1
1,4.95
2,6.67
3,4.0
4,7.5


In [18]:
#create model
model = Sequential()

#get number of columns in training data
n_cols = train_X.shape[1]

#add model layers
model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

#compile model using mse as a measure of model performance
model.compile(optimizer='adam', loss='mean_squared_error')

#set early stopping monitor so the model stops training when it won't improve anymore
early_stopping_monitor = EarlyStopping(patience=3)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [19]:
#train model
model.fit(train_X, train_y, validation_split=0.2, epochs=30, callbacks=[early_stopping_monitor])

Epoch 1/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - loss: 144.2653 - val_loss: 198.1678
Epoch 2/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 112.0739 - val_loss: 161.2820
Epoch 3/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 84.8256 - val_loss: 119.3265
Epoch 4/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 53.1887 - val_loss: 73.4873
Epoch 5/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 28.2200 - val_loss: 40.7112
Epoch 6/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 20.5673 - val_loss: 29.9803
Epoch 7/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 17.4871 - val_loss: 29.6299
Epoch 8/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 16.4799 - val_loss: 31.5550
Epoch 9/30
[1m14/14[0m [32m━━

<keras.src.callbacks.history.History at 0x792b1ab2edd0>

In [20]:
#training a new model on the same data to show the effect of increasing model capacity

#create model
model_mc = Sequential()

#add model layers
model_mc.add(Dense(200, activation='relu', input_shape=(n_cols,)))
model_mc.add(Dense(200, activation='relu'))
model_mc.add(Dense(200, activation='relu'))
model_mc.add(Dense(1))

#compile model using mse as a measure of model performance
model_mc.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
#train model
model_mc.fit(train_X, train_y, validation_split=0.2, epochs=30, callbacks=[early_stopping_monitor])

Epoch 1/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: 44.3293 - val_loss: 59.6263
Epoch 2/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 17.4784 - val_loss: 33.8023
Epoch 3/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 17.9012 - val_loss: 32.6890
Epoch 4/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 15.8589 - val_loss: 34.2137
Epoch 5/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 24.5852 - val_loss: 37.2614
Epoch 6/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 21.2158 - val_loss: 41.5529


<keras.src.callbacks.history.History at 0x792b171bcad0>

For this next model, we are going to predict if patients have diabetes or not.

In [22]:
#read in training data
train_df_2 = pd.read_csv('/content/diabetes.csv')

#view data structure
train_df_2.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,2,138,62,35,0,33.6,0.127,47,1
1,0,84,82,31,125,38.2,0.233,23,0
2,0,145,0,0,0,44.2,0.63,31,1
3,0,135,68,42,250,42.3,0.365,24,1
4,1,139,62,41,480,40.7,0.536,21,0


In [23]:
#create a dataframe with all training data except the target column
train_X_2 = train_df_2.drop(columns=['DiabetesPedigreeFunction'])

#check that the target variable has been removed
train_X_2.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Age,Outcome
0,2,138,62,35,0,33.6,47,1
1,0,84,82,31,125,38.2,23,0
2,0,145,0,0,0,44.2,31,1
3,0,135,68,42,250,42.3,24,1
4,1,139,62,41,480,40.7,21,0


In [24]:
num_classes = train_df_2['DiabetesPedigreeFunction'].nunique()  # Get the number of unique values

#one-hot encode target column
train_y_2 = to_categorical(train_df_2.DiabetesPedigreeFunction, num_classes=num_classes)

#vcheck that target column has been converted
train_y_2[0:5]

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [25]:
#create model
model_2 = Sequential()

#get number of columns in training data
n_cols_2 = train_X_2.shape[1]

#add layers to model
model_2.add(Dense(250, activation='relu', input_shape=(n_cols_2,)))
model_2.add(Dense(250, activation='relu'))
model_2.add(Dense(250, activation='relu'))
model_2.add(Dense(num_classes, activation='softmax'))

#compile model using accuracy to measure model performance
model_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
#train model
model_2.fit(train_X_2, train_y_2, epochs=30, validation_split=0.2, callbacks=[early_stopping_monitor])

Epoch 1/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.8475 - loss: 1.7401 - val_accuracy: 0.8150 - val_loss: 0.4746
Epoch 2/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9004 - loss: 0.4240 - val_accuracy: 0.9350 - val_loss: 0.2715
Epoch 3/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9252 - loss: 0.2977 - val_accuracy: 0.9350 - val_loss: 0.2862
Epoch 4/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9294 - loss: 0.2747 - val_accuracy: 0.9350 - val_loss: 0.2577
Epoch 5/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9230 - loss: 0.3127 - val_accuracy: 0.9275 - val_loss: 0.2750
Epoch 6/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9315 - loss: 0.2704 - val_accuracy: 0.9350 - val_loss: 0.2500
Epoch 7/30
[1m50/50[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x792b15c87850>