In [None]:
#Charottama Oshmar D. 6599126
#Zhixing Yang 5524726
#Christophorus Ivan Darmasaputra 5699551

In [1]:
#importing all the necessary modules
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, BatchNormalization
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
from keras import optimizers
from keras import regularizers
from keras.utils.np_utils import to_categorical

#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Conv1D, MaxPool1D, Flatten
#from tensorflow.keras.layers import Dense, Dropout

Using TensorFlow backend.


In [2]:
#loading the dataset using pandas
missing_values = ["n/a", "na", "--", ""]

abalone = pd.read_csv("/Users/charottamaoshmar/Desktop/Dataset-20191002/abaloneData.txt", na_values = missing_values)
abalone.columns=['sex', 'length', 'diameter', 'height', 'w1', 'w2', 'w3', 'w4', 'rings']

In [3]:
#checking if dataset is loaded properly
abalone.head()

Unnamed: 0,sex,length,diameter,height,w1,w2,w3,w4,rings
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [4]:
#part of preprocessing, finding out null/missing values
abalone.isnull().sum()

sex         0
length      0
diameter    0
height      0
w1          0
w2          0
w3          0
w4          0
rings       0
dtype: int64

In [5]:
#checking shape of dataset
abalone.shape

(4176, 9)

In [6]:
#finding out the unique values in 'rings'
#this is a regression problem
abalone['rings'].unique()

array([ 7,  9, 10,  8, 20, 16, 19, 14, 11, 12, 15, 18, 13,  5,  4,  6, 21,
       17, 22,  1,  3, 26, 23, 29,  2, 27, 25, 24])

In [7]:
#checking datatypes of each column. Notice that 'sex' 
#is an object dtype column and we need to convert that
abalone.dtypes

sex          object
length      float64
diameter    float64
height      float64
w1          float64
w2          float64
w3          float64
w4          float64
rings         int64
dtype: object

In [8]:
#making the dictionary and replacing the values inside 'sex' column
sex_replace = ({'sex': {'M':0, 'F':1, 'I':2}})

In [9]:
abalone = abalone.replace(sex_replace)

In [10]:
#one hot encoding for the values we assigned in 'sex'

from keras.utils.np_utils import to_categorical

abalone['sex'] = to_categorical(abalone.sex, 3)

In [11]:
#now 'sex' is a float datatype so we can feed to the ML algorithm
abalone.dtypes

sex         float32
length      float64
diameter    float64
height      float64
w1          float64
w2          float64
w3          float64
w4          float64
rings         int64
dtype: object

In [12]:
#rechecking dataset after replacement
abalone.head()

Unnamed: 0,sex,length,diameter,height,w1,w2,w3,w4,rings
0,1.0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,0.0,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,1.0,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,0.0,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,0.0,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [15]:
#shuffling rows in the dataset for the sampling
abalone = abalone.sample(frac=1).reset_index(drop=True)

In [13]:
#assigning columns to x and y. x is features, y is the target (label)
x = abalone.iloc[:, 0:8]
y = abalone.iloc[:, 8]

In [14]:
#showing x (features) of original dataset. x does
#not have 'rings' column, and is shuffled
#(compare to abalone.head() above)
x.head()

Unnamed: 0,sex,length,diameter,height,w1,w2,w3,w4
0,1.0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07
1,0.0,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21
2,1.0,0.44,0.365,0.125,0.516,0.2155,0.114,0.155
3,0.0,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055
4,0.0,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12


In [15]:
x_train = x[:int((len(x)*0.7))]
x_test = x[int((len(x)*0.7)):]
y_train = y[:int((len(y)*0.7))]
y_test = y[int((len(y)*0.7)):]

In [16]:
x_train

Unnamed: 0,sex,length,diameter,height,w1,w2,w3,w4
0,1.0,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700
1,0.0,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100
2,1.0,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550
3,0.0,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550
4,0.0,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.1200
5,0.0,0.530,0.415,0.150,0.7775,0.2370,0.1415,0.3300
6,0.0,0.545,0.425,0.125,0.7680,0.2940,0.1495,0.2600
7,1.0,0.475,0.370,0.125,0.5095,0.2165,0.1125,0.1650
8,0.0,0.550,0.440,0.150,0.8945,0.3145,0.1510,0.3200
9,0.0,0.525,0.380,0.140,0.6065,0.1940,0.1475,0.2100


In [19]:
#chacking the shape after splitting into training and testing dataset
x_train.shape

(2923, 8)

In [20]:
y_test.shape

(1253,)

In [22]:
#now let's build the model. Keras provides a Sequential() model to build
#MLPs and ANNs. 

from keras import regularizers

mlp_model = Sequential()

#Dense(12) is the first hidden layer with 12 nodes which take 8 inputs,
#being the number of features. Regularizer allow penalties to be incorporated in the loss function
# We use the Rectified Linear Unit for the activation of the hidden layers
mlp_model.add(Dense(12, input_dim=8, kernel_regularizer=regularizers.l2(0.01), activation='relu'))

#Dropout(0.2) is a way to prevent overfitting by *dropping out* some of the nodes
#of that layer. 0.2 is the dropout ratio (out of 1)
mlp_model.add(Dropout(0.2))

#Second hidden layer, containing 6 nodes
mlp_model.add(Dense(6, activation='relu'))
mlp_model.add(Dropout(0.2))

#As a regression problem, the output is 1 because there is nothing to classify
#from the result. Acttivation is set as None for this regression problem too
mlp_model.add(Dense(1, activation='linear'))

In [23]:
#Make a summary of the model we made. 
mlp_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 12)                108       
_________________________________________________________________
dropout_1 (Dropout)          (None, 12)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 78        
_________________________________________________________________
dropout_2 (Dropout)          (None, 6)                 0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 7         
Total params: 193
Trainable params: 193
Non-trainable params: 0
_________________________________________________________________


In [24]:
#Keras provides options to use for the loss and optimizer function.
#In a Keras model, these are the hyperparameters that must be defined
#To check the performance we use MSE (mean squared error)
from keras import optimizers

opt = optimizers.Adam(lr=0.1)

mlp_model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mse'])

In [25]:
history = mlp_model.fit(x_train, y_train, epochs=300, batch_size=32, verbose=2)

Epoch 1/300
 - 0s - loss: 25.2828 - mse: 25.1101
Epoch 2/300
 - 0s - loss: 10.9210 - mse: 10.8054
Epoch 3/300
 - 0s - loss: 9.0526 - mse: 8.9112
Epoch 4/300
 - 0s - loss: 8.4914 - mse: 8.2758
Epoch 5/300
 - 0s - loss: 8.7654 - mse: 8.4564
Epoch 6/300
 - 0s - loss: 9.3747 - mse: 9.0676
Epoch 7/300
 - 0s - loss: 8.9724 - mse: 8.6921
Epoch 8/300
 - 0s - loss: 8.9804 - mse: 8.6984
Epoch 9/300
 - 0s - loss: 8.3141 - mse: 8.0418
Epoch 10/300
 - 0s - loss: 8.5963 - mse: 8.3301
Epoch 11/300
 - 0s - loss: 8.8801 - mse: 8.6228
Epoch 12/300
 - 0s - loss: 8.6570 - mse: 8.4028
Epoch 13/300
 - 0s - loss: 8.5356 - mse: 8.2771
Epoch 14/300
 - 0s - loss: 8.4343 - mse: 8.1528
Epoch 15/300
 - 0s - loss: 8.1958 - mse: 7.9225
Epoch 16/300
 - 0s - loss: 8.6078 - mse: 8.3607
Epoch 17/300
 - 0s - loss: 8.4795 - mse: 8.2458
Epoch 18/300
 - 0s - loss: 8.6412 - mse: 8.4108
Epoch 19/300
 - 0s - loss: 8.5821 - mse: 8.3380
Epoch 20/300
 - 0s - loss: 8.5393 - mse: 8.2924
Epoch 21/300
 - 0s - loss: 8.9171 - mse: 8.67

 - 0s - loss: 10.5545 - mse: 10.5545
Epoch 169/300
 - 0s - loss: 10.5254 - mse: 10.5254
Epoch 170/300
 - 0s - loss: 10.5311 - mse: 10.5311
Epoch 171/300
 - 0s - loss: 10.5353 - mse: 10.5353
Epoch 172/300
 - 0s - loss: 10.5253 - mse: 10.5253
Epoch 173/300
 - 0s - loss: 10.5217 - mse: 10.5217
Epoch 174/300
 - 0s - loss: 10.5221 - mse: 10.5221
Epoch 175/300
 - 0s - loss: 10.5375 - mse: 10.5375
Epoch 176/300
 - 0s - loss: 10.5406 - mse: 10.5406
Epoch 177/300
 - 0s - loss: 10.5554 - mse: 10.5554
Epoch 178/300
 - 0s - loss: 10.5200 - mse: 10.5200
Epoch 179/300
 - 0s - loss: 10.5538 - mse: 10.5538
Epoch 180/300
 - 0s - loss: 10.5578 - mse: 10.5578
Epoch 181/300
 - 0s - loss: 10.5359 - mse: 10.5359
Epoch 182/300
 - 0s - loss: 10.5189 - mse: 10.5189
Epoch 183/300
 - 0s - loss: 10.5563 - mse: 10.5563
Epoch 184/300
 - 0s - loss: 10.5445 - mse: 10.5445
Epoch 185/300
 - 0s - loss: 10.5358 - mse: 10.5358
Epoch 186/300
 - 0s - loss: 10.5295 - mse: 10.5295
Epoch 187/300
 - 0s - loss: 10.5445 - mse: 10

KeyboardInterrupt: 

In [None]:
#Printing graph of our training 
%matplotlib inline

plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

In [24]:
#Use the trained model for the dataset. End result is MSE = 4.950728
#which is pretty good
score = mlp_model.evaluate(x_test, y_test)
print(mlp_model.metrics_names[1], score[1])

mse 4.587838


In [25]:
#Checking default learning rate
import keras.backend as K

print(K.eval(mlp_model.optimizer.lr))

0.001


In [26]:
#self=implemented MLP
#we need to use tensorflow v1 because tf.placeholder is deprecated
#in tensorflow v2
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

#input neuron must be of same size to number of features column 
#hidden neurons can be any number, but output neuron is 1 because
#this is a regression problem
n_neurons_h = 8
n_neurons_out = 1
n_input = 8

Instructions for updating:
non-resource variables are not supported in the long term


In [27]:
#defining placeholder variables
X = tf.placeholder(tf.float32, shape=(None, n_input))
Y = tf.placeholder(tf.float32, shape=(None))

In [28]:
#weights and biases
W1 = tf.get_variable("weights1", dtype=tf.float32, initializer=tf.zeros((n_input, n_neurons_h)))
b1 = tf.get_variable("bias1", dtype=tf.float32, initializer=tf.zeros((n_neurons_h)))
W2 = tf.get_variable("weights2", dtype=tf.float32, initializer=tf.zeros((n_input, n_neurons_out)))
b2 = tf.get_variable("bias2", dtype=tf.float32, initializer=tf.zeros((n_neurons_out)))

In [29]:
#make the network
#use relu as the activation function
h = tf.nn.relu(tf.matmul(X, W1)+ b1)
z = tf.matmul(h, W2) + b2
#use MSE to measure the model’s performance
#(the cost function)
cost = tf.losses.mean_squared_error(labels=Y, predictions=z)

In [30]:
n_epochs = 300

#set the learning rate
learning_rate = 0.01

#create an optimizer by using the gradient descent optimization 
#function which is available in tensorflow module
optimizer = tf.train.GradientDescentOptimizer(learning_rate)

#create a train operation by calling the minimise method of the optimizer to update the variables 
train_op = optimizer.minimize(cost)

In [31]:
#initialize the variables and execute training 
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range (n_epochs):
        sess.run(train_op, feed_dict = {X: x_train, Y:y_train})
        print('cost :',sess.run(cost, feed_dict={X:x_test,Y:y_test}))

cost : 105.58313
cost : 101.82592
cost : 98.217354
cost : 94.75157
cost : 91.42261
cost : 88.22583
cost : 85.153854
cost : 82.20502
cost : 79.371864
cost : 76.650925
cost : 74.03779
cost : 71.52757
cost : 69.11674
cost : 66.800934
cost : 64.576935
cost : 62.440723
cost : 60.38899
cost : 58.418194
cost : 56.525215
cost : 54.707344
cost : 52.961002
cost : 51.28365
cost : 49.67273
cost : 48.125305
cost : 46.63916
cost : 45.21154
cost : 43.84051
cost : 42.52345
cost : 41.25844
cost : 40.043404
cost : 38.87625
cost : 37.75529
cost : 36.678658
cost : 35.644444
cost : 34.651237
cost : 33.696938
cost : 32.780437
cost : 31.900112
cost : 31.054487
cost : 30.242222
cost : 29.462206
cost : 28.712788
cost : 27.992914
cost : 27.301653
cost : 26.637432
cost : 25.99951
cost : 25.386765
cost : 24.798183
cost : 24.232779
cost : 23.689672
cost : 23.168007
cost : 22.666899
cost : 22.1856
cost : 21.723198
cost : 21.279089
cost : 20.852442
cost : 20.442581
cost : 20.048803
cost : 19.670687
cost : 19.307518


In [32]:
n_records = x_train.shape

#set the batch size
batch_size = 100

#random choose the index in the total 2923 records
rand_index = np.random.choice(2923, size=batch_size)

#get the training values in these index
x_batch = x_train.values[rand_index, :]
y_batch = y_train.values[rand_index]
y_batch

#execute the validation with the batch size implemented 
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        sess.run(train_op, feed_dict = {X: x_batch, Y:y_batch})
        print(sess.run(cost, feed_dict={X:
        x_test, Y:y_test}))

105.73243
102.11624
98.64001
95.29865
92.08641
88.99864
86.030426
83.176735
80.43358
77.796646
75.26131
72.82371
70.48036
68.22717
66.061264
63.978172
61.97549
60.050148
58.198433
56.41825
54.706345
53.060158
51.477123
49.954746
48.49072
47.082787
45.72868
44.426456
43.17395
41.96931
40.81066
39.696087
38.624054
37.59293
36.60092
35.646786
34.7288
33.845657
32.99607
32.178802
31.392029
30.63559
29.907427
29.207018
28.532814
27.884052
27.259836
26.659088
26.080893
25.524424
24.988876
24.473389
23.977215
23.49958
23.039827
22.597284
22.17115
21.76089
21.365961
20.98567
20.619497
20.266926
19.927399
19.600454
19.28557
18.982384
18.69027
18.408766
18.137825
17.876747
17.6253
17.382948
17.14956
16.924658
16.707968
16.499105
16.297886
16.103981
15.917052
15.736898
15.563261
15.395853
15.234609
15.078901
14.9289875
14.784382
14.644948
14.510469
14.380844
14.255734
14.1351385
14.018776
13.906552
13.798279
13.693814
13.593021
13.495749
13.401989
13.311363
13.223965
13.139572
13.058161
12.979553