<h2> Importing Important Packages 

In [34]:
from math import sqrt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

<h2>Importing Dataset that already preprocessed

In [35]:
preprocessed_dataset = pd.read_csv(
    "data/preprocessed.csv", names=['Year-3', 'Year-2', 'Year-1', 'Target'], sep="," ,skiprows=[0])

preprocessed_dataset.head()

Unnamed: 0,Year-3,Year-2,Year-1,Target
0,644642,643266,642120,661179
1,643266,642120,661179,647198
2,642120,661179,647198,621605
3,661179,647198,621605,612393
4,647198,621605,612393,654289


<h2> Split Dataset into 2 gourp (train and test test)

In [36]:
train_data = preprocessed_dataset.sample(frac=0.9, random_state=0)
test_data = preprocessed_dataset.drop(train_data.index)

train_labels = train_data.pop('Target')
test_labels = test_data.pop('Target')

<h2> make Modeling function

In [37]:
def build_model():
    model = keras.Sequential([
        layers.Dense(13, input_dim=3, kernel_initializer='normal', activation='relu'),
        layers.Dense(13, kernel_initializer='normal', activation='relu'),
        layers.Dense(1, kernel_initializer='normal')
    ])

    optimizer = tf.optimizers.RMSprop(0.001)

    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])

    return model

In [38]:
model = build_model()

model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_36 (Dense)             (None, 13)                52        
_________________________________________________________________
dense_37 (Dense)             (None, 13)                182       
_________________________________________________________________
dense_38 (Dense)             (None, 1)                 14        
Total params: 248
Trainable params: 248
Non-trainable params: 0
_________________________________________________________________


<h2>Train model using k-10 fold validation

In [39]:
kfold = KFold(n_splits=10)

estimator = KerasRegressor(build_fn=build_model, epochs=1000, batch_size=5, verbose=0)
results = cross_val_score(estimator, train_data, train_labels, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Results: -1568721947.30 (1509460527.23) MSE


In [40]:
print(results)
estimator.fit(train_data, train_labels)
prediction = estimator.predict(test_data)

[-1.20092595e+09 -2.70398176e+08 -5.04269280e+08 -2.73164134e+09
 -2.52023117e+09 -1.03136064e+08 -9.20863168e+08 -5.01043354e+09
 -2.41814374e+09 -7.17704100e+06]


In [41]:
print(type(test_labels))
print(type(prediction))

<class 'pandas.core.series.Series'>
<class 'numpy.ndarray'>


<h2> Predict test data

In [42]:
predicted_vs_actual = pd.DataFrame(list(zip(prediction, test_labels.tolist())), 
                columns =['Prediction','Actual'])

predicted_vs_actual

Unnamed: 0,Prediction,Actual
0,731872.3125,798232
1,859657.4375,842934
