In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Name of the column the model is trained to predict.
target = 'mixed_fuel_consumption_per_100_km_l'

# Columns to load from the CSV: the predictors first, then the label.
# `usecols` only filters; the frame keeps the column order found in the file.
column = [
    'number_of_cylinders',
    'engine_type',
    'engine_hp',
    'engine_hp_rpm',
    'transmission',
    'acceleration_0_100_km/h_s',
    'fuel_grade',
    target,
]

df = pd.read_csv(
    'Dataset/dataset_cleaned.csv',
    usecols=column,
    low_memory=False,
)
df.head()

Unnamed: 0,number_of_cylinders,engine_type,engine_hp,engine_hp_rpm,transmission,mixed_fuel_consumption_per_100_km_l,acceleration_0_100_km/h_s,fuel_grade
0,8,0,354.0,6500,1,13.4,5.6,3
1,8,0,326.0,5800,1,12.2,6.1,3
2,6,0,240.0,5750,0,13.2,8.7,2
3,6,0,290.0,6200,0,10.4,7.6,3
4,6,0,255.0,6800,1,10.0,5.0,3


**TEST MODEL**

This approach follows TensorFlow's MPG (fuel-efficiency) regression tutorial: https://www.tensorflow.org/tutorials/keras/regression

In [3]:
# Randomly pick 80% of the rows for training (fixed seed, so the split is
# repeatable); every row whose index label was not sampled becomes test data.
train_data = df.sample(frac=0.8, random_state=0)
in_train = df.index.isin(train_data.index)
test_data = df.loc[~in_train]
print(f"Train data: {len(train_data)}")
print(f"Test data: {len(test_data)}")

Train data: 14876
Test data: 3719


In [4]:
# Split each partition into the label series and the predictor frame.
# train_data / test_data themselves are left unmodified.
train_labels = train_data[target].copy()
test_labels = test_data[target].copy()

train_features = train_data.drop(columns=[target])
test_features = test_data.drop(columns=[target])

In [5]:
# Mean and standard deviation of every column in the training split
# (the label column is still part of train_data here).
train_data.describe().T.loc[:, ['mean', 'std']]

Unnamed: 0,mean,std
number_of_cylinders,4.657368,1.353355
engine_type,0.334902,0.471972
engine_hp,173.874899,93.959119
engine_hp_rpm,5221.490992,1006.727885
transmission,0.588734,0.49208
mixed_fuel_consumption_per_100_km_l,7.769505,2.608764
acceleration_0_100_km/h_s,10.128542,3.069313
fuel_grade,1.905351,1.385076


In [6]:
# Normalization layer over the last axis; its statistics are learned from
# the training features only, then the learned means are printed as a check.
normalizer = tf.keras.layers.Normalization(axis=-1)
train_feature_matrix = train_features.to_numpy()
normalizer.adapt(train_feature_matrix)
print(normalizer.mean.numpy())

[[4.6573682e+00 3.3490193e-01 1.7387497e+02 5.2214941e+03 5.8873391e-01
  1.0128551e+01 1.9053503e+00]]


In [7]:
# Seed TensorFlow's global random generator so the Dense layer's initial
# weights are the same on every fresh run of the notebook.
tf.random.set_seed(0)

# Linear regression baseline: normalized features feeding one Dense unit.
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=1)
])

# The loss is already mean absolute error, so the 'mae' metric simply repeats
# the loss value every epoch. It is kept so existing history keys stay valid;
# 'mse' is added to provide a second, independent measure of fit.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='mean_absolute_error',
              metrics=['mae', 'mse'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 7)                15        
 n)                                                              
                                                                 
 dense (Dense)               (None, 1)                 8         
                                                                 
Total params: 23
Trainable params: 8
Non-trainable params: 15
_________________________________________________________________


In [8]:
# Fit for 50 epochs with per-epoch progress output; Keras sets aside 20% of
# the supplied training rows as validation data.
history = model.fit(
    x=train_features,
    y=train_labels,
    validation_split=0.2,
    epochs=50,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


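Note: the loss and the `mae` metric show the same value at every epoch. This looks suspicious but is expected: the model is compiled with `loss='mean_absolute_error'`, so the `mae` metric measures exactly the same quantity as the loss.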