# Predicting fuel efficiency with Machine Learning

> A neural network regression model trained on the classic Auto MPG dataset, which contains various car attributes and their corresponding miles-per-gallon (mpg) values.

In [None]:
#| default_exp core

## Load auto data and relevant libraries

In [None]:
# Import relevant dependencies for the machine learning project
import io
import os

import numpy as np
import pandas as pd
import requests
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

In [None]:
# Read the raw Auto MPG table; 'NA' and '?' entries are parsed as missing values
df = pd.read_csv(
    'data/auto-mpg.csv',
    na_values=['NA', '?'],
)

In [None]:
# Dataset dimensions as (rows, columns)
df.shape

(398, 9)

In [None]:
# Count missing values per column - horsepower has gaps that must be
# handled before the data can be used for training
df.isnull().sum()

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
year            0
origin          0
name            0
dtype: int64

In [None]:
# Impute the missing horsepower entries with the column median
hp_median = df['horsepower'].median()
df['horsepower'] = df['horsepower'].fillna(hp_median)

In [None]:
# Re-count missing values per column to confirm the horsepower fill worked
df.isnull().sum()

mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
year            0
origin          0
name            0
dtype: int64

## Define our target and features

In [None]:
# Predictor columns fed to the network (everything except the target and the car name)
feature_columns = [
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'year',
    'origin',
]

# Feature matrix and regression target (mpg) as NumPy arrays
x = df[feature_columns].to_numpy()
y = df['mpg'].to_numpy()

In [None]:
# Confirm the target was extracted as a NumPy array rather than a pandas Series
type(y)

numpy.ndarray

## Split data into train and test sets

In [None]:
# Hold out 25% of the rows as a test set; the fixed random_state keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Training feature matrix dimensions as (rows, features)
x_train.shape

(298, 7)

## Train the Neural Network on the train data

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across Restart & Run All.
set_random_seed(42)

# Small fully connected regression network: two ReLU hidden layers followed
# by a single linear output unit that predicts mpg.
model = Sequential()
model.add(Dense(25, input_dim=x.shape[1], activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Stop once val_loss has failed to improve by at least 1e-3 for 5 consecutive
# epochs, then roll the model back to the best weights seen.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-3,
    patience=5,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

# Validate on a 20% slice of the *training* data. Using the test set here
# would let early stopping pick weights tuned to the test rows and make the
# final test score optimistically biased. Keras takes the slice from the end
# of the arrays; x_train is already shuffled by train_test_split.
model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    callbacks=[monitor],
    verbose=2,
    epochs=1000,
)

Epoch 1/1000
10/10 - 6s - loss: 289.0994 - val_loss: 139.6767 - 6s/epoch - 628ms/step
Epoch 2/1000
10/10 - 0s - loss: 131.3682 - val_loss: 81.3089 - 222ms/epoch - 22ms/step
Epoch 3/1000
10/10 - 0s - loss: 91.6769 - val_loss: 79.7147 - 245ms/epoch - 24ms/step
Epoch 4/1000
10/10 - 0s - loss: 79.3116 - val_loss: 57.2720 - 211ms/epoch - 21ms/step
Epoch 5/1000
10/10 - 0s - loss: 64.5642 - val_loss: 47.6098 - 209ms/epoch - 21ms/step
Epoch 6/1000
10/10 - 0s - loss: 55.7570 - val_loss: 41.4851 - 211ms/epoch - 21ms/step
Epoch 7/1000
10/10 - 0s - loss: 48.7589 - val_loss: 33.7690 - 220ms/epoch - 22ms/step
Epoch 8/1000
10/10 - 0s - loss: 42.4368 - val_loss: 29.9907 - 295ms/epoch - 29ms/step
Epoch 9/1000
10/10 - 0s - loss: 38.4295 - val_loss: 25.9246 - 239ms/epoch - 24ms/step
Epoch 10/1000
10/10 - 0s - loss: 33.1909 - val_loss: 21.2726 - 211ms/epoch - 21ms/step
Epoch 11/1000
10/10 - 0s - loss: 30.5271 - val_loss: 20.2363 - 208ms/epoch - 21ms/step
Epoch 12/1000
10/10 - 0s - loss: 27.5326 - val_loss

<keras.src.callbacks.History>

## Predicting on our test data

In [None]:
# Predict on the held-out test data and report the root-mean-squared error,
# which is expressed in the same units as the target (mpg).
prediction = model.predict(x_test)
# scikit-learn metrics take (y_true, y_pred) in that order; keep to it so the
# call stays correct if the metric is ever swapped for a non-symmetric one.
score = np.sqrt(metrics.mean_squared_error(y_test, prediction))
print(f'After the training the score is: {score}')

After the training the score is: 3.6757342926737295


## Save the model

In [None]:
# Show the current working directory - the model files below are saved here
os.getcwd()

'C:\\Users\\ishma\\Dev_Workspace\\auto-mpg-prediction\\nbs'

In [None]:
# Save the trained model into the working directory twice:
# once as an .h5 file and once as a .keras file
save_dir = os.getcwd()
for filename in ("mpg_model.h5", "mpg_model.keras"):
    model.save(os.path.join(save_dir, filename))

## Preview the min/max bounds of each feature

In [None]:
# Feature column names: every dataframe column except the target and the car name
excluded = ('mpg', 'name')
cols = [col for col in df.columns if col not in excluded]
cols

['cylinders',
 'displacement',
 'horsepower',
 'weight',
 'acceleration',
 'year',
 'origin']

In [None]:
# Print each feature's min and max as a JSON-style object, one feature per line
last = len(cols) - 1
bounds_lines = [
    f'"{col}":{{"min":{df[col].min()},"max":{df[col].max()}}}' + ("," if pos < last else "")
    for pos, col in enumerate(cols)
]
print("\n".join(["{", *bounds_lines, "}"]))


{
"cylinders":{"min":3,"max":8},
"displacement":{"min":68.0,"max":455.0},
"horsepower":{"min":46.0,"max":230.0},
"weight":{"min":1613,"max":5140},
"acceleration":{"min":8.0,"max":24.8},
"year":{"min":70,"max":82},
"origin":{"min":1,"max":3}
}


In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| hide
def foo():
    """No-op placeholder: takes no arguments, does nothing and returns None."""
    return None

In [None]:
#| hide
# Run nbdev's export step for this project
import nbdev

nbdev.nbdev_export()