In [None]:
!pip install tensorflow==2.16.1

In [None]:
# Import the libraries used throughout the notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
# Silence library warnings (installed before TensorFlow is imported so that
# import-time warnings are hidden as well) and pick the seaborn plot style.
warnings.filterwarnings('ignore')
sns.set_style('darkgrid')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling repeat across "Restart & Run All".
tf.keras.utils.set_random_seed(0)

print(tf.__version__)

# The Auto MPG Dataset

In [None]:
# Location of the Auto MPG data (fetched over HTTPS rather than plain HTTP so
# the download cannot be altered in transit) and the names given to its columns
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

# Load the dataset: fields are separated by spaces, '?' marks a missing value,
# and everything from a tab character onward is treated as a comment and dropped
raw_dataset = pd.read_csv(url, names=column_names,
                          na_values='?', comment='\t',
                          sep=' ', skipinitialspace=True)

In [None]:
# Work on a deep copy so the downloaded frame itself is never modified
data = raw_dataset.copy(deep=True)
data.head(n=5)

# Clean the data

In [None]:
# Count the missing values in each column
data.isnull().sum()

In [None]:
# Discard every row that has at least one missing value
data = data.dropna(axis=0, how='any')

In [None]:
# Replace the numeric 'Origin' codes with the matching country names
data = data.assign(
    Origin=data['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'}),
)
data.head()

In [None]:
# One-hot encode the categorical 'Origin' column.
# dtype=float keeps the whole frame numeric: since pandas 2.0 the dummy columns
# default to bool, and a frame mixing bool with float/int columns converts to
# an object-dtype NumPy array, which the Normalization layer and the models
# further down cannot consume.
data = pd.get_dummies(data, columns=['Origin'], prefix='', prefix_sep='',
                      dtype=float)
data.head()

# Split the data into training and test sets

In [None]:
# Randomly draw 80% of the rows (fixed seed) to form the training set
train_dataset = data.sample(frac=0.8, random_state=0)

# Every row that was not drawn above goes to the test set
held_out = ~data.index.isin(train_dataset.index)
test_dataset = data.loc[held_out]

# Inspect the data

In [None]:
# Pair plot of MPG and three features, with KDE curves on the diagonal
sns.pairplot(
    train_dataset.loc[:, ['MPG', 'Cylinders', 'Displacement', 'Weight']],
    diag_kind='kde',
)

In [None]:
# Summary statistics of the training set, one row per column
train_dataset.describe().T

# Split features from labels

In [None]:
# Features are every column except the 'MPG' target ...
train_features = train_dataset.drop(columns=['MPG'])
test_features = test_dataset.drop(columns=['MPG'])

# ... and the labels are the 'MPG' column on its own
train_labels = train_dataset['MPG'].copy()
test_labels = test_dataset['MPG'].copy()

# Normalization

In [None]:
# Mean and standard deviation of each training column (the columns sit on very different scales)
train_dataset.describe().loc[['mean', 'std']].T

# The Normalization layer

In [None]:
# Normalization layer for the full feature set, configured with axis=-1
normalizer = layers.Normalization(axis=-1)

In [None]:
# Fit the layer's statistics on the training features only
normalizer.adapt(train_features.to_numpy())

In [None]:
# Show the mean values stored in the layer after adapt()
feature_means = normalizer.mean.numpy()
print(feature_means)

In [None]:
# Show the first training row as-is and after passing through the normalizer
first = np.array(train_features.iloc[:1])

with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())

# Linear regression

In [None]:
# 'Horsepower' values of the training set as a NumPy array
horsepower = train_features['Horsepower'].to_numpy(copy=True)

# Normalization layer dedicated to this single input; its statistics are
# fitted on the training 'Horsepower' values
horsepower_normalizer = tf.keras.layers.Normalization(axis=None, input_shape=[1])
horsepower_normalizer.adapt(horsepower)

In [None]:
# Single-input linear regression: the 'Horsepower' normalizer followed by one Dense unit
horsepower_model = keras.Sequential(
    layers=[horsepower_normalizer, layers.Dense(1)],
)

# Show the model summary
horsepower_model.summary()

In [None]:
# Run the model on the first 10 'Horsepower' values (it has not been fitted
# yet at this point of the notebook)
horsepower_model.predict(horsepower[0:10])

In [None]:
# Training setup: mean absolute error loss, minimized by Adam with learning rate 0.1
horsepower_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
)

In [None]:
%%time
history = horsepower_model.fit(
    train_features['Horsepower'],
    train_labels,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split = 0.2)

In [None]:
# Tabulate the per-epoch metrics, add the epoch number, and show the last rows
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
# Function to plot training loss
def plot_loss(history, ylim=(0, 10)):
  """Plot the training loss and, when available, the validation loss per epoch.

  Args:
    history: Object exposing a ``history`` dict that maps metric names to
      per-epoch values (for example the value returned by ``model.fit``).
      It must contain ``'loss'``; ``'val_loss'`` is drawn only if present.
    ylim: ``(bottom, top)`` limits for the y-axis. Defaults to ``(0, 10)``,
      the range the notebook has always used.
  """
  metrics = history.history
  plt.plot(metrics['loss'], label='loss')
  # 'val_loss' is only recorded when fit() was given validation data, so skip
  # the curve instead of raising KeyError when it is absent.
  if 'val_loss' in metrics:
    plt.plot(metrics['val_loss'], label='val_loss')
  plt.ylim(ylim)
  plt.xlabel('Epoch')
  plt.ylabel('Error [MPG]')
  plt.legend()
  plt.grid(True)

In [None]:
# Loss curves of the single-input linear model
plot_loss(history=history)

In [None]:
# Test-set results of every model are collected here for the final comparison
test_results = dict()

test_results['horsepower_model'] = horsepower_model.evaluate(
    x=test_features['Horsepower'],
    y=test_labels,
    verbose=0,
)

In [None]:
# Evaluate the model on 251 evenly spaced 'Horsepower' values from 0 to 250
x = tf.linspace(start=0.0, stop=250, num=251)
y = horsepower_model.predict(x)

In [None]:
# Function to plot 'Horsepower' vs 'MPG' predictions
def plot_horsepower(x, y):
  """Draw a prediction curve over the training 'Horsepower' vs. MPG scatter.

  Reads the notebook-level ``train_features`` and ``train_labels`` for the
  scatter points.

  Args:
    x: Horizontal coordinates of the prediction curve.
    y: Values drawn against ``x``.
  """
  observed_hp = train_features['Horsepower']
  plt.scatter(observed_hp, train_labels, label='Data')
  # Black line for the model output on top of the observed points
  plt.plot(x, y, label='Predictions', color='k')
  plt.xlabel('Horsepower')
  plt.ylabel('MPG')
  plt.legend()

In [None]:
# Linear model's prediction line against the training data
plot_horsepower(x=x, y=y)

# Linear regression with multiple inputs

In [None]:
# Linear regression on all features: the shared normalizer followed by one Dense unit
linear_model = keras.Sequential(
    layers=[normalizer, layers.Dense(1)],
)

In [None]:
# Run the model on the first 10 training examples
linear_model.predict(train_features.iloc[:10])

In [None]:
# Show the weight matrix (kernel) of the model's final, Dense layer
linear_model.layers[-1].kernel

In [None]:
# Training setup: mean absolute error loss, minimized by Adam with learning rate 0.1
linear_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
)

In [None]:
%%time
history = linear_model.fit(
    train_features,
    train_labels,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split = 0.2)

In [None]:
# Loss curves of the multi-input linear model
plot_loss(history=history)

In [None]:
# Record the multi-input linear model's result on the test set
test_results['linear_model'] = linear_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)

# Regression with a deep neural network (DNN)

In [None]:
# Function to build and compile a DNN model
def build_and_compile_model(norm):
  """Build and compile a small fully connected regression network.

  The network is ``norm`` followed by two 64-unit ReLU Dense layers and a
  single-unit Dense output. It is compiled with mean absolute error loss and
  an Adam optimizer created with 0.001.

  Args:
    norm: Layer placed first in the model (the notebook passes its
      Normalization layers here).

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  hidden = [layers.Dense(64, activation='relu') for _ in range(2)]
  model = keras.Sequential([norm, *hidden, layers.Dense(1)])

  adam = tf.keras.optimizers.Adam(0.001)
  model.compile(optimizer=adam, loss='mean_absolute_error')
  return model

# Regression using a DNN and a single input

In [None]:
# DNN that takes only 'Horsepower' as input, starting with its dedicated normalizer
dnn_horsepower_model = build_and_compile_model(norm=horsepower_normalizer)

In [None]:
# Print the summary of the single-input DNN model
dnn_horsepower_model.summary()

In [None]:
%%time
history = dnn_horsepower_model.fit(
    train_features['Horsepower'],
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)

In [None]:
# Loss curves of the single-input DNN
plot_loss(history=history)

In [None]:
# Evaluate the DNN on 251 evenly spaced 'Horsepower' values from 0 to 250
x = tf.linspace(start=0.0, stop=250, num=251)
y = dnn_horsepower_model.predict(x)

In [None]:
# DNN prediction curve against the training data
plot_horsepower(x=x, y=y)

In [None]:
# Record the single-input DNN's result on the test set
test_results['dnn_horsepower_model'] = dnn_horsepower_model.evaluate(
    x=test_features['Horsepower'],
    y=test_labels,
    verbose=0,
)

# Regression using a DNN and multiple inputs

In [None]:
# DNN over all features, starting with the shared normalizer; then show its summary
dnn_model = build_and_compile_model(norm=normalizer)
dnn_model.summary()

In [None]:
%%time
history = dnn_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)

In [None]:
# Loss curves of the multi-input DNN
plot_loss(history=history)

In [None]:
# Record the multi-input DNN's result on the test set
test_results['dnn_model'] = dnn_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)

# Performance

In [None]:
# Tabulate the collected test results, one row per model
pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).transpose()

# Make predictions

In [None]:
# Model output for every test example, flattened to one dimension
test_predictions = dnn_model.predict(test_features).flatten()

# Scatter of predicted against true MPG on equal-aspect axes; the line drawn
# through (0, 0) and (50, 50) marks where prediction equals the true value
lims = [0, 50]
a = plt.axes(aspect='equal')
plt.scatter(x=test_labels, y=test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)

In [None]:
# Histogram (25 bins) of prediction minus true value over the test set
error = test_predictions - test_labels
plt.hist(x=error, bins=25)
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel('Count')

In [None]:
# Write the trained DNN to disk
dnn_model.save(filepath='dnn_model.keras')

In [None]:
# Read the model back from disk
reloaded = keras.models.load_model('dnn_model.keras')

# Evaluate the restored model on the same test set and record the result
test_results['reloaded'] = reloaded.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)

In [None]:
# Final comparison table, now including the reloaded model
pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).transpose()