In [None]:
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler

In [None]:
# Download the UCI Auto MPG data file through Keras' get_file helper.
dataset_path = keras.utils.get_file(
    "auto-mpg.data",
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)

column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# "?" is parsed as NaN, and anything after a tab on a line is treated as a
# comment and ignored. Fields are separated by (runs of) spaces.
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    na_values="?",
    comment='\t',
    sep=" ",
    skipinitialspace=True,
)

# dropna() returns a new frame, so raw_dataset itself is left untouched.
df = raw_dataset.dropna()

In [None]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

In [None]:
# Number of rows left after dropping incomplete records.
df.shape[0]

In [None]:
import seaborn as sns

# Pairwise scatter plots (KDE on the diagonal) for the target and three features.
sns.pairplot(
    df[["MPG", "Cylinders", "Displacement", "Weight"]],
    diag_kind="kde",
    plot_kws=dict(alpha=0.6, s=80, edgecolor='k'),
)

In [None]:
# Pairwise Pearson correlation (the pandas default) between all columns of df.
correlation = df.corr(method="pearson")

In [None]:
# Visualise the correlation matrix computed above.
sns.heatmap(data=correlation)

In [None]:
# Correlation of every column with the target column.
df.corr().loc[:, 'MPG']

In [None]:
# `selected_columns` is not defined anywhere in this notebook, so referencing it
# raised NameError on a fresh kernel. Only the available column names are shown.
df.columns

In [None]:
# Summary statistics with the target column removed, transposed so that each
# remaining column of df becomes one row.
df_stats = df.describe().drop(columns="MPG").transpose()
df_stats

In [None]:
from sklearn.model_selection import train_test_split

to_be_normalized_columns = ['Cylinders', 'Displacement', 'Horsepower', 'Weight',
                            'Acceleration', 'Model Year']

target_column = "MPG"

# The target is kept LAST so its scaler statistics can be looked up by position.
all_columns = to_be_normalized_columns + [target_column]

# Always start from the unscaled values so that re-running this cell yields the
# same scaler. Fitting on the already-scaled `df` would reset mean/std to ~0/1.
# Assumes df still holds exactly the rows of raw_dataset.dropna().
unscaled = raw_dataset.dropna()[all_columns]

# Fit the scaler on the training rows only, so test rows do not leak into the
# scaling statistics. test_size and random_state MUST match the train/test split
# cell further down: with equal values and an equal row count, train_test_split
# selects the same rows.
train_rows = train_test_split(unscaled, test_size=0.33, random_state=42)[0]

standard_scaler_x = StandardScaler(with_mean=True, with_std=True)
standard_scaler_x.fit(train_rows)

# Every row (train and test) is transformed with the train-only statistics.
df[all_columns] = standard_scaler_x.transform(unscaled)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed random_state keeps the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df[to_be_normalized_columns].to_numpy(),
    df[target_column].to_numpy(),
    test_size=0.33,
    random_state=42,
)

In [None]:
def build_model(input_dim=6, learning_rate=0.001):
    """Build and compile the regression network.

    The network is a stack of Dense(64, relu) -> Dense(32, relu) -> Dense(1),
    compiled with MSE loss and an RMSprop optimizer.

    Args:
        input_dim: Number of input features per sample. Defaults to 6, the
            value previously hard-coded in the first layer.
        learning_rate: Learning rate passed to RMSprop. Defaults to 0.001.

    Returns:
        The compiled Keras model. It reports 'mae' and 'mse' as metrics, so
        ``model.evaluate`` yields three values: loss, mae, mse.
    """
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[input_dim]),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ])

    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(learning_rate),
        metrics=['mae', 'mse'],
    )
    return model

In [None]:
# Hyper-parameters: number of passes over the training data.
EPOCHS = 1_000

In [None]:
# train centralized model

import random

import numpy as np

# Seed the Python, NumPy and TensorFlow global RNGs before the model is built,
# so that weight initialisation and batch shuffling are repeatable after a
# kernel restart. Some GPU kernels may still be non-deterministic.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = build_model()
# The last 20% of the training rows are held out by Keras for validation.
history = model.fit(X_train, y_train, epochs=EPOCHS, validation_split = 0.2, verbose=2)

In [None]:
# Per-epoch training log as a table, with the epoch number as an extra column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
import matplotlib.pyplot as plt

# Training curve first, validation curve second (the legend order relies on this).
for curve_name in ('loss', 'val_loss'):
    plt.plot(history.history[curve_name])

plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
loss, mae, mse = model.evaluate(X_test, y_test, verbose=2)

# The target column was standardized together with the features, so `mae` is
# expressed in standard deviations of MPG, not in MPG. An absolute error scales
# linearly, so multiplying by the scaler's std for the target converts it back.
mpg_std = standard_scaler_x.scale_[all_columns.index(target_column)]
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae * mpg_std))

In [None]:
# Import the module rather than the function: the result below is stored under
# the name `r2_score` (the plotting cell reads it), which would otherwise
# overwrite the function and make this cell fail when run a second time.
from sklearn import metrics

pred = model.predict(X_test)

# Signature is r2_score(y_true, y_pred). R^2 is not symmetric, so the ground
# truth must come first.
r2_score = metrics.r2_score(y_test, pred)
r2_score

In [None]:
import numpy as np

# Predicted vs. actual target values on the test set.
plt.scatter(y_test, pred)
plt.xlabel('Actual values')
plt.ylabel('Predicted values')

# Degree-1 least-squares fit of predictions against actual values, drawn at the
# distinct actual values.
actual_unique = np.unique(y_test)
line_coeffs = np.polyfit(y_test, pred.ravel(), 1)
plt.plot(actual_unique, np.polyval(line_coeffs, actual_unique))

# `r2_score` is the float computed in the previous cell.
plt.text(0.6, 0.5, 'R-squared = %0.2f' % r2_score)
plt.show()