<a href="https://colab.research.google.com/github/bozorgpanah/WASP-Software-Engineering-Project/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Regression: predict the value of a continuous output.
# Dataset: the classic Auto MPG dataset.
# Upgrade TensorFlow to the newest release pip can find.
!pip install tensorflow --upgrade

# seaborn is used for the pairplot of the training columns.
!pip install seaborn

# tensorflow_docs supplies the EpochDots callback and the HistoryPlotter helper.
!pip install git+https://github.com/tensorflow/docs

from __future__ import absolute_import, division, print_function, unicode_literals
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

print(tf.__version__)

# Fetch the Auto MPG data file and keep the local path returned by get_file.
dataset_path = keras.utils.get_file(
    fname="auto-mpg.data",
    origin="http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)
dataset_path

# Read the file with pandas: the file has no header row, fields are separated
# by spaces, "?" marks a missing value and everything after a tab is ignored.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    na_values="?",
    comment='\t',
    sep=" ",
    skipinitialspace=True,
)
# Work on a copy so the raw frame stays untouched.
dataset = raw_dataset.copy()
dataset.tail()

# Clean the data
dataset.isna().sum()
# Drop the rows that contain missing values ("?" entries were read as NaN).
dataset = dataset.dropna()
# 'Origin' is a category code, not a quantity: map the codes to names and
# one-hot encode them into USA / Europe / Japan indicator columns.
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
# dtype=float keeps the indicator columns numeric. pandas >= 2.0 defaults to
# bool indicators, which describe() leaves out of the statistics, so
# normalising with those statistics would turn the indicators into NaN.
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='', dtype=float)
dataset.tail()

# Split the data: a fixed-seed random 80% sample for training, the remaining
# rows for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# Pairwise plots of a few training columns, with KDE curves on the diagonal.
sns.pairplot(
    train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]],
    diag_kind="kde",
)

# Overall statistics of the training features: one row per feature, with the
# label column 'MPG' left out.
train_stats = train_dataset.describe().drop(columns="MPG").transpose()
train_stats

# Separate the label column from the features in both splits.
train_labels, test_labels = train_dataset.pop('MPG'), test_dataset.pop('MPG')
#Normalizing the data
def norm(x):
  """Standardise `x` with the module-level `train_stats` table.

  The 'mean' column of `train_stats` is subtracted from `x` and the result is
  divided by the 'std' column.
  """
  centred = x - train_stats['mean']
  return centred / train_stats['std']
# Normalise both splits with the same training-set statistics (norm() reads
# train_stats), so the test data is scaled exactly like the training data.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

#Building the model
def build_model():
  """Create and compile the regression network.

  The network is two 64-unit ReLU Dense layers followed by a single-unit
  Dense output layer; its input width is the number of columns in the
  module-level `train_dataset`. It is compiled with 'mse' loss, an
  RMSprop(0.001) optimizer and the 'mae' and 'mse' metrics.

  Returns:
    The compiled `keras.Sequential` model.
  """
  n_features = len(train_dataset.keys())
  network = keras.Sequential([
      layers.Dense(64, activation='relu', input_shape=[n_features]),
      layers.Dense(64, activation='relu'),
      layers.Dense(1),
  ])
  network.compile(
      loss='mse',
      optimizer=tf.keras.optimizers.RMSprop(0.001),
      metrics=['mae', 'mse'],
  )
  return network

model = build_model()

# Inspect the model: print its layer summary, then run the not-yet-trained
# model on the first 10 normalised training rows as a quick smoke test.
model.summary()
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result

# Train the model
EPOCHS = 1000

# verbose=0 turns off the per-epoch log; the EpochDots callback is passed in
# its place. 20% of the training data is held out for validation.
history = model.fit(
    normed_train_data, train_labels,
    epochs=EPOCHS, validation_split=0.2,
    verbose=0, callbacks=[tfdocs.modeling.EpochDots()])

# Collect the per-epoch metrics in a DataFrame, with the epoch number as a column.
hist = pd.DataFrame(history.history)
print(hist)
hist['epoch'] = history.epoch
hist.tail()

plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

# Start a new figure for each metric: pyplot calls otherwise keep drawing on
# the current axes, so the MSE curves, limits and label would land on top of
# the MAE plot when both run in the same cell.
plt.figure()
plotter.plot({'Basic': history}, metric="mae")
plt.ylim([0, 10])
plt.ylabel('MAE [MPG]')

plt.figure()
plotter.plot({'Basic': history}, metric="mse")
plt.ylim([0, 20])
plt.ylabel('MSE [MPG^2]')

# Build a new model so the early-stopping run does not continue from the
# model trained above.
model = build_model()

# The patience parameter is the number of epochs to wait for an improvement
# in val_loss before training is stopped.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

early_history = model.fit(
    normed_train_data, train_labels,
    epochs=EPOCHS, validation_split=0.2, verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()])

# New figure so this plot is not drawn over whatever pyplot figure is current.
plt.figure()
plotter.plot({'Early Stopping': early_history}, metric="mae")
plt.ylim([0, 10])
plt.ylabel('MAE [MPG]')

# Evaluate on the held-out test set. The model is compiled with 'mse' loss
# and the 'mae' and 'mse' metrics, which is the order unpacked here.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))
# MSE (and the loss, which is MSE) is a squared error, so its unit is MPG^2.
print("Testing set MSE: {:5.2f} MPG^2".format(mse))
print("Testing set Loss: {:5.2f} MPG^2".format(loss))

# Make predictions on the test set; flatten the model output to one dimension.
test_predictions = model.predict(normed_test_data).flatten()

# Scatter of predicted vs. true MPG on its own figure, with equal axes and a
# diagonal reference line (points on the line are perfect predictions).
plt.figure()
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
lims = [0, 50]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)

# Distribution of the prediction errors. It gets a new figure; otherwise the
# histogram would be drawn on the scatter's equal-aspect axes above.
error = test_predictions - test_labels
plt.figure()
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")

In [47]:
def test_fit():
  """Check the recorded per-epoch training metrics against `hist`.

  The recorded numbers are compared with the rows of the module-level `hist`
  DataFrame (built from `history.history`, with an added 'epoch' column).
  They must not be passed to `model.fit`: its leading positional parameters
  are the training inputs and targets, and it returns a History object, not
  an epoch number.

  NOTE(review): the six values per epoch are presumably loss, mae, mse,
  val_loss, val_mae, val_mse -- confirm against the run they were copied from.
  NOTE(review): no random seed is set in the visible code, so these values
  will only match a run that reproduces the recorded one; confirm whether
  seeding or a looser tolerance is intended.

  Raises:
    AssertionError: if a recorded epoch is missing from `hist` or its metrics
      differ from the recorded values beyond the tolerance.
  """
  metric_columns = ['loss', 'mae', 'mse', 'val_loss', 'val_mae', 'val_mse']
  # epoch -> recorded metric values, in the order of metric_columns.
  recorded = {
      0: (564.070984, 22.437719, 564.070984, 559.131714, 22.302734, 559.131714),
      1: (513.879761, 21.357210, 513.879761, 507.980621, 21.178234, 507.980621),
      2: (463.255859, 20.189146, 463.255859, 450.778381, 19.837639, 450.778381),
      3: (406.922943, 18.822338, 406.922943, 387.425171, 18.249229, 387.425171),
      100: (5.9705, 1.7080, 5.9705, 7.8259, 2.1053, 7.8259),
      200: (5.3365, 1.5836, 5.3365, 7.7012, 2.0999, 7.7012),
      300: (4.7098, 1.4529, 4.7098, 7.9523, 2.1381, 7.9523),
      400: (4.4066, 1.4014, 4.4066, 8.0648, 2.1912, 8.0648),
      500: (3.8809, 1.3007, 3.8809, 8.2515, 2.2030, 8.2515),
      700: (3.0778, 1.1236, 3.0778, 7.9727, 2.1150, 7.9727),
      800: (2.8492, 1.0555, 2.8492, 7.8695, 2.0979, 7.8695),
      900: (2.5882, 1.0104, 2.5882, 8.0267, 2.1229, 8.0267),
      995: (2.421557, 1.021443, 2.421557, 7.761631, 2.131796, 7.761631),
      996: (2.417806, 0.946623, 2.417806, 7.872272, 2.109151, 7.872272),
      997: (2.397120, 0.978255, 2.397120, 8.160164, 2.095555, 8.160164),
      999: (2.497679, 1.040963, 2.497679, 7.729835, 2.142477, 7.729835),
  }
  for epoch, expected in recorded.items():
    rows = hist.loc[hist['epoch'] == epoch, metric_columns]
    assert len(rows) == 1, "epoch {} not found exactly once in hist".format(epoch)
    actual = rows.to_numpy()[0]
    # The recorded values are rounded to 4-6 decimals, hence the tolerance.
    assert np.allclose(actual, expected, rtol=1e-4, atol=1e-4), (
        "metrics at epoch {} differ: got {}, expected {}".format(
            epoch, actual.tolist(), expected))
