In [19]:
import numpy as np
import pandas as pd
from math import floor, ceil
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, mean_squared_error, classification_report, roc_auc_score, mean_squared_log_error
from sklearn.neural_network import MLPRegressor
from keras.utils import np_utils
from neural_network import NeuralNetwork
from fc_layer import FCLayer
from activation_layer import ActivationLayer
from activation_functions import sigmoid, sigmoid_derivative, identity, identity_derivative, tanh, tanh_derivative, relu, relu_derivative
from loss_functions import mse, mse_derivative
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler

In [14]:
training_data = pd.read_csv('cleaned_cars.csv')

for col in ['year', 'mileage', 'tax', 'mpg']:
    normalized = normalize([np.asarray(training_data[col])])
    training_data.drop(col, axis=1, inplace=True)
    training_data[col] = normalized[0]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    training_data.loc[:, training_data.columns != 'price'], training_data["price"], test_size=0.20)

X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [22]:
clf = MLPRegressor(solver='lbfgs', alpha=0.1, hidden_layer_sizes=(5, 10, 3), max_iter=1000, activation='tanh', random_state=0)
clf.fit(X_train, y_train)
y_pred_sklearn = clf.predict(X_test)
print(mean_squared_log_error(y_test, y_pred_sklearn))

0.14602243477313698


In [23]:

network = NeuralNetwork()
network.add(FCLayer(X_train.shape[1], 5))
network.add(ActivationLayer(tanh, tanh_derivative))
network.add(FCLayer(5, 10))
network.add(ActivationLayer(tanh, tanh_derivative))
network.add(FCLayer(10, 3))
network.add(ActivationLayer(tanh, tanh_derivative))
network.add(FCLayer(3, 1))
network.use(mse, mse_derivative)
network.fit(X_train, y_train, epochs=1000, learning_rate=0.1)
y_pred = np.array(network.predict(X_test))
print(y_test, y_test.shape)
print(y_pred, y_pred.shape)
print(mean_squared_log_error(y_test, y_pred))

[39995 12995 12595 ... 22971 29000 23850] (1447,)
[[25249.13278936]
 [25249.13278936]
 [25249.13278936]
 ...
 [25249.13278936]
 [25249.13278936]
 [25249.13278936]] (1447, 1)
0.2034644891024033
