In [1]:
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error, r2_score, mean_absolute_error

from analytics.machine_learning.price_prediction_with_fundamentals import utils

In [2]:
dataset = utils.get_dataset()

In [3]:
import datetime as dt

train_set, test_set = utils.split_data_to_train_and_test(
    df=dataset,
    cutoff_date=dt.datetime(2023,6,1)
)

In [4]:
y_train = train_set[['price', 'sector']]
X_train = train_set.drop(['price'], axis=1)

y_test = test_set[['price', 'sector']]
X_test = test_set.drop(['price'], axis=1)

In [5]:
from sklearn.preprocessing import (
    OneHotEncoder,
    MinMaxScaler
)

one_hot_encoder = OneHotEncoder()
scaler = MinMaxScaler()

X_train_transformed = utils.transform_input(
    X=X_train,
    one_hot_encoder=one_hot_encoder,
    scaler=scaler,
    fit=True
)

X_test_transformed = utils.transform_input(
    X=X_test,
    one_hot_encoder=one_hot_encoder,
    scaler=scaler,
    fit=False
)

In [19]:
from tensorflow import keras

# Build the neural network model
model = keras.Sequential([
    # Input layer
    keras.layers.Input(shape=(100,)),  # Adjust the input shape according to your data
    
    # Hidden layers
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    
    # Output layer (a single neuron for regression)
    keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_absolute_error')  # You can also use 'mean_absolute_error' for MAE

# Train the model
history = model.fit(X_train_transformed, y_train['price'], epochs=100, batch_size=32, validation_data=(X_test_transformed, y_test['price']))

# Evaluate the model on the test set
loss = model.evaluate(X_test_transformed, y_test['price'])
print(f"Test Loss: {loss}")

# You can also make predictions using the trained model
y_pred = model.predict(X_test_transformed)

# Save and load the model (optional)
model.save("regression_model.h5")
# To load the model later: loaded_model = keras.models.load_model("regression_model.h5")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  saving_api.save_model(


In [20]:
# The mean squared error
print("Mean absolute error: %.2f" % mean_absolute_error(y_test['price'], y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test['price'], y_pred))
print("Mean absolute pct error: %.2f" % mean_absolute_percentage_error(y_test['price'], y_pred))

Mean absolute error: 58.62
Coefficient of determination: -0.11
Mean absolute pct error: 0.86


## Try to improve it

In [6]:
from tensorflow import keras
from sklearn.metrics import mean_squared_error


# Build a more complex neural network
model = keras.Sequential([
    keras.layers.Input(shape=(100,)),
    keras.layers.Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dense(1)
])

# Implement learning rate scheduling
lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

# Compile the model
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Implement early stopping
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train_transformed,
    y_train['price'],
    epochs=100, batch_size=64,
    validation_data=(X_test_transformed, y_test['price']),
    callbacks=[early_stopping]
)

# Evaluate the model
y_pred = model.predict(X_test_transformed)
test_loss = mean_squared_error(y_test['price'], y_pred)
print(f"Test Mean Squared Error: {test_loss:.2f}")




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Test Mean Squared Error: 28496.63


In [7]:
# The mean squared error
print("Mean absolute error: %.2f" % mean_absolute_error(y_test['price'], y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test['price'], y_pred))
print("Mean absolute pct error: %.2f" % mean_absolute_percentage_error(y_test['price'], y_pred))

Mean absolute error: 57.53
Coefficient of determination: -0.01
Mean absolute pct error: 3.49
