In [6]:
import pandas as pd
from keras import Sequential
from keras.src.callbacks import ModelCheckpoint
from keras.src.layers import Dense
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go

In [7]:
data = pd.read_csv("audi.csv").drop("model", axis=1)

test_labels = data["price"][-1000:-1]
labels = data["price"][0:-1000]

data = data.drop("price", axis=1)
data = pd.get_dummies(data, columns=["transmission"], dtype=int)
data = pd.get_dummies(data, columns=["fuelType"], dtype=int)
data

Unnamed: 0,year,mileage,tax,mpg,engineSize,transmission_Automatic,transmission_Manual,transmission_Semi-Auto,fuelType_Diesel,fuelType_Hybrid,fuelType_Petrol
0,2017,15735,150,55.4,1.4,0,1,0,0,0,1
1,2016,36203,20,64.2,2.0,1,0,0,1,0,0
2,2016,29946,30,55.4,1.4,0,1,0,0,0,1
3,2017,25952,145,67.3,2.0,1,0,0,1,0,0
4,2019,1998,145,49.6,1.0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
10663,2020,4018,145,49.6,1.0,0,1,0,0,0,1
10664,2020,1978,150,49.6,1.0,0,1,0,0,0,1
10665,2020,609,150,49.6,1.0,0,1,0,0,0,1
10666,2017,8646,150,47.9,1.4,1,0,0,0,0,1


In [8]:
scaler = StandardScaler()
test_data = data[-1000:-1]
data = data[:-1000]

data = scaler.fit_transform(data)
test_data = scaler.transform(test_data)

In [25]:
model = Sequential()
model.add(Dense(units=1024, input_shape=(11,)))
model.add(Dense(units=512, activation="relu"))
model.add(Dense(units=256, activation="relu"))
model.add(Dense(units=1))
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 1024)              12288     
                                                                 
 dense_11 (Dense)            (None, 512)               524800    
                                                                 
 dense_12 (Dense)            (None, 256)               131328    
                                                                 
 dense_13 (Dense)            (None, 1)                 257       
                                                                 
Total params: 668673 (2.55 MB)
Trainable params: 668673 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
mc = ModelCheckpoint("best_model_audi.keras", monitor="val_mae", save_best_only=True, mode="min", verbose=1)

model.compile(optimizer="adam",
              loss="mse",
              metrics=["mae"])

history = model.fit(data, labels, validation_split=0.25, callbacks=[mc], epochs=50, batch_size=32)

model.save("model_after_training_audi.keras")

Epoch 1/50
Epoch 1: val_mae improved from inf to 6893.55078, saving model to best_model_audi.keras
Epoch 2/50
Epoch 2: val_mae improved from 6893.55078 to 3684.52466, saving model to best_model_audi.keras
Epoch 3/50
Epoch 3: val_mae improved from 3684.52466 to 3333.25854, saving model to best_model_audi.keras
Epoch 4/50
Epoch 4: val_mae improved from 3333.25854 to 3278.28516, saving model to best_model_audi.keras
Epoch 5/50
Epoch 5: val_mae did not improve from 3278.28516
Epoch 6/50
Epoch 6: val_mae improved from 3278.28516 to 3069.59253, saving model to best_model_audi.keras
Epoch 7/50
Epoch 7: val_mae improved from 3069.59253 to 2960.78662, saving model to best_model_audi.keras
Epoch 8/50
Epoch 8: val_mae improved from 2960.78662 to 2876.19849, saving model to best_model_audi.keras
Epoch 9/50
Epoch 9: val_mae improved from 2876.19849 to 2866.48999, saving model to best_model_audi.keras
Epoch 10/50
Epoch 10: val_mae did not improve from 2866.48999
Epoch 11/50
Epoch 11: val_mae improve

In [27]:
def draw_history(history):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["mae"], name="mae"))
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["val_mae"], name="val_mae"))
    fig.show()

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["loss"], name="loss"))
    fig.add_trace(go.Scatter(x=history.epoch, y=history.history["val_loss"], name="val_loss"))
    fig.show()


draw_history(history)

In [28]:
model.load_weights("model_after_training_audi.keras")

preds = model.predict(test_data, verbose=0)

df = pd.DataFrame(
    {"price": test_labels, "predictions": preds.ravel(), "error": abs(test_labels - preds.ravel())}
).reset_index().drop("index", axis=1)

print(f"Mean absolute error: {mean_absolute_error(df['price'], df['predictions'])}")

df

Mean absolute error: 2658.2598189009323


Unnamed: 0,price,predictions,error
0,25999,23930.607422,2068.392578
1,11999,13785.469727,1786.469727
2,12499,11853.554688,645.445312
3,9099,12168.666992,3069.666992
4,9999,10143.379883,144.379883
...,...,...,...
994,12695,10698.752930,1996.247070
995,16999,20576.761719,3577.761719
996,16999,20742.777344,3743.777344
997,17199,20832.496094,3633.496094
