In [3]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd
from math import sqrt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
import seaborn as sns
import pickle
import warnings
warnings.filterwarnings('ignore')

In [21]:
# Load the raw listings, encode every non-numeric value, and persist the
# fully numeric table for the modelling cells.
data = pd.read_csv('D:/ML/data/cars.csv')

# Fill missing engine_capacity values with the column mean.
data["engine_capacity"] = data["engine_capacity"].fillna(data["engine_capacity"].mean())

# Value-level substitutions: booleans become 0/1, the six region names become
# integer codes 1-6, and the 'automatic'/'mechanical' labels become +1/-1.
# NOTE(review): DataFrame.replace is applied to the whole frame, so any cell in
# any column that equals one of these keys is rewritten - confirm that no other
# column can contain these values.
maping = {
    True : 1,
    False : 0,
    'Минская обл.' : 5,
    'Гомельская обл.' : 3,
    'Брестская обл.' : 1,
    'Могилевская обл.' : 6,
    'Витебская обл.' : 2,
    'Гродненская обл.' : 4,
    'automatic' : 1,
    'mechanical' : -1
}
data = data.replace(maping)

# Remaining categorical columns are one-hot encoded.
category_col = [
    "color",
    "engine_fuel",
    "engine_type",
    "body_type",
    "state",
    "drivetrain",
    "model_name",
    "manufacturer_name"
]
# Pin the indicator dtype: pandas >= 2.0 produces bool dummies by default,
# which come back from the CSV as True/False columns and turn the feature
# matrix into a mixed bool/float (object) array. uint8 is what earlier pandas
# releases produced, so the saved file keeps its 0/1 columns on every version.
data = pd.get_dummies(data=data, columns=category_col, dtype=np.uint8)
data.to_csv('D:/ML/data/cars_preprocessed.csv', index=False)

In [28]:
# Reload the encoded table from disk and separate the regression target
# (price_usd) from the remaining feature columns.
data = pd.read_csv('D:/ML/data/cars_preprocessed.csv')
X = data.drop(columns=["price_usd"])
y = data.loc[:, ["price_usd"]]  # kept as a one-column DataFrame, not a Series

In [46]:
# Fit an 8-component PCA on the full feature matrix and keep the projection.
# The target is passed through only for API symmetry; scikit-learn's PCA
# ignores `y`.
# NOTE(review): none of the cells visible here read X_pca afterwards, and the
# fit uses all rows (including those later held out for testing) - confirm
# whether the projection was meant to feed the model.
pca = PCA(n_components=8)
X_pca = pca.fit_transform(X, y=y)

In [47]:
# Hold out 15% of the rows for evaluation. With shuffle=False the split is
# positional: the test set is the tail of the file, in file order.
# NOTE(review): this splits the full feature matrix X, not the PCA projection
# X_pca; and if the CSV is sorted in any way the tail may not be representative
# of the training rows - confirm both are intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    shuffle=False,
)

In [58]:
# Small fully connected regressor: 64 -> 32 -> 16 -> 8 units with alternating
# linear / ReLU activations, 10% dropout, and a single linear output unit.
# The input width is taken from the training matrix.
model_regression = tf.keras.Sequential()
model_regression.add(
    tf.keras.layers.Dense(64, activation="linear", input_shape=(X_train.shape[1],))
)
model_regression.add(tf.keras.layers.Dense(32, activation="relu"))
model_regression.add(tf.keras.layers.Dense(16, activation="linear"))
model_regression.add(tf.keras.layers.Dense(8, activation="relu"))
model_regression.add(tf.keras.layers.Dropout(0.1))
model_regression.add(tf.keras.layers.Dense(1, activation="linear"))

# Optimise mean absolute error with Adam at a learning rate of 0.005.
model_regression.compile(
    loss="mae",
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
)

# Train silently for 20 epochs; the returned History object is the cell output.
model_regression.fit(X_train, y_train, epochs=20, verbose=0)

<keras.callbacks.History at 0x267dabfb040>

In [59]:
# Score the trained network on the held-out rows and report the usual
# regression metrics, then persist the model.
y_pred = model_regression.predict(X_test)

# Compute each metric once; RMSE is derived from the MSE already in hand.
test_mae = mean_absolute_error(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
test_rmse = sqrt(test_mse)
test_mape = mean_absolute_percentage_error(y_test, y_pred)  # a fraction, not a percentage
test_r2 = r2_score(y_test, y_pred)

print(f'MAE: {test_mae}')
print(f'MSE: {test_mse}')
print(f'RMSE: {test_rmse}')
print(f'MAPE: {test_mape}')
print(f'R^2: {test_r2}')

# NOTE(review): despite the '.csv' suffix this path is written as a model
# directory (the captured log shows an 'assets' subfolder under it), not a CSV
# file - consider a clearer name once callers of this path are known.
model_regression.save('D:/ML/model/cars_preprocessed_tensor.csv')

MAE: 2981.437361501621
MSE: 25053739.864218276
RMSE: 5005.371101548643
MAPE: 1.3079520828205096
R^2: 0.442942695082174




INFO:tensorflow:Assets written to: D:/ML/model/cars_preprocessed_tensor.csv\assets


INFO:tensorflow:Assets written to: D:/ML/model/cars_preprocessed_tensor.csv\assets
