In [58]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
import tensorflow.keras.callbacks

In [30]:
df = pd.read_csv("train.csv")
df = df.drop(["ID", "Levy"], axis=1)
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna(axis=0)

In [31]:
label_encoder = LabelEncoder()
minmax_scaler = MinMaxScaler()

In [32]:
df

Unnamed: 0,Price,Manufacturer,Model,Prod. year,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Doors,Wheel,Color,Airbags
0,13328,LEXUS,RX 450,2010,Jeep,Yes,Hybrid,3.5,186005 km,6.0,Automatic,4x4,04-May,Left wheel,Silver,12
1,16621,CHEVROLET,Equinox,2011,Jeep,No,Petrol,3,192000 km,6.0,Tiptronic,4x4,04-May,Left wheel,Black,8
2,8467,HONDA,FIT,2006,Hatchback,No,Petrol,1.3,200000 km,4.0,Variator,Front,04-May,Right-hand drive,Black,2
3,3607,FORD,Escape,2011,Jeep,Yes,Hybrid,2.5,168966 km,4.0,Automatic,4x4,04-May,Left wheel,White,0
4,11726,HONDA,FIT,2014,Hatchback,Yes,Petrol,1.3,91901 km,4.0,Automatic,Front,04-May,Left wheel,Silver,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19232,8467,MERCEDES-BENZ,CLK 200,1999,Coupe,Yes,CNG,2.0 Turbo,300000 km,4.0,Manual,Rear,02-Mar,Left wheel,Silver,5
19233,15681,HYUNDAI,Sonata,2011,Sedan,Yes,Petrol,2.4,161600 km,4.0,Tiptronic,Front,04-May,Left wheel,Red,8
19234,26108,HYUNDAI,Tucson,2010,Jeep,Yes,Diesel,2,116365 km,4.0,Automatic,Front,04-May,Left wheel,Grey,4
19235,5331,CHEVROLET,Captiva,2007,Jeep,Yes,Diesel,2,51258 km,4.0,Automatic,Front,04-May,Left wheel,Black,4


In [33]:
def process_airbags_cylinders(x):
    if (x>=0 and x<=4):
        return 0
    elif (x>4 and x<=8):
        return 1
    elif (x>8 and x<=12):
        return 2
    elif (x>12 and x<=16):
        return 3
df["Airbags"] = df["Airbags"].apply(process_airbags_cylinders)
df["Cylinders"] = df["Cylinders"].apply(process_airbags_cylinders)

In [34]:
df["Color"] = label_encoder.fit_transform(df["Color"])

In [35]:
df["Wheel"] = label_encoder.fit_transform(df["Wheel"])

In [36]:
df["Doors"] = label_encoder.fit_transform(df["Doors"])

In [37]:
df["Drive wheels"] = label_encoder.fit_transform(df["Drive wheels"])

In [38]:
df["Gear box type"] = label_encoder.fit_transform(df["Gear box type"])

In [39]:
df["Fuel type"] = label_encoder.fit_transform(df["Fuel type"])

In [40]:
df["Leather interior"] = label_encoder.fit_transform(df["Leather interior"])

In [41]:
def process_milage(x):
    x = float("".join([i for i in x if i.isdigit() or i == "."]))
    x = np.log10(x)
    return x

df["Mileage"] = df["Mileage"].apply(process_milage)

In [42]:
def process_turbo(x):
    if "Turbo" in x:
        return 1
    else:
        return 0
df["Turbo"] = df["Engine volume"].apply(process_turbo)

In [43]:
def process_engine_volume(x):
    x = float("".join([i for i in x if i.isdigit() or i == "."]))
    if x <= 3:
        return 0
    else:
        return 1
df["Engine volume"] = df["Engine volume"].apply(process_engine_volume)

In [44]:
def process_prodyear(x):
    if x <= 1950:
        return 0
    elif 1951 < x <= 1975:
        return 1
    elif 1976 < x <= 2000:
        return 2
    elif 2001 < x <= 2010:        
        return 3
    elif 2011 < x <= 2015:
        return 4
    else:
        return 5

df["Prod. year"] = df["Prod. year"].apply(process_prodyear)

In [45]:
def process_price(x):
    return np.log10(x)

df["Price"] = df["Price"].apply(process_price)

In [46]:
for col in ["Model", "Manufacturer", "Category"]:
    x = label_encoder.fit_transform(df[col])
    x = minmax_scaler.fit_transform(x.reshape(-1, 1))
    df[col] = x

In [47]:
df

Unnamed: 0,Price,Manufacturer,Model,Prod. year,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Doors,Wheel,Color,Airbags,Turbo
0,4.124765,0.500000,0.781624,3,0.4,1,2,1,5.269525,1,0,0,1,0,12,2,0
1,4.220657,0.125000,0.414097,5,0.4,0,5,0,5.283301,1,2,0,1,0,1,1,0
2,3.927730,0.328125,0.430459,3,0.3,0,5,0,5.301030,0,3,1,1,1,1,0,0
3,3.557146,0.250000,0.415985,5,0.4,1,2,0,5.227799,0,0,0,1,0,14,0,0
4,4.069150,0.328125,0.430459,4,0.3,1,5,0,4.963320,0,0,1,1,0,12,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19232,3.927730,0.562500,0.242291,2,0.1,1,0,0,5.477121,0,1,2,0,0,12,1,1
19233,4.195374,0.359375,0.839522,5,0.9,1,5,0,5.208441,0,2,1,1,0,11,1,0
19234,4.416774,0.359375,0.907489,3,0.4,1,1,0,5.065822,0,0,1,1,0,7,0,0
19235,3.726809,0.125000,0.286973,3,0.4,1,1,0,4.709762,0,0,1,1,0,1,0,0


In [48]:
# for col in df.columns:
#     if col in ["Price", "Mileage", "Model", "Manufacturer", "Category"]:
#         pass
#     else:
#         df[col] = minmax_scaler.fit_transform(np.array(df[col]).reshape(-1, 1))

In [49]:
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna(axis=0)

In [50]:
x = np.array(df.drop("Price", axis=1))
y = np.array(df["Price"]).reshape(-1, 1)

In [51]:
x.shape, y.shape

((18516, 16), (18516, 1))

In [27]:
def create_batches_x(array, batch_size):
    q, mod = divmod(len(array), batch_size)
    array = array[:-mod]
    array = array.reshape((q, batch_size, 16))
    return array

def create_batches_y(array, batch_size):
    q, mod = divmod(len(array), batch_size)
    array = array[:-mod]
    array = array.reshape((q, batch_size, 1))
    return array
x, y = create_batches_x(x, 32), create_batches_y(y, 32)

In [52]:
x.shape, y.shape

((18516, 16), (18516, 1))

In [26]:
np.save("x.npy", x)
np.save("y.npy", y)

In [53]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# x_train, x_test, y_train, y_test = torch.Tensor(x_train), torch.Tensor(x_test), torch.Tensor(y_train), torch.Tensor(y_test)

In [54]:
x_train.shape, y_train.shape

((14812, 16), (14812, 1))

In [55]:
count = np.isinf(x_train).sum()
print(count)

0


In [56]:
for col in df.columns:
    x = np.array(df[col])
    print(np.isinf(x).sum(), col)

0 Price
0 Manufacturer
0 Model
0 Prod. year
0 Category
0 Leather interior
0 Fuel type
0 Engine volume
0 Mileage
0 Cylinders
0 Gear box type
0 Drive wheels
0 Doors
0 Wheel
0 Color
0 Airbags
0 Turbo


In [61]:
import tensorflow as tf

# model = tf.keras.Sequential([

#     tf.keras.Input(x_train.shape[1]),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(512,activation='elu'),
#     tf.keras.layers.Dropout(.4),
#     tf.keras.layers.Dense(512, activation='elu'),
#     tf.keras.layers.Dropout(.333),
#     tf.keras.layers.Dense(64, activation='elu'),
#     tf.keras.layers.Dense(1,activation='linear')
# ])

model = tf.keras.Sequential([

    tf.keras.Conv1D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512,activation='elu'),
    tf.keras.layers.Dropout(.4),
    tf.keras.layers.Dense(512, activation='elu'),
    tf.keras.layers.Dropout(.333),
    tf.keras.layers.Dense(64, activation='elu'),
    tf.keras.layers.Dense(1,activation='linear')
])

# model.summary()


optim = tf.keras.optimizers.Adam(lr=0.001)
loss = 'mae'
logdir = "runs/tensorflow1/"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
# loss = 'mean_absolute_error'
# loss = 'mean_squared_error'


# def sch(epoch, lr):
#     if epoch%5== 0:
#         return lr*.1
#     else:
#         return lr


# cb = [tf.keras.callbacks.LearningRateScheduler(sch,0),

#     tf.keras.callbacks.ReduceLROnPlateau(
#     monitor='val_loss', factor=0.1, patience=5, verbose=1,
#     mode='auto', min_delta=0.0001, cooldown=0, min_lr=0),

#     tf.keras.callbacks.EarlyStopping(
#     monitor='val_loss', min_delta=0, patience=10, verbose=1,
#     mode='auto', baseline=None, restore_best_weights=True
# )

# ]

model.compile(optimizer=optim, loss=loss, metrics=[
              'mean_squared_logarithmic_error'])
history = model.fit(x_train, y_train, epochs=100,verbose=1,validation_split=.1, callbacks=[tensorboard_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78