In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import joblib

In [6]:
df = pd.read_csv(r'Carros Usados.csv')
df

Unnamed: 0,listing_id,make,model,year,trim,body_type,fuel_type,transmission,mileage,price,...,LED Headlights,Lane Keep Assist,Leather Seats,Navigation,Panoramic Roof,Parking Sensors,Push Button Start,Sunroof,Ventilated Seats,Wireless Charging
0,1,Tesla,Model 3,2019,,Coupe,Electric,Manual,46134,19919,...,True,False,False,False,False,True,False,False,True,False
1,2,Nissan,Rogue,2024,LT,Sedan,Hybrid,Automatic,16109,19480,...,False,True,False,True,True,True,True,False,False,True
2,3,Hyundai,i20,2018,XLE,Crossover,Petrol,Automatic,173239,4556,...,False,False,False,False,True,False,False,True,False,False
3,4,Kia,Sportage,2023,EX,Hatchback,Diesel,CVT,36810,11536,...,False,False,False,True,False,False,False,True,False,True
4,5,Kia,Seltos,2020,Trend,Pickup,Diesel,Automatic,87749,14098,...,False,True,False,False,True,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2063,2064,Skoda,Kushaq,2023,Sport,Pickup,Petrol,Automatic,24566,34696,...,False,False,False,False,True,False,False,False,False,True
2064,2065,Mahindra,Scorpio,2016,XSE,SUV,Diesel,Automatic,122459,2642,...,True,False,True,True,True,True,True,False,False,True
2065,2066,Audi,A6,2018,EX,Hatchback,Diesel,DCT,120452,7093,...,False,False,False,True,False,False,False,False,False,True
2066,2067,Skoda,Kushaq,2021,LX,MPV,Petrol,Manual,57043,11296,...,False,False,False,False,True,True,True,False,False,False


In [7]:
boolean_columns = ['Adaptive Cruise Control', 'Alloy Wheels', 'Android Auto', 'Apple CarPlay', 
                  'Backup Camera', 'Blind Spot Monitor', 'Bluetooth', 'Fog Lights', 
                  'Heated Seats', 'Keyless Entry', 'LED Headlights', 'Lane Keep Assist',
                  'Leather Seats', 'Navigation', 'Panoramic Roof', 'Parking Sensors',
                  'Push Button Start', 'Sunroof', 'Ventilated Seats', 'Wireless Charging']

for col in boolean_columns:
    df[col] = df[col].astype(int)

In [8]:
categorical_columns = ['make', 'model', 'trim', 'body_type', 'fuel_type', 
                      'transmission', 'condition', 'seller_type', 'city', 'state', 'country']

le_make = LabelEncoder()
df['make'] = le_make.fit_transform(df['make'].astype(str))
joblib.dump(le_make, 'make_encoder.pkl')

le_model = LabelEncoder()
df['model'] = le_model.fit_transform(df['model'].astype(str))
joblib.dump(le_model, 'model_encoder.pkl')

le_trim = LabelEncoder()
df['trim'] = le_trim.fit_transform(df['trim'].astype(str))
joblib.dump(le_trim, 'trim_encoder.pkl')

le_body_type = LabelEncoder()
df['body_type'] = le_body_type.fit_transform(df['body_type'].astype(str))
joblib.dump(le_body_type, 'body_type_encoder.pkl')

le_fuel_type = LabelEncoder()
df['fuel_type'] = le_fuel_type.fit_transform(df['fuel_type'].astype(str))
joblib.dump(le_fuel_type, 'fuel_type_encoder.pkl')

le_transmission = LabelEncoder()
df['transmission'] = le_transmission.fit_transform(df['transmission'].astype(str))
joblib.dump(le_transmission, 'transmission_encoder.pkl')

le_condition = LabelEncoder()
df['condition'] = le_condition.fit_transform(df['condition'].astype(str))
joblib.dump(le_condition, 'condition_encoder.pkl')

le_seller_type = LabelEncoder()
df['seller_type'] = le_seller_type.fit_transform(df['seller_type'].astype(str))
joblib.dump(le_seller_type, 'seller_type_encoder.pkl')

le_city = LabelEncoder()
df['city'] = le_city.fit_transform(df['city'].astype(str))
joblib.dump(le_city, 'city_encoder.pkl')

le_state = LabelEncoder()
df['state'] = le_state.fit_transform(df['state'].astype(str))
joblib.dump(le_state, 'state_encoder.pkl')

le_country = LabelEncoder()
df['country'] = le_country.fit_transform(df['country'].astype(str))
joblib.dump(le_country, 'country_encoder.pkl')

['country_encoder.pkl']

In [31]:
X = df.drop(['price', 'listing_id'], axis=1)
y = df['price']

In [32]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [34]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [35]:
model.compile(optimizer='adam', loss='mse')

In [36]:
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1, validation_split=0.2)




Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 187139488.0000 - val_loss: 175655776.0000
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 186794784.0000 - val_loss: 174909664.0000
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 185042512.0000 - val_loss: 171836640.0000
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 179224096.0000 - val_loss: 163463952.0000
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 165941792.0000 - val_loss: 145877344.0000
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 140935968.0000 - val_loss: 117548472.0000
Epoch 7/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 104843968.0000 - val_loss: 83055504.0000
Epoch 8/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x22483646fd0>

In [37]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [38]:
print(f"Model trained. MSE: {mse:,.2f}, R²: {r2:.2f}")

Model trained. MSE: 27,811,986.00, R²: 0.71


In [39]:
model.save('carrousado_model.h5')

