In [1]:
# librerías
# Importaciones necesarias
from flask import Flask
from flask_restx import Api, Resource, fields
from flask_cors import CORS
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# cargar datos (se tiene en .csv en local)
df_train=pd.read_csv("dataTrain_carListings/dataTrain_carListings.csv")
# data test tiene una columna llamada ID, que solamente es el orden de numeros
df_test=pd.read_csv("dataTest_carListings/dataTest_carListings.csv", index_col=0)
# Cargar datos reales
df_real = pd.read_csv("true_car_listings.csv", on_bad_lines='skip')

In [3]:
# eliminar columnas adicionales
df_real.drop(["City", "Vin"], axis=1, inplace=True)
df_real.columns

Index(['Id', 'Price', 'Year', 'Mileage', 'State', 'Make', 'Model'], dtype='object')

In [4]:
# Codificar variables categóricas
categorical_columns = ['State', 'Make', 'Model']
df_train = pd.get_dummies(df_train, columns=categorical_columns).astype(int)

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
data = df_train.drop(['Price'], axis=1)
target = df_train['Price']

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=42)

# Convertir los arrays resultantes en DataFrames de pandas
X_train = pd.DataFrame(X_train, columns=data.columns)
X_test = pd.DataFrame(X_test, columns=data.columns)
y_train = pd.Series(y_train, name='Price')
y_test = pd.Series(y_test, name='Price')

# Verifica los tipos de datos de las estructuras resultantes
print("Tipo de X_train:", type(X_train))
print("Tipo de X_test:", type(X_test))
print("Tipo de y_train:", type(y_train))
print("Tipo de y_test:", type(y_test))

Tipo de X_train: <class 'pandas.core.frame.DataFrame'>
Tipo de X_test: <class 'pandas.core.frame.DataFrame'>
Tipo de y_train: <class 'pandas.core.series.Series'>
Tipo de y_test: <class 'pandas.core.series.Series'>


In [7]:
import joblib

# Configuración y entrenamiento del modelo
xgb_2 = XGBRegressor(
    reg_lambda=0.16062324818666773,
    alpha=0.6139702492127704,
    subsample=0.9702623758315497,
    colsample_bytree=0.539127268885431,
    n_estimators=941,
    max_depth=9,
    min_child_weight=8,
    learning_rate=0.15277968834080027,
    gamma=0.410872106042996,
    random_state=659
)

xgb_2.fit(X_train, y_train)

# Guardar el modelo entrenado
joblib.dump(xgb_2, 'xgb_model.pkl')

['xgb_model.pkl']

In [8]:
from flask import Flask
from flask_restx import Api, Resource, fields
from flask_cors import CORS
import pandas as pd
import numpy as np
import joblib
from werkzeug.exceptions import BadRequest

app = Flask(__name__)
CORS(app)
api = Api(app, version='1.0', title='Vehicle Price Prediction API',
          description='API for predicting vehicle prices based on their features')

ns = api.namespace('price', description='Price Estimator')

parser = api.parser()
parser.add_argument('Year', type=int, required=True, help='Year of the vehicle', location='args')
parser.add_argument('Mileage', type=int, required=True, help='Mileage of the vehicle', location='args')
parser.add_argument('State', type=str, required=True, help='State where the vehicle is sold', location='args')
parser.add_argument('Make', type=str, required=True, help='Make of the vehicle', location='args')
parser.add_argument('Model', type=str, required=True, help='Model of the vehicle', location='args')

resource_fields = api.model('Resource', {
    'predicted_price': fields.Float,
})

try:
    model = joblib.load('xgb_model.pkl')
except Exception as e:
    print(f"Error loading model: {e}")
    raise

@ns.route('/')
class PricePredictor(Resource):
    @api.doc(parser=parser)
    @api.marshal_with(resource_fields)
    def get(self):
        args = parser.parse_args()
        # Crear un DataFrame con una fila de ceros con las columnas dummies requeridas
        input_data = pd.DataFrame(0, index=np.arange(1), columns=model.get_booster().feature_names)
        
        # Actualizar los valores de Year y Mileage
        input_data.at[0, 'Year'] = args['Year']
        input_data.at[0, 'Mileage'] = args['Mileage']
        
        # Actualizar las columnas dummies para State, Make, Model
        input_data.at[0, f'State_{args["State"].strip()}'] = 1
        input_data.at[0, f'Make_{args["Make"]}'] = 1
        input_data.at[0, f'Model_{args["Model"]}'] = 1
        
        # Asegúrate de que las columnas están en el mismo orden que las características del modelo
        input_data = input_data.reindex(columns=model.get_booster().feature_names, fill_value=0)
        
        # Predecir el precio
        predicted_price = model.predict(input_data)[0]

        return {'predicted_price': predicted_price}, 200

In [None]:
# Ejecución de la aplicación que disponibiliza el modelo de manera local en el puerto 5000
app.run(debug=True, use_reloader=False, host='0.0.0.0', port=5000)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.0.11:5000
Press CTRL+C to quit
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET /swaggerui/droid-sans.css HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET /swaggerui/swagger-ui.css HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET /swaggerui/swagger-ui-bundle.js HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET /swaggerui/swagger-ui-standalone-preset.js HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET /swagger.json HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2024 01:21:22] "GET /swaggerui/favicon-32x32.png HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2024 01:24:21] "GET /price/?Year=2015&Mileage=13650&State=%20NC&Make=Buick&Model=EncoreLeather HTTP/1.1" 200 -
