In [1]:
import pandas as pd
from lightgbm import LGBMRegressor

import joblib

In [2]:
# Load the training dataset from a CSV file located next to the notebook.
TRAIN_CSV_PATH = 'df_autos_train.csv'
df_autos_train = pd.read_csv(TRAIN_CSV_PATH)

In [3]:
# Clean the training frame in a single method chain so that no column is ever
# assigned on a filtered frame (the original per-step assignments on the result
# of .query() are what triggers pandas' SettingWithCopyWarning).
#
# Steps, in order:
#   1. treat a missing Power as 0;
#   2. keep only rows whose Power is non-zero (this removes both the originally
#      missing and the explicitly zero values);
#   3. store Power as an integer column;
#   4. drop NumberOfPictures and PostalCode so they are not passed to the model.
df_autos_train = (
    df_autos_train
    .fillna({'Power': 0})
    .query('Power != 0')
    .astype({'Power': 'int'})
    .drop(columns=['NumberOfPictures', 'PostalCode'])
)

### LightGBM

In [4]:
# Convert the string-valued columns below to pandas' categorical dtype.
categorical_columns = ['VehicleType', 'Gearbox', 'Model', 'FuelType', 'Brand', 'NotRepaired']
df_autos_train = df_autos_train.astype(
    {column: 'category' for column in categorical_columns}
)

In [5]:
# Print the column dtypes, non-null counts and memory usage of the prepared
# training frame to confirm the clean-up and categorical casts took effect.
df_autos_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 341855 entries, 0 to 341855
Data columns (total 10 columns):
 #   Column            Non-Null Count   Dtype   
---  ------            --------------   -----   
 0   Price             341855 non-null  int64   
 1   VehicleType       341855 non-null  category
 2   RegistrationYear  341855 non-null  int64   
 3   Gearbox           341855 non-null  category
 4   Power             341855 non-null  int64   
 5   Model             341855 non-null  category
 6   Kilometer         341855 non-null  int64   
 7   FuelType          341855 non-null  category
 8   Brand             341855 non-null  category
 9   NotRepaired       341855 non-null  category
dtypes: category(6), int64(4)
memory usage: 15.3 MB


In [6]:
# Split the frame into the value to predict (Price) and the predictor
# columns (every other column).
target_column = 'Price'
target = df_autos_train[target_column]
features = df_autos_train.drop(columns=[target_column])

In [7]:
# Hyperparameters of the gradient-boosting regressor, kept together in one
# place; random_state is fixed so that refitting gives the same model.
lgbm_params = {
    'n_estimators': 500,
    'max_depth': 6,
    'num_leaves': 40,
    'random_state': 12345,
}
model_lgbm = LGBMRegressor(**lgbm_params)
# fit() is left as the last expression so the cell displays the fitted estimator.
model_lgbm.fit(features, target)

LGBMRegressor(max_depth=6, n_estimators=500, num_leaves=40, random_state=12345)

In [11]:
# Optional model persistence, intentionally left disabled.
# Uncomment the first line to save the fitted model to 'lgb.pkl', or the
# second line to restore a previously saved model instead of refitting.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
# joblib.dump(model_lgbm, 'lgb.pkl')
# model_lgbm = joblib.load('lgb.pkl')

In [9]:
# Display the training row with index label 123456 as a reference example
# of the feature values and the price recorded for one listing.
df_autos_train.loc[123456]

Price                 3500
VehicleType          coupe
RegistrationYear      1997
Gearbox             manual
Power                  170
Model                  3er
Kilometer           150000
FuelType            petrol
Brand                  bmw
NotRepaired             no
Name: 123456, dtype: object

In [12]:
# One hand-written car description, with values listed in the same order as
# the columns of the training feature frame.
features_test = [['coupe', 2000, 'manual', 170, '3er', 150000, 'petrol', 'bmw', 'no']]
features_columns = features.columns
df_test = pd.DataFrame(data=features_test, columns=features_columns)

# Cast every column to the dtype it has in the training features. For the
# categorical columns this reuses the training CategoricalDtype, so the category
# sets (and therefore the integer codes) are identical to the ones the model was
# fit on. Casting with a bare 'category' on this one-row frame would instead
# build a separate single-value category set per column.
# A value that never occurred in training becomes NaN after this cast.
df_test = df_test.astype(features.dtypes.to_dict())

In [13]:
# Predict the price for the hand-written test row.
predictions = model_lgbm.predict(df_test)

# Round the single prediction to the nearest ten, then to a whole number,
# before printing it.
estimated_price = round(round(predictions[0], -1))
print(f'Примерная стоимоть автомобиля: {estimated_price} €')

Примерная стоимоть автомобиля: 4140 €
