In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import warnings
warnings.filterwarnings("ignore")

In [43]:
cars_data = pd.read_csv('data.csv')
cars_data.head(20)

Unnamed: 0,model,year,motor_type,running,wheel,color,type,status,motor_volume,price
0,toyota,2022,petrol,3000 km,left,skyblue,sedan,excellent,2.0,24500
1,mercedes-benz,2014,petrol,132000 km,left,black,sedan,excellent,2.0,25500
2,kia,2018,petrol,95000 miles,left,other,sedan,excellent,2.0,11700
3,mercedes-benz,2002,petrol,137000 miles,left,golden,sedan,excellent,3.2,12000
4,mercedes-benz,2017,petrol,130000 km,left,black,sedan,good,2.0,26000
5,mercedes-benz,2001,petrol,286293 km,left,blue,sedan,good,1.8,6000
6,nissan,2019,petrol,49000 miles,left,gray,suv,excellent,2.0,23200
7,nissan,2018,petrol,70000 miles,left,silver,suv,excellent,2.5,19800
8,mercedes-benz,2019,petrol,85000 km,left,black,sedan,excellent,2.0,7000
9,hyundai,2021,petrol,58000 miles,left,silver,sedan,crashed,2.0,13900


In [44]:
cars_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB


In [46]:
cars_data.describe()

Unnamed: 0,year,motor_volume,price
count,1642.0,1642.0,1642.0
mean,2014.805725,2.035018,15982.633374
std,6.587573,0.253069,7176.084647
min,1987.0,0.2,462.0
25%,2013.0,2.0,12000.0
50%,2017.0,2.0,15750.0
75%,2019.0,2.0,18500.0
max,2023.0,4.0,87000.0


In [None]:
cars_data.isna().sum()

model           0
year            0
motor_type      0
running         0
wheel           0
color           0
type            0
status          0
motor_volume    0
price           0
dtype: int64

In [None]:
cars_data.duplicated().sum()

12

In [None]:
cars_data = cars_data.drop_duplicates()
cars_data.duplicated().sum()

0

# Обработка

In [None]:
cars_data = pd.get_dummies(cars_data, columns=['model', 'motor_type', 'running', 'wheel','color','type','status'], drop_first=True)
cars_data = cars_data.replace({False: 0, True: 1})

In [None]:
cars_data.head(5)

Unnamed: 0,year,motor_volume,price,model_kia,model_mercedes-benz,model_nissan,model_toyota,motor_type_gas,motor_type_hybrid,motor_type_petrol,...,type_Universal,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_excellent,status_good,status_new,status_normal
0,2022,2.0,24500,0,0,0,1,0,0,1,...,0,0,0,0,1,0,1,0,0,0
1,2014,2.0,25500,0,1,0,0,0,0,1,...,0,0,0,0,1,0,1,0,0,0
2,2018,2.0,11700,1,0,0,0,0,0,1,...,0,0,0,0,1,0,1,0,0,0
3,2002,3.2,12000,0,1,0,0,0,0,1,...,0,0,0,0,1,0,1,0,0,0
4,2017,2.0,26000,0,1,0,0,0,0,1,...,0,0,0,0,1,0,0,1,0,0


In [None]:
cars_data.to_csv("cars_data52.csv",index=False)

# Обучение

In [None]:
y = cars_data["price"]
X = cars_data.drop(["price"], axis=1)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.8, random_state=42)

In [None]:
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)
y_pred = ridge_model.predict(X_test)

In [None]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared:", r_squared)

Mean Squared Error (MSE): 17303651.285595752
Mean Absolute Error (MAE): 2840.5148494152445
R-squared: 0.65652058256137


In [None]:
with open('Ridge.pkl', 'wb') as pickle_out:
    pickle.dump(ridge_model, pickle_out)