In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso, Ridge, ElasticNet, LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from warnings import filterwarnings
filterwarnings('ignore')
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [None]:
input_list1 = [('City', 'Bakı'), ('Brand', 'Infiniti'),
               ('Model', 'Q50'), ('ProdYear', '2020'),
               ('BanType', 'Sedan'),
               ('Color', 'Boz'), ('EngVol', '2.0'),
               ('EngPow', '211'), ('FuelType', 'Benzin'),
               ('RideDist', '0'), ('Gearbox', 'Avtomat'),
               ('Transmission', 'Arxa'), ('Barter', '1'),
               ('YungulLehimliDiskler', '1'), ('Condisioner', '1'),
               ('Lyuk', '1'), ('RainSensor', '1'),
               ('MerkeziQapanma', '1'), ('ParKRadar', '1'),
               ('LeatherSalon', '1'), ('SeatHeat', '1'),
               ('KsenonLamps', '1'),
               ('YanPerdeler', '1'), ('BackVisionCam', '1'), ('ABS', '1')]
# $38.500


In [7]:
input_list2 = [('City', 'Bakı'), ('Brand', 'Kia'),
               ('Model', 'Cerato'), ('ProdYear', '2011'),
               ('BanType', 'Sedan'),
               ('Color', 'Qara'), ('EngVol', '1.6'),
               ('EngPow', '126'), ('FuelType', 'Benzin'),
               ('RideDist', '133000'), ('Gearbox', 'Avtomat'),
               ('Transmission', 'Ön'),
               ('YungulLehimliDiskler', '1'), ('Condisioner', '1'),
               ('ABS', '1'), ('ParKRadar', '1'),
               ('BackVisionCam', '1')]  # 16000

In [6]:
df = pd.read_csv('../input/maindata/clean.csv')
data = df.copy()

In [8]:
def filter_car_details(input_list):
    column_list = list(df.columns)
    column_list.remove('Price')
    entry_names = [item[0] for item in input_list]
    false_options = list(set(column_list)-set(entry_names))
    filtered_list = input_list
    for op in false_options:
        filtered_list.append((op, '0'))
    l = []
    for i in filtered_list:
        try:
            l.append([i[0], float(i[1])])
        except:
            l.append([i[0], i[1]])
    filtered_list = l.copy()
    return filtered_list

In [9]:
def encoded_test_frame(df, filtered_list):
    columns = [col for col in df.columns]
    columns.remove('Price')
    ordered_option_list = []
    for col, opt in zip(columns, filtered_list):
        if col == opt[0]:
            ordered_option_list.append(opt[1])
        else:
            for i in filtered_list:
                if i[0] == col:
                    ordered_option_list.append(i[1])
    test = pd.DataFrame(columns=columns)
    test.loc[len(test)] = ordered_option_list
    data = df.copy()
    merged = pd.concat([data, test], keys=['data', 'test'], axis=0)
    merged = pd.get_dummies(merged, drop_first=True)
    test = merged.loc['test']
    encoded_data = merged.loc['data']
    return test, encoded_data

In [10]:
test_kia, encoded_data = encoded_test_frame(
    df, filter_car_details(input_list2))
encoded_data_copy = encoded_data.copy()

In [12]:
def minmax_scaler(dataframe, feature_range=(0, 1)):
    minmax_scaler = MinMaxScaler(feature_range=feature_range)
    col_names = [col for col in dataframe.columns]
    dataframe[col_names] = minmax_scaler.fit_transform(dataframe[col_names])
    return dataframe

In [13]:
encoded_data = minmax_scaler(encoded_data)

X = encoded_data.drop('Price', axis=1)
Y = encoded_data[['Price']]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25,
                                                    random_state=42)


In [15]:
# Ridge
ridge_params = {'alpha': [0.1, 0.01, 0.005, 0.05, 0.2, 0.3, 0.5, 0.8, 0.9, 1],
                'solver': ['auto', 'svd']}

rg = GridSearchCV(Ridge(), ridge_params, cv=5, return_train_score=True)
rg.fit(X_train, Y_train)
rg_test_pred = rg.predict(X_test)
mse = mean_squared_error(Y_test, rg_test_pred)
score = r2_score(Y_test, rg_test_pred)
print(f'Mse: {mse}')  # 0.6430655438041133
print(f'r2_score: {score}')  # 0.7310568100867445
print(f'Best params: {rg.best_params_}')  # {'alpha': 1, 'solver': 'svd'}

In [16]:
# Lasso
lasso_params = {'selection': ['cyclic', 'random'],
                'alpha': [0.1, 0.01, 0.005, 0.05, 0.2, 0.3, 0.5, 0.8, 0.9, 1]}

ls = GridSearchCV(Lasso(), lasso_params, cv=5, return_train_score=True)
ls.fit(X_train, Y_train)
ls_test_pred = ls.predict(X_test)
mse = mean_squared_error(Y_test, ls_test_pred)
score = r2_score(Y_test, ls_test_pred)
print(f'Mse: {mse}')  # 0.0013894243909161508
print(f'r2_score: {score}')  # 0.08566071082158677
# {'alpha': 0.005, 'selection': 'random'}
print(f'Best params: {ls.best_params_}')

In [17]:
# Elastic Net
elastic_params = {'alpha': [0.1, 0.01, 0.005, 0.05, 0.2, 0.3, 0.5, 0.8, 0.9, 1],
                  'selection': ['cyclic', 'random']}

el = GridSearchCV(ElasticNet(), elastic_params, cv=5, return_train_score=True)
el.fit(X_train, Y_train)
el_test_pred = el.predict(X_test)
mse = mean_squared_error(Y_test, el_test_pred)
score = r2_score(Y_test, el_test_pred)
print(f'Mse: {mse}')  # 0.0012208666241244036
print(f'r2_score: {score}')  # 0.19658361506990274
# {'alpha': 0.005, 'selection': 'cyclic'}
print(f'Best params: {el.best_params_}')

In [18]:
#LGBM
lgbm_params = {'learning_rate': [0.1, 0.2, 0.3, 0.4, 0.5], 'max_depth': list(range(-3, 2, 1)), 
                   'n_estimators': list(range(97, 105))}
lg = GridSearchCV(LGBMRegressor(), lgbm_params, cv=5, return_train_score=True)
lg.fit(X_train, Y_train)
lg_test_pred = lg.predict(X_test)
mse = mean_squared_error(Y_test, lg_test_pred)
score = r2_score(Y_test, lg_test_pred)
print(f'Mse: {mse}')  # 0.0001736633300886039
print(f'r2_score: {score}')  # 0.9533567048268635
# {'learning_rate': 0.2, 'max_depth': -1, 'n_estimators': 102}
print(f'Best params: {lg.best_params_}')

In [None]:
#Cat Boost
cat_params = {'depth': [2,4,6,8,10], 'learning_rate' : [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 1], 
              'iterations' : [30, 80, 50, 100, 150]}
cb = GridSearchCV(CatBoostRegressor(silent=True), cat_params, cv=5, return_train_score=True)
cb.fit(X_train, Y_train)
cb_test_pred = cb.predict(X_test)
mse = mean_squared_error(Y_test, cb_test_pred)
score = r2_score(Y_test, cb_test_pred)
print(f'Mse: {mse}')  # 0.002854243766581994
print(f'r2_score: {score}')  # 0.23339409400450595
# {'depth': 2, 'iterations': 30, 'learning_rate': 0.01}
print(f'Best params: {cb.best_params_}')

In [None]:
xgb_params = {'eta': [0.01, 0.1], 'gamma': [0, 0.5, 1],
             'max_depth': [1, 2, 5]}
xgb = GridSearchCV(XGBRegressor(verbosity=0), xgb_params, cv=5, return_train_score=True)
xgb.fit(X_train, Y_train)
xgb_test_pred = xgb.predict(X_test)
mse = mean_squared_error(Y_test, xgb_test_pred)
score = r2_score(Y_test, xgb_test_pred)
print(f'Mse: {mse}')  # 0.0001736633300886039
print(f'r2_score: {score}')  # 0.9533567048268635
# {'learning_rate': 0.2, 'max_depth': -1, 'n_estimators': 102}
print(f'Best params: {xgb.best_params_}')