In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from tqdm import tqdm
from tpot.builtins import StackingEstimator
from tpot.export_utils import set_param_recursive
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
def criterion(pred, y):
    """Score predictions against targets, both given on a log scale.

    Each input is mapped back with ``exp(v) - 1`` before scoring. The returned
    value blends two terms computed on the relative error
    ``|pred - y| / y`` of the back-transformed values:

    * ``1 - MAPE`` (mean relative error), weighted 0.2
    * the fraction of samples with relative error <= 5%, weighted 0.8

    Parameters
    ----------
    pred : array-like
        Predicted values on the log scale.
    y : array-like
        Reference values on the log scale, same length as ``pred``.

    Returns
    -------
    float
        ``0.2 * (1 - MAPE) + 0.8 * accuracy``.
    """
    # Undo the log transform on both sides before measuring errors.
    actual = np.exp(y) - 1
    predicted = np.exp(pred) - 1
    n_samples = len(actual)

    rel_err = np.abs(predicted - actual) / actual
    mean_rel_err = np.sum(rel_err) / n_samples

    # "Accurate" means the prediction is within 5% of the reference value.
    within_tol = rel_err[rel_err <= 0.05]
    hit_rate = len(within_tol) / n_samples

    return 0.2 * (1 - mean_rel_err) + 0.8 * hit_rate

In [3]:
def predictUsingOne(model, path='input/inputTrain.csv', n_folds=10):
    """Produce out-of-fold price predictions for every row of a training file.

    The rows are cut into at most ``n_folds`` contiguous batches. For each
    batch the model is refit on all rows whose ``carid`` is not in the batch
    and then predicts the rows whose ``carid`` is in the batch.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``; ``predict`` must
        return something with ``.tolist()``. The same instance is refit for
        every batch, so on return it holds the fit from the last batch.
    path : str, optional
        CSV file to read. It must contain the columns ``'Unnamed: 0'``,
        ``'carid'`` and ``'price'``; every remaining column is used as a
        feature.
    n_folds : int, optional
        Upper bound on the number of batches (default 10).

    Returns
    -------
    list
        Predictions concatenated in batch order (which is file order as long
        as ``carid`` values are unique). Empty when the file has no rows.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1.
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1')

    df = pd.read_csv(path)
    # NOTE(review): 'Unnamed: 0' is presumably the index column left behind by
    # an earlier DataFrame.to_csv() call - confirm against the file producer.
    df = df.drop(columns=['Unnamed: 0'])

    n_rows = len(df)
    if n_rows == 0:
        # Nothing to split; avoids a zero batch size below.
        return []

    batch_size = int(np.ceil(n_rows / n_folds))
    pred_price = []
    # Stepping by batch_size visits exactly the non-empty batches. Counting
    # them as `n_rows // batch_size + 1` yields one extra, empty batch whenever
    # batch_size divides n_rows evenly, and predicting on zero rows fails.
    for start in tqdm(range(0, n_rows, batch_size)):
        drop_ids = df['carid'].iloc[start:start + batch_size].values
        held_out = df['carid'].isin(drop_ids)

        dropped_train_df = df[~held_out]
        pred_df = df[held_out]

        X_train = dropped_train_df.drop(columns=['price', 'carid']).to_numpy()
        y_train = dropped_train_df['price'].values
        X_test = pred_df.drop(columns=['price', 'carid']).to_numpy()

        model.fit(X_train, y_train)
        pred_price += model.predict(X_test).tolist()
    return pred_price

In [4]:
# model = make_pipeline(
#             StackingEstimator(estimator=xgb.XGBRegressor(learning_rate=0.001,
#                                                             max_depth=1,
#                                                             min_child_weight=2,
#                                                             n_estimators=100,
#                                                             objective='reg:squarederror',
#                                                             subsample=0.6500000000000001)
#                                                         ),
#             RandomForestRegressor(bootstrap=False,
#                                     max_features=0.5,
#                                     min_samples_leaf=2,
#                                     min_samples_split=2
#                                     )
#         )
# predic_name = 'XGBandRFR'



# model = make_pipeline(
#     StackingEstimator(estimator=RandomForestRegressor(
#         bootstrap=True,
#         max_features=0.5,
#         min_samples_leaf=1,
#         min_samples_split=11,
#         n_estimators=100)),
#     RandomForestRegressor(bootstrap=False,
#                         max_features=0.35000000000000003,
#                         min_samples_leaf=8,
#                         min_samples_split=17,
#                         n_estimators=100
#                         )
# )
# predic_name = 'RFRandRFR'



# model = RandomForestRegressor(bootstrap=False,
#                                 max_features=0.5,
#                                 min_samples_leaf=2,
#                                 min_samples_split=2,
#                                 n_estimators=100
#                                 )
# setattr(model, 'random_state', 42)
# predic_name = 'RFR'



# parameters = {'eta': 0.1, 'eval_metric': 'mae', 'gamma': 0.1, 'max_depth': 13, 'n_estimators': 190}
# model = xgb.XGBRegressor(**parameters)
# predic_name = 'XGB'



# model = GradientBoostingRegressor(alpha=0.99,
#                                     learning_rate=0.1,
#                                     loss='lad',
#                                     max_depth=9,
#                                     max_features=0.6,
#                                     min_samples_leaf=14,
#                                     min_samples_split=10,
#                                     n_estimators=100,
#                                     subsample=1.0
#                                     )
# predic_name = 'GBR'



# model = make_pipeline(
#     StackingEstimator(estimator=ExtraTreesRegressor(
#         bootstrap=False,
#         max_features=0.4,
#         min_samples_leaf=1,
#         min_samples_split=10,
#         n_estimators=100)
#         ),
#     RandomForestRegressor(bootstrap=False,
#                         max_features=0.55,
#                         min_samples_leaf=12,
#                         min_samples_split=16,
#                         n_estimators=100
#                         )
# )
# predic_name = 'ETRandRFR'



# model = GradientBoostingRegressor(alpha=0.99,
#                                             learning_rate=0.1,
#                                             loss="huber",
#                                             max_depth=9,
#                                             max_features=0.6000000000000001,
#                                             min_samples_leaf=8,
#                                             min_samples_split=10,
#                                             n_estimators=100,
#                                             subsample=1.0
#                                             )

# predic_name = 'GBR_enhanced'


# def mape(dtrain, preds):
#     d = preds - dtrain
#     h = 1
#     scale = 1 + (d / h) ** 2
#     scale_sqrt = np.sqrt(scale)
#     grad = d / scale_sqrt
#     hess = 1 / scale / scale_sqrt
#     return grad, hess
    
# model = xgb.XGBRegressor(eta=0.1,
#                             gamma=0,
#                             max_depth=12,
#                             n_estimators=230,
#                             objective=mape
#                             )

# predic_name = 'XGB_enhanced'


# model = RandomForestRegressor(bootstrap=False,
#                               max_features=0.5,
#                               min_samples_leaf=3,
#                               min_samples_split=5,
#                               n_estimators=100)

# predic_name = 'RFR_enhanced'


# model = make_pipeline(
#     StackingEstimator(estimator=ElasticNetCV(l1_ratio=0.8, tol=0.001)),
#     StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=True, max_features=0.55, min_samples_leaf=2, min_samples_split=17, n_estimators=100)),
#     RandomForestRegressor(bootstrap=False, max_features=0.4, min_samples_leaf=8, min_samples_split=14, n_estimators=100)
# )

# predic_name = 'RFRandETRandENCV'




from Final_Model import Model

# Model under evaluation. `Model` comes from the project-local Final_Model
# module; judging by the label below it is a stacking ensemble (not verified here).
model = Model()
predic_name = 'Stacking_model'

# Alternative starting point kept for reference: build the table from the
# training file's ids and prices instead of the existing predictions file.
# price_df = pd.read_csv('input/inputTrain.csv')
# price_df = price_df[['carid', 'price']]

# Load the existing predictions table, add this model's out-of-fold predictions
# under its own column, and write the table back to the same file.
price_df = pd.read_csv('pricePrediction.csv').drop(columns=['Unnamed: 0'])
price_df[predic_name] = predictUsingOne(model)
price_df.to_csv('pricePrediction.csv')

# Score the new column against the stored prices.
model_pred = price_df[predic_name].values
true_price = price_df['price'].values
print(criterion(model_pred, true_price))

  0%|          | 0/10 [00:00<?, ?it/s]

Fitting Models:


100%|██████████| 7/7 [02:28<00:00, 21.16s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<?, ?it/s]
 10%|█         | 1/10 [02:28<22:15, 148.43s/it]

Fitting Models:


100%|██████████| 7/7 [02:30<00:00, 21.52s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<?, ?it/s]
 20%|██        | 2/10 [04:59<19:59, 149.94s/it]

Fitting Models:


100%|██████████| 7/7 [02:26<00:00, 20.93s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<?, ?it/s]
 30%|███       | 3/10 [07:26<17:19, 148.53s/it]

Fitting Models:


100%|██████████| 7/7 [02:21<00:00, 20.28s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<00:00, 7000.51it/s]
 40%|████      | 4/10 [09:48<14:36, 146.08s/it]

Fitting Models:


100%|██████████| 7/7 [02:23<00:00, 20.47s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<00:00, 6995.50it/s]
 50%|█████     | 5/10 [12:12<12:05, 145.20s/it]

Fitting Models:


100%|██████████| 7/7 [02:22<00:00, 20.35s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<?, ?it/s]
 60%|██████    | 6/10 [14:35<09:37, 144.40s/it]

Fitting Models:


100%|██████████| 7/7 [02:26<00:00, 20.98s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<00:00, 7002.18it/s]
 70%|███████   | 7/10 [17:02<07:15, 145.32s/it]

Fitting Models:


100%|██████████| 7/7 [02:27<00:00, 21.04s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<?, ?it/s]
 80%|████████  | 8/10 [19:29<04:52, 146.07s/it]

Fitting Models:


100%|██████████| 7/7 [02:22<00:00, 20.34s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<00:00, 7000.51it/s]
 90%|█████████ | 9/10 [21:52<02:25, 145.03s/it]

Fitting Models:


100%|██████████| 7/7 [02:25<00:00, 20.79s/it]


Model Predicting:


100%|██████████| 7/7 [00:00<?, ?it/s]
100%|██████████| 10/10 [24:18<00:00, 145.86s/it]


0.5721835946336085
