<a href="https://colab.research.google.com/github/Pathway2008/CarPrice/blob/main/BLending.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.2-cp310-cp310-manylinux2014_x86_64.whl (98.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.6/98.6 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2


In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.svm import SVR
import xgboost as xgb
from catboost import CatBoostRegressor
import lightgbm as lgb

In [12]:
train = pd.read_csv('/content/drive/MyDrive/CarPrice/train.csv')
test = pd.read_csv('/content/drive/MyDrive/CarPrice/test.csv')
sub = pd.read_csv('/content/drive/MyDrive/CarPrice/sample_submission.csv')

In [6]:
from sklearn.preprocessing import LabelEncoder

In [13]:
ordinal_features = ['브랜드', '차량모델명', '판매도시', '판매구역']

for feature in ordinal_features:
    le = LabelEncoder()
    le = le.fit(train[feature])
    train[feature] = le.transform(train[feature])

    for label in np.unique(test[feature]):
        if label not in le.classes_:
            le.classes_ = np.append(le.classes_, label)
    test[feature] = le.transform(test[feature])

In [18]:
X = train.drop(['ID', '가격'], axis=1)
y = train['가격']

In [19]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
from sklearn.preprocessing import MinMaxScaler
scaler  = MinMaxScaler()
columns_to_scale = ["생산년도", "모델출시년도", "주행거리", "배기량"]

X_train[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])
X_valid[columns_to_scale] = scaler.transform(X_valid[columns_to_scale])

In [40]:
gbrt_params = {'n_estimators': 489, 'learning_rate': 0.037666299375964746, 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'subsample': 0.994817517319318}

In [36]:
xgb_params = {'booster': 'gbtree', 'eta': 0.09773787198485948, 'max_depth': 10, 'subsample': 0.6537116324250624, 'colsample_bytree': 0.9736768668296801, 'lambda': 2.571247074857165, 'alpha': 3.739545080324916e-05}

In [37]:
cat_params = {'boosting_type': 'Plain', 'iterations': 1674, 'learning_rate': 0.10573424441351864, 'depth': 8, 'l2_leaf_reg': 0.00026060065975250495, 'random_strength': 1.8409010826838174e-05, 'bagging_temperature': 0.01757564674119713, 'border_count': 113}

In [38]:
lgb_params =  {'num_leaves': 96, 'learning_rate': 0.09833136097882149, 'feature_fraction': 0.933552804860751, 'bagging_fraction': 0.7528646718210341, 'bagging_freq': 7, 'lambda_l1': 4.485925018221742, 'lambda_l2': 0.00010634446495247863}

In [39]:
rf_params =  {'n_estimators': 500, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2}

In [41]:
models = [
    ('xgboost', xgb.XGBRegressor(**xgb_params)),
    ('catboost', CatBoostRegressor(**cat_params)),
    ('lightgbm', lgb.LGBMRegressor(**lgb_params)),
    ('svr', SVR()),
    ('lasso', Lasso()),
    ('ridge', Ridge()),
    ('random_forest', RandomForestRegressor(**rf_params))
]

In [42]:
meta_X_train = np.zeros((X_train.shape[0], len(models)))
meta_X_valid = np.zeros((X_valid.shape[0], len(models)))

In [None]:
for i, (name, model) in enumerate(models):
    model.fit(X_train, y_train)
    meta_X_train[:, i] = model.predict(X_train)
    meta_X_valid[:, i] = model.predict(X_valid)

In [27]:
meta_model = GradientBoostingRegressor()
meta_model.fit(meta_X_train, y_train)
meta_preds = meta_model.predict(meta_X_valid)

In [28]:
mae = mean_absolute_error(y_valid, meta_preds)
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 6.090817432594649


grid search

In [44]:
meta_model = GradientBoostingRegressor(**gbrt_params)
meta_model.fit(meta_X_train, y_train)
meta_preds = meta_model.predict(meta_X_valid)

In [45]:
mae = mean_absolute_error(y_valid, meta_preds)
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 5.971731093105415


In [30]:
from sklearn.model_selection import train_test_split, cross_val_predict, KFold

In [None]:
kf = KFold(n_splits=5, random_state=42, shuffle=True)

for i, (name, model) in enumerate(models):
    fold_predictions = []
    for train_index, val_index in kf.split(X_train):
        X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
        y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

        model.fit(X_train_fold, y_train_fold)
        fold_pred = model.predict(X_val_fold)
        fold_predictions.append(fold_pred)

    meta_X_train[:, i] = np.concatenate(fold_predictions)

In [None]:
for i, (name, model) in enumerate(models):
    model.fit(X_train, y_train)
    meta_X_valid[:, i] = model.predict(X_valid)

In [33]:
meta_model = GradientBoostingRegressor()
meta_model.fit(meta_X_train, y_train)
meta_preds = meta_model.predict(meta_X_valid)

In [34]:
mae = mean_absolute_error(y_valid, meta_preds)
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 29.295139265478856


Blending

In [46]:
meta_model = GradientBoostingRegressor(**gbrt_params)
xgb_model = xgb.XGBRegressor(**xgb_params)
catboost_model = CatBoostRegressor(**cat_params)
lgbm_model = lgb.LGBMRegressor(**lgb_params)
rf_model = RandomForestRegressor(**rf_params)
svr_model = SVR()
lasso_model = Lasso()
ridge_model = Ridge()

In [None]:
xgboost_model = xgb_model.fit(X_train, y_train)
lightgb_model = lgbm_model.fit(X_train, y_train)
catboost_model = catboost_model.fit(X_train,y_train)
rf_model = rf_model.fit(X_train,y_train)
svr_model = svr_model.fit(X_train, y_train)
lasso_model = lasso_model.fit(X_train, y_train)
ridge_model = ridge_model.fit(X_train, y_train)
meta_model.fit(meta_X_train, y_train)

In [50]:
def blended_predictions(X):
    return ((0.05 * lasso_model.predict(X)) + \
            (0.05 * ridge_model.predict(X)) + \
            (0.05 * svr_model.predict(X)) + \
            (0.1 * catboost_model.predict(X)) + \
            (0.15 * xgboost_model.predict(X)) + \
            (0.1 * lightgb_model.predict(X)) + \
            (0.5 * meta_model.predict((X))))

In [51]:
pred = blended_predictions(X_valid)



ValueError: ignored

In [None]:
mae = mean_absolute_error(y_valid, pred)
print("Mean Absolute Error (MAE):", mae)