In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_squared_log_error, r2_score
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import pickle

# Load and preprocess the data
tr_d = pd.read_csv('/kaggle/input/clothes-price-prediction/clothes_price_prediction_data.csv')

# Encode categorical data
def encode_data(data, columns):
    for col in columns:
        le = LabelEncoder()
        data[col] = le.fit_transform(data[col])
    return data

# Categorical columns to encode
cat_cols = ['Brand', 'Category', 'Color', 'Size', 'Material']
tr_d_encoded = encode_data(tr_d, cat_cols)

# Split the data into features and target
X = tr_d_encoded.drop('Price', axis=1)
y = np.log1p(tr_d_encoded['Price'])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define model parameters
lgb_params = {
    'n_estimators': 899,
    'learning_rate': 0.013003893032117776,
    'max_depth': 18,
    'reg_alpha': 0.9218377389528793,
    'reg_lambda': 0.020694654173173645,
    'num_leaves': 24,
    'subsample': 0.7402011916024158,
    'colsample_bytree': 0.25484261764678784,
    'verbose': -1
}

cat_params = {
    'iterations': 853,
    'learning_rate': 0.10899577626375372,
    'depth': 7,
    'colsample_bylevel': 0.7340962061535496,
    'random_strength': 6.262882561405091,
    'min_data_in_leaf': 92,
    'verbose': 0
}

# Initialize and train the LightGBM model
lgb_model = LGBMRegressor(**lgb_params)
lgb_model.fit(X_train, y_train)

# Initialize and train the CatBoost model
cat_model = CatBoostRegressor(**cat_params)
cat_model.fit(X_train, y_train)

# Make predictions
lgb_predictions = lgb_model.predict(X_test)
cat_predictions = cat_model.predict(X_test)

# Evaluate the performance
lgb_mse = mean_squared_error(y_test, lgb_predictions)
lgb_rmse = np.sqrt(lgb_mse)
lgb_r2 = r2_score(y_test, lgb_predictions)

cat_mse = mean_squared_error(y_test, cat_predictions)
cat_rmse = np.sqrt(cat_mse)
cat_r2 = r2_score(y_test, cat_predictions)

print("LightGBM - Root Mean Squared Error:", lgb_rmse)
print("LightGBM - R^2 Score:", lgb_r2)
print("CatBoost - Root Mean Squared Error:", cat_rmse)
print("CatBoost - R^2 Score:", cat_r2)

# Serialize the models
with open('lightgbm_model.pkl', 'wb') as f:
    pickle.dump(lgb_model, f)

with open('catboost_model.pkl', 'wb') as f:
    pickle.dump(cat_model, f)

print("Models have been serialized successfully.")


LightGBM - Root Mean Squared Error: 0.7085087466010107
LightGBM - R^2 Score: 0.021017522124185728
CatBoost - Root Mean Squared Error: 0.8800271978405867
CatBoost - R^2 Score: -0.5103467860351325
Models have been serialized successfully.


In [5]:
# Load the LightGBM model
with open('lightgbm_model.pkl', 'rb') as f:
    loaded_lgb_model = pickle.load(f)

# Load the CatBoost model
with open('catboost_model.pkl', 'rb') as f:
    loaded_cat_model = pickle.load(f)

# Example of making predictions
lgb_predictions = loaded_lgb_model.predict(X_test)
cat_predictions = loaded_cat_model.predict(X_test)

# Evaluate the performance again
lgb_msle = mean_squared_log_error(y_test, lgb_predictions)


cat_msle = mean_squared_log_error(y_test, cat_predictions)


print("Loaded LightGBM - Mean Squared Log Error:", lgb_msle)


print("Loaded CatBoost - Mean Squared Log Error:", cat_msle)


SyntaxError: invalid syntax (3865558373.py, line 19)