In [2]:
import os

import pandas as pd
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

# Tune a CatBoost regressor with a randomized hyper-parameter search, then
# report the best cross-validated R2 and the R2 on a held-out test split.

# --- Configuration ---------------------------------------------------------
# Default workbook location. Set the AUTO_DATA_PATH environment variable to
# point at a different copy of the file when running on another machine.
DEFAULT_DATA_PATH = r'C:\Users\V R N S Nikhil\OneDrive\Desktop\4th_sem\ML\FINAL\ML\Auto.xlsx'
TARGET_COLUMN = 'output'
SEED = 42  # shared by the split, the model and the search

# Load the data (an empty AUTO_DATA_PATH falls back to the default path)
file_path = os.environ.get('AUTO_DATA_PATH') or DEFAULT_DATA_PATH
data = pd.read_excel(file_path)

# Fail early with a readable message instead of an error from inside `drop`
if TARGET_COLUMN not in data.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found in {file_path!r}; "
        f"available columns: {list(data.columns)}"
    )

# Separate features and target
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

# Split the data into training (80 %) and testing (20 %) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Normalize the feature values; scaling statistics come from the training
# split only and are then reused for the test split.
# NOTE(review): the scaler is fitted before cross-validation, so every CV fold
# is scaled with statistics from the whole training split. Wrap the scaler and
# the model in a Pipeline if strict fold isolation is required.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate values sampled by the randomized search
param_grid = {
    'iterations': [100, 200, 500],
    'depth': [4, 6, 8],
    'learning_rate': [0.01, 0.05, 0.1],
    'l2_leaf_reg': [1, 3, 5, 7],
    'border_count': [32, 50, 100]
}

# Initialize the CatBoost regressor (silent=True suppresses training logs)
catboost_regressor = CatBoostRegressor(silent=True, random_state=SEED)

# Set up the randomized search with cross-validation: 20 sampled parameter
# combinations, 3 folds each. The metric is stated explicitly so the
# cross-validation score is the same metric (R2) as the test-set score below.
random_search = RandomizedSearchCV(
    estimator=catboost_regressor,
    param_distributions=param_grid,
    n_iter=20,
    cv=3,
    scoring='r2',
    random_state=SEED,
    n_jobs=-1,
)

# Fit the randomized search model
random_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best (mean cross-validated) score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate the refitted best model on the held-out test set
y_pred = random_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
print("R2 Score on Test Set:", r2)

Best Parameters: {'learning_rate': 0.05, 'l2_leaf_reg': 1, 'iterations': 500, 'depth': 4, 'border_count': 32}
Best Cross-Validation Score: 0.12441748577485745
R2 Score on Test Set: 0.2895853908600263
