# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score, cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import uniform

# Load the dataset
df = pd.read_csv('https://raw.githubusercontent.com/amankharwal/Website-data/master/car_features.csv')

# Columns to one-hot encode. Any other non-numeric feature column found in the
# file is appended as well: LinearRegression cannot consume strings, so a text
# column left un-encoded would make the later fit() raise.
categorical_vars = ['Make', 'Model', 'Engine Fuel Type', 'Transmission Type', 'Driven_Wheels', 'Vehicle Size', 'Vehicle Style']
categorical_vars += [
    col
    for col in df.select_dtypes(exclude=['number', 'bool']).columns
    if col not in categorical_vars and col != 'MSRP'
]

# LinearRegression also rejects NaN. Missing categories become their own
# explicit level; missing numeric features are filled with the column median.
# NOTE(review): median imputation is a simple default -- confirm it suits the data.
df[categorical_vars] = df[categorical_vars].fillna('Unknown')
numeric_features = df.select_dtypes(include='number').columns.drop('MSRP', errors='ignore')
df[numeric_features] = df[numeric_features].fillna(df[numeric_features].median())

# Convert categorical variables to one-hot encoding.
# `sparse_output` replaces the removed `sparse` argument (scikit-learn >= 1.2)
# and `get_feature_names_out` replaces the removed `get_feature_names`.
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = pd.DataFrame(
    ohe.fit_transform(df[categorical_vars]),
    columns=ohe.get_feature_names_out(categorical_vars),
    index=df.index,  # keep rows aligned with df for the concat below
)

# Combine one-hot encoded variables with continuous variables
df = df.drop(columns=categorical_vars)
df = pd.concat([df, encoded], axis=1)

# Separate the target (MSRP) from the feature matrix (every other column)
y = df['MSRP']
X = df.drop(columns='MSRP')

# Define the linear regression model
linreg = LinearRegression()

# Define the hyperparameter space for RandomizedSearchCV.
# Only real LinearRegression constructor arguments that change the fitted
# model are searched:
#   * 'normalize' was removed from LinearRegression in scikit-learn 1.2,
#   * 'normalize_X' and 'intercept_scaling' are not LinearRegression
#     parameters, so the search would raise "Invalid parameter",
#   * 'copy_X' and 'n_jobs' do not alter the coefficients, so searching over
#     them only multiplies the number of fits.
param_dist = {
    'fit_intercept': [True, False],
    'positive': [True, False],
}

# Every entry is a finite list, so the space is a small grid; cap n_iter at
# its size instead of requesting more candidates than exist.
n_candidates = int(np.prod([len(values) for values in param_dist.values()]))

# Define the RandomizedSearchCV object
rs = RandomizedSearchCV(
    linreg,
    param_distributions=param_dist,
    n_iter=min(100, n_candidates),
    cv=5,
    scoring='neg_root_mean_squared_error',
    random_state=42,
)

# Fit the RandomizedSearchCV object to the data
rs.fit(X, y)

# Use the best hyperparameters to fit the linear regression model to the data
best_linreg = LinearRegression(**rs.best_params_)
best_linreg.fit(X, y)

# Calculate evaluation metrics using cross-validation.
# cross_validate (not cross_val_score) is required here: it accepts several
# scorers at once and returns a dict keyed 'test_<scorer name>'.
cv_results = cross_validate(best_linreg, X, y, cv=5, scoring=['r2', 'neg_root_mean_squared_error'])
cv_r2 = np.mean(cv_results['test_r2'])
# The scorer reports negated RMSE (higher is better), so flip the sign back.
cv_rmse = np.abs(np.mean(cv_results['test_neg_root_mean_squared_error']))

# Calculate evaluation metrics for the data.
# These are computed on the same X the model was fitted on earlier in the
# script, so they are in-sample figures; the CV numbers printed last are the
# out-of-sample estimate.
y_pred = best_linreg.predict(X)
r2 = r2_score(y, y_pred)
# Take the square root explicitly: the `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6.
rmse = np.sqrt(mean_squared_error(y, y_pred))
# NOTE(review): MAPE divides by y, so it assumes no target value is zero.
mape = np.mean(np.abs((y - y_pred) / y)) * 100
n = X.shape[0]  # number of samples
p = X.shape[1]  # number of features
# Adjusted R-squared penalises R-squared for the number of features used.
adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

# Print the evaluation metrics
print(f'R-squared: {r2:.4f}')
print(f'Adjusted R-squared: {adj_r2:.4f}')
print(f'Root Mean Squared Error: {rmse:.2f}')
print(f'Mean Absolute Percentage Error: {mape:.2f}%')
print(f'CV R-squared: {cv_r2:.4f}')
print(f'CV Root Mean Squared Error: {cv_rmse:.2f}')
