In [16]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Read the dataset from the CSV file in the working directory.
data = pd.read_csv('aqardata.csv')

# 'Price' is the regression target; every other column is used as a feature.
y = data['Price']
X = data.drop(columns='Price')

# Replace the values of each feature column with integer class codes.
# The same LabelEncoder instance is refit on each column in turn.
# NOTE(review): every feature column is encoded here, numeric ones included --
# confirm this is intended rather than encoding only the categorical columns.
encoder = LabelEncoder()
X_encoded = X.apply(encoder.fit_transform, axis=0)

# 5-fold splitter; shuffling with a fixed seed keeps the folds reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=35)

# Per-model results are collected here as
# (name, mean MSE, mean MAE, mean RMSE, mean R2) tuples.
evaluation_scores = []

# Candidate regressors keyed by the display name used when reporting.
# Converting the dict items to a list yields (name, estimator) pairs in
# insertion order.
models = list({
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=35),
    'Gradient Boosting': GradientBoostingRegressor(random_state=35),
    'Support Vector Regression': SVR(),
}.items())

# Cross-validate every candidate model on the folds produced by `kf` and
# record its fold-averaged MSE, MAE, RMSE and R2 in `evaluation_scores`.
for model_name, model in models:
    # Per-fold metric values for the current model.
    fold_mse_scores = []
    fold_mae_scores = []
    fold_rmse_scores = []
    fold_r2_scores = []

    for train_index, test_index in kf.split(X_encoded):
        # KFold yields positional indices, hence .iloc for row selection.
        X_train, X_test = X_encoded.iloc[train_index], X_encoded.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit on this fold's training rows, then predict the held-out rows.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Score the held-out predictions.
        fold_mse = mean_squared_error(y_test, y_pred)
        fold_mae = mean_absolute_error(y_test, y_pred)
        # RMSE is derived from the MSE instead of passing `squared=False`:
        # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
        # so relying on it raises TypeError on current releases.
        fold_rmse = fold_mse ** 0.5
        fold_r2 = r2_score(y_test, y_pred)

        fold_mse_scores.append(fold_mse)
        fold_mae_scores.append(fold_mae)
        fold_rmse_scores.append(fold_rmse)
        fold_r2_scores.append(fold_r2)

    # Average each metric over the folds (avg_rmse is the mean of the
    # per-fold RMSE values, not the root of avg_mse).
    avg_mse = sum(fold_mse_scores) / len(fold_mse_scores)
    avg_mae = sum(fold_mae_scores) / len(fold_mae_scores)
    avg_rmse = sum(fold_rmse_scores) / len(fold_rmse_scores)
    avg_r2 = sum(fold_r2_scores) / len(fold_r2_scores)

    # Store the averages together with the model's display name.
    evaluation_scores.append((model_name, avg_mse, avg_mae, avg_rmse, avg_r2))

# Rank the models in place by average MSE (index 1 of each tuple), lowest
# first, so the best-scoring model ends up at the front of the list.
evaluation_scores.sort(key=lambda entry: entry[1])

# Unpack the top-ranked entry into its name and averaged metrics.
best_entry = evaluation_scores[0]
best_model, best_mse, best_mae, best_rmse, best_r2 = best_entry

# Report the winner and its cross-validated scores.
print(f"Best Model: {best_model}")
print(f"Mean Squared Error: {best_mse}")
print(f"Mean Absolute Error: {best_mae}")
print(f"Root Mean Squared Error: {best_rmse}")
print(f"R2 Score: {best_r2}")

Best Model: Gradient Boosting
Mean Squared Error: 7457009.423380251
Mean Absolute Error: 771.6596837607334
Root Mean Squared Error: 2447.0612705619224
R2 Score: 0.248443795245305
