In [1]:
# Regression

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

# Load the training and testing datasets

train_data = pd.read_csv("../Part 1/real-state/train_full_Real-estate.csv")
test_data = pd.read_csv("../Part 1/real-state/test_full_Real-estate.csv")

# Prepare data for regression task
X_train_reg = train_data.drop(columns=['Y house price of unit area'])
y_train_reg = train_data['Y house price of unit area']
X_test_reg = test_data.drop(columns=['Y house price of unit area'])
y_test_reg = test_data['Y house price of unit area']

# Define the hyperparameter grid
param_grid = {
    'fit_intercept': [True, False],
    'copy_X': [True, False],
    'positive': [True, False]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(LinearRegression(), param_grid, cv=5, scoring='neg_mean_squared_error')

# Perform hyperparameter tuning
grid_search.fit(X_train_reg, y_train_reg)

# Get the best hyperparameters
best_params = grid_search.best_params_

# Train Linear Regression model with the best hyperparameters
best_linear_reg_model = LinearRegression(**best_params)
best_linear_reg_model.fit(X_train_reg, y_train_reg)

# Make predictions with the best model
y_pred_reg_best = best_linear_reg_model.predict(X_test_reg)

# Calculate RMSE with the best model
rmse_reg_best = mean_squared_error(y_test_reg, y_pred_reg_best, squared=False)

# Print the best hyperparameters and the corresponding RMSE
print("Best hyperparameters:", best_params)
print("Regression - Root Mean Squared Error (RMSE) with best hyperparameters:", rmse_reg_best)


Best hyperparameters: {'copy_X': True, 'fit_intercept': False, 'positive': False}
Regression - Root Mean Squared Error (RMSE) with best hyperparameters: 8.602718711928972




In [4]:
# Classification

# after applying hyper parameter tuning 

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Load the training and testing datasets
train_data = pd.read_csv("../Part 1/real-state/train_full_Real-estate.csv")
test_data = pd.read_csv("../Part 1/real-state/test_full_Real-estate.csv")

# Prepare data for classification task
X_train_cls = train_data.drop(columns=['Y house price of unit area'])
y_train_cls = train_data['Y house price of unit area'] >= 30  # Convert to binary labels
X_test_cls = test_data.drop(columns=['Y house price of unit area'])
y_test_cls = test_data['Y house price of unit area'] >= 30  # Convert to binary labels

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# Create the grid search model
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy')

# Perform hyperparameter tuning
grid_search.fit(X_train_cls, y_train_cls)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# Use the best model to make predictions
best_model = grid_search.best_estimator_
y_pred_cls_rf = best_model.predict(X_test_cls)

# Calculate accuracy
accuracy_cls_rf = accuracy_score(y_test_cls, y_pred_cls_rf)
print("Random Forest Classification - Accuracy with best hyperparameters:", accuracy_cls_rf)


Best hyperparameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 100}
Random Forest Classification - Accuracy with best hyperparameters: 0.8938053097345132


405 fits failed out of a total of 1215.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
405 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/shubhamlolge/MSc AI/Applications of ML/.venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/shubhamlolge/MSc AI/Applications of ML/.venv/lib/python3.12/site-packages/sklearn/base.py", line 1467, in wrapper
    estimator._validate_params()
  File "/Users/shubhamlolge/MSc AI/Applications of ML/.venv/lib/python3.12/site-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/Users/shubhamlolge/MSc AI/Appl