In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import pandas as pd
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Column labels for the housing table. The CSV is read with header=None, so
# the names are supplied here; MEDV (the last column) is used as the target.
col_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
df_housing = pd.read_csv(
    'housing data.csv',
    sep=',',
    names=col_names,
    header=None,
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
features = df_housing.iloc[:, :-1]  # every column except the last one (MEDV)
target = df_housing['MEDV']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Define the model: scale the features, then fit an ordinary least-squares
# regression. with_mean=False scales each feature to unit variance without
# centering it.
model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

# Hyperparameters to search over.
# NOTE: `normalize` is intentionally not listed. It was deprecated in
# scikit-learn 1.0 and removed in 1.2, where GridSearchCV fails with
# "Invalid parameter 'normalize'". Feature scaling is already handled by the
# StandardScaler step of the pipeline.
param_grid = {'linearregression__fit_intercept': [True, False]}

# Evaluate every candidate with 5-fold cross-validation on both metrics.
# With several scorers, `refit` names the metric used to pick the best
# candidate, which is then refit on the whole training set.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=['neg_mean_squared_error', 'r2'],
    refit='r2',
    cv=5,
)

# Fit the GridSearchCV object to the training data
grid_search.fit(X_train, y_train)

# Report the selected configuration and its cross-validated scores.
# best_score_ is the mean CV score of the refit metric ('r2'); the matching
# mean negative MSE is looked up in cv_results_ at the same candidate index.
best_idx = grid_search.best_index_
cv_neg_mse = grid_search.cv_results_['mean_test_neg_mean_squared_error']
print("Best hyperparameters: ", grid_search.best_params_)
print("Best r2 score: ", grid_search.best_score_)
print("Best neg_mean_squared_error: ", cv_neg_mse[best_idx])

# Predict on the held-out test set with the refit best estimator
y_pred = grid_search.predict(X_test)

# Score the predictions against the true test targets
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
for label, value in (("MSE: ", mse), ("R2 score: ", r2)):
    print(label, value)



from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Define the pipelines: both standardize the features before the regressor.
# The random forest is seeded so that the selected hyperparameters and the
# reported score are reproducible between runs (same seed as the data split).
linear_pipeline = make_pipeline(StandardScaler(), LinearRegression())
rf_pipeline = make_pipeline(StandardScaler(), RandomForestRegressor(random_state=42))

# Define the hyperparameters to test for each pipeline.
# NOTE: `linearregression__normalize` is intentionally not listed. It was
# deprecated in scikit-learn 1.0 and removed in 1.2, where GridSearchCV fails
# with "Invalid parameter 'normalize'". The StandardScaler step already
# handles feature scaling.
linear_param_grid = {'linearregression__fit_intercept': [True, False]}
rf_param_grid = {'randomforestregressor__n_estimators': [10, 50, 100],
                 'randomforestregressor__max_depth': [None, 5, 10]}

# Create a GridSearchCV object for each pipeline. scoring='r2' is spelled out
# so the "r2 score" labels printed below do not depend on the default scorer.
linear_grid_search = GridSearchCV(linear_pipeline, linear_param_grid, scoring='r2', cv=5)
rf_grid_search = GridSearchCV(rf_pipeline, rf_param_grid, scoring='r2', cv=5)

# Fit the GridSearchCV objects to the training data
linear_grid_search.fit(X_train, y_train)
rf_grid_search.fit(X_train, y_train)

# Print the best hyperparameters and the best cross-validated score for each pipeline
print("Best hyperparameters for Linear Regression: ", linear_grid_search.best_params_)
print("Best r2 score for Linear Regression: ", linear_grid_search.best_score_)

print("Best hyperparameters for Random Forest: ", rf_grid_search.best_params_)
print("Best r2 score for Random Forest: ", rf_grid_search.best_score_)
