In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Data Preparation
# The CSV is read from a hard-coded Downloads location; edit this path so it
# points at housing.csv on your own machine / operating system.
file_path = 'C:/Users/Mycomputer/Downloads/housing.csv'


def _regression_scores(y_true, y_pred):
    """Return the (mean squared error, R^2) pair for a set of predictions."""
    return mean_squared_error(y_true, y_pred), r2_score(y_true, y_pred)


# Read the raw table, then replace categorical columns with dummy (indicator)
# columns; drop_first=True leaves out the first level of each category.
housing_data = pd.read_csv(file_path)
housing_data_encoded = pd.get_dummies(housing_data, drop_first=True)

# Step 2: Prepare Features and Target
# 'price' is the regression target; every other encoded column is a feature.
y = housing_data_encoded['price']
X = housing_data_encoded.drop(columns='price')

# Step 3: Train-Test Split (80% train, 20% test)
# random_state is fixed so the same rows land in each partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4: Build a Random Forest Model
# Baseline forest with 100 trees, fitted on the training partition only.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Step 5: Make Predictions and Evaluate the Model
# Score the baseline on the held-out test partition.
y_pred_rf = rf_model.predict(X_test)
mse_rf, r2_rf = _regression_scores(y_test, y_pred_rf)

print(f'Random Forest Mean Squared Error: {mse_rf}')
print(f'Random Forest R^2 Score: {r2_rf}')

# Step 6: Perform Randomized Search for Hyperparameter Tuning
# Candidate values the randomized search samples from for each hyperparameter.
param_grid = {
    'n_estimators': [10, 50, 100, 200, 300, 400, 500],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10, 15, 20],
    'min_samples_leaf': [1, 2, 4, 6, 8],
}

# Try 30 sampled parameter combinations, each scored with 5-fold
# cross-validation on the training data; n_jobs=-1 runs the fits in parallel.
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_grid,
    n_iter=30,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Step 7: Make Predictions and Evaluate the Optimized Model
# Score the best estimator found by the search on the same held-out test
# partition, so its numbers are directly comparable with the baseline's.
y_pred_optimized = random_search.best_estimator_.predict(X_test)
mse_optimized, r2_optimized = _regression_scores(y_test, y_pred_optimized)

print(f'Optimized Random Forest Mean Squared Error: {mse_optimized}')
print(f'Optimized Random Forest R^2 Score: {r2_optimized}')
