In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Fix the seed first so the train/test split and the model share one reproducible value
seed = 964

# Load the California housing data as one DataFrame, then pick the model inputs and the target
housing = fetch_california_housing(as_frame=True).frame
features = housing[['MedInc', 'AveRooms', 'Latitude', 'Longitude']]
target = housing['MedHouseVal']

# Hold out 30% of the rows for evaluation
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.3, random_state=seed
)

# Fit a random forest with default hyperparameters on the training split
random_forest_model = RandomForestRegressor(random_state=seed)
random_forest_model.fit(features_train, target_train)

# Evaluate on the held-out split with RMSE.
# np.sqrt(MSE) is used instead of mean_squared_error(..., squared=False): the
# `squared` argument was deprecated in scikit-learn 1.4 and removed in 1.6.
target_predictions = random_forest_model.predict(features_test)
RMSE = np.sqrt(mean_squared_error(target_test, target_predictions))

# Scale to dollars for display; assumes MedHouseVal is expressed in units of
# $100,000 as stated in the scikit-learn dataset description
avg_error = round(RMSE * 100000, 2)
print(f"On average, house estimation values are ${avg_error:.2f} off.")

In [None]:
# Draw a 30-bin histogram for each column of the dataset on a 20x20 figure
housing.hist(
    figsize=(20, 20),
    bins=30,
)
# Show summary statistics for the dataset as this cell's output
housing.describe()

In [None]:
# Pairwise correlations between all columns, rounded to two decimals so the
# annotations inside the heatmap cells stay readable
correlation_matrix = housing.corr().round(2)
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
);

In [None]:
# Rank the inputs of the fitted random forest by importance, then show the
# ranking both as a horizontal bar chart and as a printed table
importances = (
    pd.DataFrame({
        'feature': features.columns,
        'importance': np.round(random_forest_model.feature_importances_, 2),
    })
    .sort_values('importance', ascending=False)
    .set_index('feature')
)
importances.plot.barh()
print(importances)

In [None]:
def _prompt_float(prompt, error_message, minimum, maximum=float("inf")):
    """Ask repeatedly until the user enters a finite number in [minimum, maximum].

    Parameters
    ----------
    prompt : str
        Text shown to the user by ``input``.
    error_message : str
        Message printed when the number falls outside the allowed range.
    minimum : float
        Smallest accepted value (inclusive).
    maximum : float, optional
        Largest accepted value (inclusive). Defaults to no upper bound.

    Returns
    -------
    float
        The first entry that parses as a finite number within the range.
    """
    while True:
        try:
            value = float(input(prompt))
            # float() parses "nan" and "inf". NaN compares False against every
            # bound, so without this check it would pass the range test below.
            if not np.isfinite(value):
                raise ValueError("Value must be a finite number.")
            if value < minimum or value > maximum:
                raise ValueError(error_message)
            return value
        except ValueError as e:
            # Covers both unparsable text and the range/finite checks above
            print(f"Error: {e}")


def get_user_input():
    """Interactively collect the four model inputs from the user.

    Each value is re-requested until it is a finite number inside its allowed
    range; every rejected entry prints an ``Error: ...`` line.

    Returns
    -------
    tuple of float
        ``(rooms, lat, lng, income)`` where ``income`` is the entered yearly
        income divided by 10000.
    """
    rooms = _prompt_float(
        "Enter number of rooms: ",
        "Number of rooms can't be negative.",
        minimum=0,
    )
    lat = _prompt_float(
        "Enter latitude(32.5 - 42): ",
        "Latitude must be between 32.5 and 42.",
        minimum=32.5,
        maximum=42,
    )
    lng = _prompt_float(
        "Enter longitude(-124.65 - -114.13): ",
        "Longitude must be between -124.65 and -114.13.",
        minimum=-124.65,
        maximum=-114.13,
    )
    income = _prompt_float(
        "Enter yearly income of current/previous household: ",
        "Income can't be negative.",
        minimum=0,
    )

    # Income is rescaled before being returned; presumably this matches the
    # units of the MedInc training feature (tens of thousands of dollars) —
    # confirm against the dataset description
    return rooms, lat, lng, income / 10000

In [None]:
def predict_property_value(model=None):
    """Prompt for a property's details and print the estimated value.

    Parameters
    ----------
    model : object with a ``predict`` method, optional
        Fitted model used for the estimate. When omitted, the notebook-level
        ``tuned_min_random_forest`` is used if it exists; otherwise the
        function falls back to ``random_forest_model``.
    """
    if model is None:
        # Looking the tuned model up by name avoids a NameError on a fresh
        # kernel where no cell has defined it
        model = globals().get('tuned_min_random_forest')
        if model is None:
            model = random_forest_model

    rooms, lat, lng, income = get_user_input()
    # Single-row frame whose column names and order mirror the training features
    user_input = pd.DataFrame({'MedInc': [income], 'AveRooms': [rooms], 'Latitude': [lat], 'Longitude': [lng]})
    prediction = model.predict(user_input)
    # The prediction is scaled by 100000 to express it in dollars
    print(f"Your house is estimated to be worth ${prediction[0] * 100000:.2f} dollars.")

In [None]:
# Run this cell (press Shift + Enter) to start predicting property values.
# It prompts for the property's details and then prints the estimated value.
predict_property_value()