In [2]:
from sklearn.linear_model import LinearRegression
import pandas as pd
import joblib

# Load the trained model
model = joblib.load('./Resources/optimized_predict_pricing.pkl')

# Load the dataset for reference (you need to provide the path to your dataset)
df_encoded1 = pd.read_csv('Resources/df_encoded1.csv')  # Update the path if necessary

# Check the columns of df_encoded1
print("Columns in df_encoded1:", df_encoded1.columns)

# Function to handle default values for user inputs
def get_user_input(prompt, default_value, value_type):
    user_input = input(f"{prompt} (default {default_value}): ")
    if not user_input:
        return value_type(default_value)
    try:
        return value_type(user_input)
    except ValueError:
        print(f"Invalid input. Using default value: {default_value}")
        return value_type(default_value)

# Preprocessing function (to ensure consistency with model training)
def preprocess_input_data(input_data):
    # One-hot encode the 'city' and 'zipcode' columns, similar to training time
    input_data_encoded = pd.get_dummies(input_data, columns=['city', 'zipcode'], drop_first=True)
    
    # Get all the columns from the original dataset used for training, including one-hot encoding
    model_columns = list(df_encoded1.columns)
    
    # Ensure all columns are present (add missing columns with 0)
    missing_cols = set(model_columns) - set(input_data_encoded.columns)
    for col in missing_cols:
        input_data_encoded[col] = 0  # Add missing columns with 0
    
    # Ensure the column order is the same as during training
    input_data_encoded = input_data_encoded[model_columns]
    
    return input_data_encoded

# Main function for suggestion
def suggest_property():
    print("Please enter the following details (leave empty to use default values):")
    
    # User inputs with defaults, using helper function
    bedrooms = get_user_input("Number of Bedrooms", 3, int)
    bathrooms = get_user_input("Number of Bathrooms", 2.0, float)
    sqft_living = get_user_input("Square Feet of Living Area", 1500, int)
    avg_income = get_user_input("Average Income", 65000, float)
    city = get_user_input("City", "Seattle", str)
    zipcode = get_user_input("Zipcode", "98105", str)
    min_price = int(input("Minimum Price: "))
    max_price = int(input("Maximum Price: "))

    # Prepare the input data for prediction
    input_data = pd.DataFrame([[bedrooms, bathrooms, sqft_living, city, zipcode, avg_income]],
                              columns=['bedrooms', 'bathrooms', 'sqft_living', 'city', 'zipcode', 'avg_income'])

    # Preprocess the input data (encode categorical variables)
    input_data_encoded = preprocess_input_data(input_data)

    # Make the prediction using the trained model
    predicted_price = model1.predict(input_data_encoded)[0]
    print(f"The predicted price for your request is: ${predicted_price:,.2f}")

    # Filter dataset for price range
    filtered_df = df_encoded1[(df_encoded1['price'] >= min_price) & (df_encoded1['price'] <= max_price)]

    # Find the closest predicted price in the filtered dataset
    closest = filtered_df.iloc[(filtered_df['price'] - predicted_price).abs().argmin()]

    # Suggest the best city and zipcode
    print(f"Suggested City: {closest['city']}")
    print(f"Suggested Zipcode: {closest['zipcode']}")
    print(f"Price in Suggested City: ${closest['price']:,.2f}")

# Run the suggestion function
suggest_property()

FileNotFoundError: [Errno 2] No such file or directory: './Resources/optimized_predict_pricing.pkl'

In [24]:
import joblib

    # Load the model from the file
model = joblib.load('../static_data/optimized_predict_pricing.pkl')

In [25]:
print("Feature names:", model.feature_names_in_)

Feature names: ['bedrooms' 'bathrooms' 'sqft_living' 'avg_income' 'city_Auburn'
 'city_Bellevue' 'city_Federal Way' 'city_Kent' 'city_Kirkland'
 'city_Redmond' 'city_Renton' 'city_Sammamish' 'city_Seattle'
 'zipcode_98001' 'zipcode_98002' 'zipcode_98003' 'zipcode_98004'
 'zipcode_98005' 'zipcode_98006' 'zipcode_98007' 'zipcode_98008'
 'zipcode_98023' 'zipcode_98030' 'zipcode_98031' 'zipcode_98032'
 'zipcode_98033' 'zipcode_98034' 'zipcode_98042' 'zipcode_98052'
 'zipcode_98053' 'zipcode_98055' 'zipcode_98056' 'zipcode_98058'
 'zipcode_98059' 'zipcode_98074' 'zipcode_98075' 'zipcode_98092'
 'zipcode_98102' 'zipcode_98103' 'zipcode_98105' 'zipcode_98106'
 'zipcode_98107' 'zipcode_98108' 'zipcode_98109' 'zipcode_98112'
 'zipcode_98115' 'zipcode_98116' 'zipcode_98117' 'zipcode_98118'
 'zipcode_98119' 'zipcode_98122' 'zipcode_98125' 'zipcode_98126'
 'zipcode_98133' 'zipcode_98136' 'zipcode_98144' 'zipcode_98146'
 'zipcode_98148' 'zipcode_98155' 'zipcode_98166' 'zipcode_98168'
 'zipcode_9817

In [27]:
print("Target name:", model.target_name)

AttributeError: 'LinearRegression' object has no attribute 'target_name'