#  Predict Airbnb Price Using the Trained Model
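This notebook demonstrates how to use the trained model (`final_model.pkl`) to predict the price of a new Airbnb listing. Raw listing attributes are converted into the model's feature columns by `preprocess_input`, and the model's log-price output is converted back to a price by `predict_price_from_input`.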


In [11]:
#  Load libraries
import pandas as pd
import numpy as np
import joblib

# Load the trained model; the relative path is resolved against the kernel's
# working directory, so the file is expected to sit next to this notebook.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
model = joblib.load("final_model.pkl")



In [13]:
# ⚙️ Preprocessing Function for Raw Input

# Feature columns, in the order used to build the model's input frame.
# The one-hot column names must stay in sync with the category lists that
# preprocess_input iterates over.
final_model_columns = [
    'latitude', 'longitude', 'calculated_host_listings_count', 'availability_365',
    'log_minimum_nights', 'log_number_of_reviews', 'log_reviews_per_month',
    'neighbourhood_group_Brooklyn', 'neighbourhood_group_Manhattan',
    'neighbourhood_group_Queens', 'neighbourhood_group_Staten Island',
    'room_type_Private room', 'room_type_Shared room',
    'neighbourhood_grouped_Bushwick', 'neighbourhood_grouped_Crown Heights',
    'neighbourhood_grouped_East Village', 'neighbourhood_grouped_Harlem',
    "neighbourhood_grouped_Hell's Kitchen", 'neighbourhood_grouped_Midtown',
    'neighbourhood_grouped_Other', 'neighbourhood_grouped_Upper East Side',
    'neighbourhood_grouped_Upper West Side', 'neighbourhood_grouped_Williamsburg'
]


def preprocess_input(raw_input):
    """
    Converts raw dictionary input into the format required by the model.

    Parameters
    ----------
    raw_input : mapping
        Must provide the keys 'latitude', 'longitude',
        'calculated_host_listings_count', 'availability_365',
        'minimum_nights', 'number_of_reviews', 'reviews_per_month',
        'neighbourhood_group', 'room_type' and 'neighbourhood'.

    Returns
    -------
    pandas.DataFrame
        A single-row frame whose columns are exactly `final_model_columns`,
        in that order. Numeric fields are passed through or log-transformed;
        the three categorical fields are one-hot encoded. A category value
        that matches none of the encoded levels produces all zeros for
        'neighbourhood_group' and 'room_type', and sets
        'neighbourhood_grouped_Other' for 'neighbourhood'.

    Raises
    ------
    KeyError
        If any required key is absent; the message lists every missing key.
    ValueError
        If 'minimum_nights' is not positive, or 'number_of_reviews' or
        'reviews_per_month' is negative. The log transforms would otherwise
        turn these into -inf/NaN features without any error.
    """
    required_keys = (
        'latitude', 'longitude', 'calculated_host_listings_count',
        'availability_365', 'minimum_nights', 'number_of_reviews',
        'reviews_per_month', 'neighbourhood_group', 'room_type',
        'neighbourhood',
    )
    missing = [key for key in required_keys if key not in raw_input]
    if missing:
        # Same exception type as a plain dict lookup, but reports all gaps at once.
        raise KeyError(
            f"raw_input is missing required field(s): {', '.join(missing)}"
        )

    minimum_nights = raw_input['minimum_nights']
    number_of_reviews = raw_input['number_of_reviews']
    reviews_per_month = raw_input['reviews_per_month']

    # Reject values outside the domain of the log transforms below instead of
    # letting -inf/NaN flow into the model.
    if minimum_nights <= 0:
        raise ValueError(
            f"minimum_nights must be positive, got {minimum_nights!r}"
        )
    if number_of_reviews < 0:
        raise ValueError(
            f"number_of_reviews must be non-negative, got {number_of_reviews!r}"
        )
    if reviews_per_month < 0:
        raise ValueError(
            f"reviews_per_month must be non-negative, got {reviews_per_month!r}"
        )

    # NOTE(review): the offsets (none, +1, +1e-5) presumably mirror the
    # transforms applied when the model was trained — confirm against the
    # training notebook before changing them.
    processed = {
        'latitude': raw_input['latitude'],
        'longitude': raw_input['longitude'],
        'calculated_host_listings_count': raw_input['calculated_host_listings_count'],
        'availability_365': raw_input['availability_365'],
        'log_minimum_nights': np.log(minimum_nights),
        'log_number_of_reviews': np.log(number_of_reviews + 1),
        'log_reviews_per_month': np.log(reviews_per_month + 1e-5),
    }

    # One-hot encode the borough; a value outside this list leaves all four at 0.
    borough = raw_input['neighbourhood_group']
    for group in ('Brooklyn', 'Manhattan', 'Queens', 'Staten Island'):
        processed[f'neighbourhood_group_{group}'] = int(borough == group)

    # One-hot encode the room type; a value outside this list leaves both at 0.
    room_type = raw_input['room_type']
    for rt in ('Private room', 'Shared room'):
        processed[f'room_type_{rt}'] = int(room_type == rt)

    # Neighbourhoods that have their own column; everything else is 'Other'.
    # NOTE(review): the borough and room-type columns each appear to omit one
    # level, which suggests drop-first dummy encoding at training time. If the
    # grouped neighbourhood was encoded the same way, its dropped baseline
    # level should encode as all zeros rather than as 'Other' — confirm
    # against the training notebook.
    top_neighs = ('Bushwick', 'Crown Heights', 'East Village', 'Harlem',
                  "Hell's Kitchen", 'Midtown', 'Upper East Side',
                  'Upper West Side', 'Williamsburg')
    neighbourhood = raw_input['neighbourhood']
    for neigh in top_neighs:
        processed[f'neighbourhood_grouped_{neigh}'] = int(neighbourhood == neigh)
    processed['neighbourhood_grouped_Other'] = int(neighbourhood not in top_neighs)

    # reindex fixes the column order and zero-fills any column not set above.
    df_input = pd.DataFrame([processed]).reindex(
        columns=final_model_columns, fill_value=0
    )

    return df_input


In [17]:
# 🤖 Define Prediction Function

def predict_price_from_input(model, raw_input):
    """
    Predict a price for a single listing described by `raw_input`.

    The raw input is converted to a feature frame by `preprocess_input`, the
    first value returned by `model.predict` is taken as a natural-log price,
    and its exponential is returned rounded to two decimal places.
    """
    features = preprocess_input(raw_input)
    predictions = model.predict(features)
    # The model output is on the log scale; exponentiate to recover the price.
    return round(np.exp(predictions[0]), 2)


In [19]:
# 📊 Sample Input and Prediction

# Example listing: raw, untransformed attributes exactly as preprocess_input
# expects them (the log transforms and one-hot encoding happen there).
sample_input = dict(
    latitude=40.7128,
    longitude=-73.9352,
    calculated_host_listings_count=2,
    availability_365=120,
    minimum_nights=2,
    number_of_reviews=10,
    reviews_per_month=1.2,
    neighbourhood_group='Brooklyn',
    room_type='Private room',
    neighbourhood='Other',
)

# Run the full pipeline (preprocess -> predict -> invert log) and report the result.
predicted_price = predict_price_from_input(model, sample_input)
print(f"Predicted Airbnb Price: ${predicted_price}")


Predicted Airbnb Price: $81.31
