In [None]:
# Install the third-party libraries this notebook depends on, using %pip so
# they land in the environment of the running kernel.
# NOTE(review): versions are unpinned, so results may differ between
# environments — consider pinning (e.g. pkg==x.y.z) once versions are known.
# NOTE(review): missingno is installed here but not imported in the visible cells.
%pip install pandas numpy scikit-learn matplotlib seaborn shap missingno

: 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import shap

# Read the raw listings export (assumed to have been downloaded from Kaggle
# and placed in the notebook's working directory).
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
DATA_FILE = "Airbnb_Open_Data.csv"
df = pd.read_csv(DATA_FILE, low_memory=False)

# First look: (rows, columns), then a preview of the leading rows
print(df.shape)
df.head()

In [None]:
# Drop non-actionable columns (identifiers, free text, constant/unused fields)
df = df.drop(columns=['id', 'host id', 'NAME', 'host name', 'license', 'country'])

# Convert price to numeric by stripping "$" and thousands separators.
# The pattern is a raw string: '\$' inside a normal literal is an invalid
# escape sequence and raises a warning on current Python versions.
df['price'] = df['price'].replace(r'[\$,]', '', regex=True).astype(float)

# Remove listings without a price first, so the outlier filter below is not
# what silently discards them (NaN <= limit evaluates to False).
df = df.dropna(subset=['price'])

# Filter price outliers (top 2%). quantile() ignores NaN, so the cut-off is
# unaffected by the ordering above. .copy() gives an independent frame so the
# column assignment below does not trigger SettingWithCopyWarning.
price_upper_limit = df['price'].quantile(0.98)
df = df.loc[df['price'] <= price_upper_limit].copy()

# Handle missing values: impute the review rating with the median
df['review rate number'] = df['review rate number'].fillna(df['review rate number'].median())
df = df.drop(columns=['house_rules', 'lat', 'long'])  # High missingness (per the author's EDA)

# Encode categorical features. drop_first leaves one reference level per
# feature out; dtype=int keeps the dummy columns numeric on every pandas
# version (pandas >= 2 would otherwise emit bool columns, which produce
# object-dtype rows once mixed with the float features).
categorical_cols = ['neighbourhood group', 'room type', 'instant_bookable']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True, dtype=int)

# Final cleaning: remove rows that still have a missing value in ANY column.
# NOTE(review): this also drops rows whose only gap is in a column the model
# never uses — consider restricting it with subset=[...] to the model inputs.
df = df.dropna()
print(f"Final dataset shape: {df.shape}")

In [None]:
# Model inputs: the listing attributes picked out during EDA.
# The names must match the post-encoding column names exactly.
modifiable_features = [
    'room type_Private room',
    'room type_Shared room',
    'review rate number',
    'instant_bookable_True',
    'Construction year',
    'minimum nights',
    'number of reviews',
]

# Target and feature matrix
y = df['price']
X = df.loc[:, modifiable_features]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit a 100-tree random forest on the training split (seeded for reproducibility);
# fit() returns the estimator itself, so construction and fitting can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate: mean absolute error on both splits, to expose any train/test gap
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)
train_mae = mean_absolute_error(y_train, train_preds)
test_mae = mean_absolute_error(y_test, test_preds)
print(f"Train MAE: {train_mae:.2f}")
print(f"Test MAE: {test_mae:.2f}")

6

In [None]:
# Load SHAP's JavaScript bundle into the notebook. The interactive force plot
# below is rendered by that bundle; without this call the cell output shows a
# "Javascript library not loaded" notice instead of the plot.
shap.initjs()

# Initialize SHAP explainer for the fitted tree ensemble and compute
# per-feature contributions for every held-out listing.
# NOTE(review): this can be slow on a large test split — consider explaining
# a sample of X_test if runtime becomes a problem.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Global feature importance
shap.summary_plot(shap_values, X_test, feature_names=modifiable_features)

# Local explanation for a single listing. Kept as the last expression of the
# cell so the notebook displays the returned visualization.
sample_idx = 0
shap.force_plot(
    explainer.expected_value,
    shap_values[sample_idx],
    X_test.iloc[sample_idx],
    feature_names=modifiable_features
)

7

In [None]:
def generate_host_recommendations(listing_data, shap_explainer=None, feature_names=None):
    """
    Generates price optimization recommendations for hosts
    based on model feature contributions.

    Parameters
    ----------
    listing_data : pandas.Series
        Feature values of a single listing, indexed by feature name and
        ordered like ``feature_names`` (e.g. ``X_test.iloc[i]``).
    shap_explainer : object, optional
        Any object exposing ``shap_values(array_2d)``. Defaults to the
        notebook-level ``explainer``.
    feature_names : sequence of str, optional
        Names matching the positions of the SHAP values. Defaults to the
        notebook-level ``modifiable_features``.

    Returns
    -------
    list of str
        Human-readable suggestions; empty when no feature contribution
        crosses its threshold.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working
    if shap_explainer is None:
        shap_explainer = explainer
    if feature_names is None:
        feature_names = modifiable_features

    # Get SHAP values for the listing. The row is cast to float because a
    # Series sliced from a mixed-dtype frame can come back as object dtype.
    listing_row = listing_data.values.astype(float).reshape(1, -1)
    listing_shap = shap_explainer.shap_values(listing_row)[0]

    recommendations = []
    feature_effects = dict(zip(feature_names, listing_shap))

    # Generate suggestions for modifiable features
    if 'room type_Private room' in feature_effects:
        if feature_effects['room type_Private room'] < -10:
            recommendations.append(
                "⚠️ Consider upgrading to ENTIRE HOME: Private rooms reduce price by "
                f"${abs(feature_effects['room type_Private room']):.2f} on average"
            )

    if 'instant_bookable_True' in feature_effects:
        if feature_effects['instant_bookable_True'] > 5:
            recommendations.append(
                "✅ Enable INSTANT BOOKING: Increases price by "
                f"${feature_effects['instant_bookable_True']:.2f}"
            )

    # The feature is named 'review rate number' (spaces, no underscore); the
    # previous 'review rate_number' key never matched, so this branch was dead.
    if 'review rate number' in feature_effects:
        if feature_effects['review rate number'] > 8:
            recommendations.append(
                # f-prefix is required here so the current rating is interpolated
                f"⭐ Maintain HIGH RATINGS (Current: {listing_data['review rate number']}/5): "
                f"+${feature_effects['review rate number']:.2f} to price"
            )

    return recommendations

# Demo: explain the first held-out listing and print each suggestion as a bullet
sample_listing = X_test.iloc[0]
print("Host Recommendations:")
for suggestion in generate_host_recommendations(sample_listing):
    print(f"- {suggestion}")