In [2]:
#%%
# MLP Price Prediction Notebook
# This notebook loads Airbnb listing data, preprocesses it, splits into training and validation sets,
# and builds a simple Multi-Layer Perceptron (MLP) model to predict listing prices.

#%%
# 1. Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

#%%
# 2. Load Data
# Adjust the path if needed
data = pd.read_csv('listings_com.csv')  # or 'listings.csv' depending on filename

# Quick peek
print(data.shape)
data.head()

#%%
# 3. Basic Preprocessing
# Select target and features
# Drop rows with missing price
data = data.dropna(subset=['price'])
# Convert price to numeric (remove currency symbols)
data['price'] = data['price'].replace({'\$': '', ',': ''}, regex=True).astype(float)

# Example feature selection: numeric and categorical
numeric_features = ['accommodates', 'bathrooms', 'bedrooms', 'beds', 'number_of_reviews', 'review_scores_rating']
cat_features = ['room_type', 'neighbourhood']  # adapt based on available columns

# Drop rows with missing in selected features
data = data.dropna(subset=numeric_features + cat_features)

X = data[numeric_features + cat_features]
y = data['price']

#%%
# 4. Split into Training and Validation Sets
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f"Training samples: {X_train.shape[0]}, Validation samples: {X_val.shape[0]}")

#%%
# 5. Build Preprocessing Pipeline
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, cat_features)
    ]
)

#%%
# 6. Prepare Data
# Fit and transform training data, transform validation data
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# Determine input dimension
input_dim = X_train_proc.shape[1]
print(f"Input dimension after preprocessing: {input_dim}")

#%%
# 7. Build MLP Model
model = Sequential([
    Dense(64, activation='relu', input_shape=(input_dim,)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='linear')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
model.summary()

#%%
# 8. Train Model
history = model.fit(
    X_train_proc, y_train,
    validation_data=(X_val_proc, y_val),
    epochs=50,
    batch_size=32
)

#%%
# 9. Evaluate
val_pred = model.predict(X_val_proc).flatten()
mse = mean_squared_error(y_val, val_pred)
r2 = r2_score(y_val, val_pred)
print(f"Validation MSE: {mse:.2f}")
print(f"Validation R2: {r2:.2f}")

#%%
# 10. Plot Training History
import matplotlib.pyplot as plt

plt.figure()
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.title('Training vs Validation Loss')
plt.show()


  data['price'] = data['price'].replace({'\$': '', ',': ''}, regex=True).astype(float)


ModuleNotFoundError: No module named 'tensorflow.python'