In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import statsmodels.api as sm

In [2]:

# LOAD DATA
# Keep only listings within 2500 of both distance columns and with a raw
# price of at most 4000; .copy() lets us add columns below without touching df.
df = pd.read_csv('data/Davos_combined_clean.csv')
df_limited = df[(df['distance_skilift_meters'] <= 2500) &
                (df['distance_zentrum_meters'] <= 2500) &
                (df['price_raw'] <= 4000)].copy()

# REGRESSION AND FLAG POINTS
# Ordinary least squares of price on the two distance columns.
X = df_limited[['distance_skilift_meters', 'distance_zentrum_meters']]
y = df_limited['price_raw']
X_model = sm.add_constant(X)  # adds the intercept column to the design matrix
model = sm.OLS(y, X_model).fit()

# 'ABOVE' or 'BELOW' the trend: a listing is "above" when its observed price
# is greater than or equal to the fitted price.
df_limited['predicted'] = model.predict(X_model)
df_limited['is_above'] = df_limited['price_raw'] >= df_limited['predicted']

# CREATE SURFACE GRID
# 25 x 25 grid spanning the same 0-2500 window used by the filter above.
x_range = np.linspace(0, 2500, 25)
y_range = np.linspace(0, 2500, 25)
x_mesh, y_mesh = np.meshgrid(x_range, y_range)

# model.params is a label-indexed Series, so params[0] is a deprecated
# integer-key lookup that raises a FutureWarning. Use .iloc for explicit
# positional access (same values, same order as the columns of X_model).
intercept = model.params.iloc[0]
coef_skilift = model.params.iloc[1]
coef_zentrum = model.params.iloc[2]
z_mesh = intercept + coef_skilift * x_mesh + coef_zentrum * y_mesh

# Build the interactive Plotly figure
fig = go.Figure()

# Fitted regression plane: a semi-transparent blue surface with no colour bar
regression_plane = go.Surface(
    name='Regression Plane',
    x=x_range,
    y=y_range,
    z=z_mesh,
    colorscale='Blues',
    opacity=0.8,
    showscale=False,
)
fig.add_trace(regression_plane)

# Helper to plot above/below points with different intensities
def plot_category(data, name, color, faded=False):
    """Add one group of listings to the module-level ``fig`` as 3-D markers.

    Args:
        data: Rows to draw; must provide the ``distance_skilift_meters``,
            ``distance_zentrum_meters`` and ``price_raw`` columns.
        name: Trace name, also used as the legend group so that the bold and
            faded traces of one category toggle together.
        color: Marker fill colour.
        faded: If true, markers are drawn at 0.25 opacity and the trace is
            hidden from the legend; otherwise 0.95 opacity and listed.
    """
    if faded:
        marker_opacity = 0.25
    else:
        marker_opacity = 0.95

    marker_style = dict(
        size=5,
        color=color,
        opacity=marker_opacity,
        line=dict(width=1, color='black'),
    )
    points = go.Scatter3d(
        x=data['distance_skilift_meters'],
        y=data['distance_zentrum_meters'],
        z=data['price_raw'],
        mode='markers',
        marker=marker_style,
        name=name,
        legendgroup=name,
        showlegend=not faded,
    )
    fig.add_trace(points)

# Plot Hotels and Apartments: for each accommodation type, bold markers for
# listings priced at or above the fitted plane, then faded markers for the rest.
above_plane = df_limited['is_above']
for type_value, label, marker_color in (('hotel', 'Hotels', 'blue'),
                                        ('ferienwohnung', 'Apartments', 'orange')):
    of_type = df_limited['type'] == type_value
    plot_category(df_limited[of_type & above_plane], label, marker_color)
    plot_category(df_limited[of_type & ~above_plane], label, marker_color, faded=True)

# Finalize layout: axis titles and fixed ranges for the 3-D scene
scene_axes = dict(
    xaxis=dict(title='Dist. to Ski Lift (m)', range=[0, 2500]),
    yaxis=dict(title='Dist. to Center (m)', range=[0, 2500]),
    zaxis=dict(title='Price (CHF, Weekend)', range=[0, 4000]),
)
fig.update_layout(
    title='Multiple linear regression Location vs. Price',
    scene=scene_axes,
    width=1000,
    height=800,
)

fig.show()

  z_mesh = model.params[0] + model.params[1] * x_mesh + model.params[2] * y_mesh


In [4]:
import numpy as np
from sklearn.metrics import mean_squared_error

# --- Model Evaluation ---

# In-sample comparison: observed prices vs. the fitted model's predictions
y_true, y_pred = y, model.predict(X_model)

# RMSE is the square root of the mean squared prediction error
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)

# R-squared is reported directly by the fitted statsmodels result
r2 = model.rsquared

print(f"R²: {r2:.3f}")
print(f"RMSE: {rmse:.2f}")


R²: 0.005
RMSE: 856.30
