## Part 1: Determinants of Apartment Prices

In [4]:
import statsmodels.api as sm
import pandas as pd
import numpy as np

# Apartment price regression: ordinary least squares of total rent on size,
# room count and the four near_to_* columns.
file_path = '/workspaces/fdap-2024-Milad-Zaman/Apartment_Analysis/2.Data cleaning/final_data.xlsx'

# The response and the regressors are named once; the cleaning steps and the
# design matrix below are both derived from these two definitions.
target_column = 'Price (Gesamtmiete)'
feature_columns = [
    'Size (Zimmergröße/Größe)',
    'Rooms',
    'near_to_uni',
    'near_to_airport',
    'near_to_hbf',
    'near_to_city_center',
]
numeric_columns = feature_columns + [target_column]

# Read the sheet, then coerce every model column to a number. Values that
# cannot be parsed become NaN so that the dropna step removes those rows.
df_with_dummies = pd.read_excel(file_path)
coerced = {
    name: pd.to_numeric(df_with_dummies[name], errors='coerce')
    for name in numeric_columns
}
df_with_dummies = (
    df_with_dummies
    .assign(**coerced)
    .dropna(subset=numeric_columns)
)

# Design matrix (predictors plus an intercept column) and response vector.
X = sm.add_constant(df_with_dummies[feature_columns])
y = df_with_dummies[target_column]

# Estimate the model and show the full regression table.
model = sm.OLS(y, X).fit()
print(model.summary())


                             OLS Regression Results                            
Dep. Variable:     Price (Gesamtmiete)   R-squared:                       0.170
Model:                             OLS   Adj. R-squared:                  0.155
Method:                  Least Squares   F-statistic:                     11.67
Date:                 Fri, 12 Jul 2024   Prob (F-statistic):           6.57e-12
Time:                         11:53:07   Log-Likelihood:                -2235.1
No. Observations:                  350   AIC:                             4484.
Df Residuals:                      343   BIC:                             4511.
Df Model:                            6                                         
Covariance Type:             nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const         

## Part 2: Determinants of WG Listing Duration

In [5]:
import statsmodels.api as sm
import pandas as pd

# Listing duration regression: ordinary least squares of the time a listing
# stays online (Availability_in_hours) on price, size, room count and the
# four near_to_* columns.
file_path = '/workspaces/fdap-2024-Milad-Zaman/Apartment_Analysis/2.Data cleaning/final_data.xlsx'

# The response and the regressors are named once; the cleaning steps and the
# design matrix below are both derived from these two definitions.
target_column = 'Availability_in_hours'
feature_columns = [
    'Price (Gesamtmiete)',
    'Size (Zimmergröße/Größe)',
    'Rooms',
    'near_to_uni',
    'near_to_airport',
    'near_to_hbf',
    'near_to_city_center',
]
numeric_columns = feature_columns + [target_column]

# Read the sheet, then coerce every model column to a number. Values that
# cannot be parsed become NaN so that the dropna step removes those rows.
df = pd.read_excel(file_path)
coerced = {
    name: pd.to_numeric(df[name], errors='coerce')
    for name in numeric_columns
}
df = df.assign(**coerced).dropna(subset=numeric_columns)

# Design matrix (predictors plus an intercept column) and response vector.
X = sm.add_constant(df[feature_columns])
y = df[target_column]

# Estimate the model and show the full regression table.
model = sm.OLS(y, X).fit()
print(model.summary())

                              OLS Regression Results                             
Dep. Variable:     Availability_in_hours   R-squared:                       0.832
Model:                               OLS   Adj. R-squared:                  0.829
Method:                    Least Squares   F-statistic:                     242.2
Date:                   Fri, 12 Jul 2024   Prob (F-statistic):          2.15e-128
Time:                           11:54:27   Log-Likelihood:                -2015.5
No. Observations:                    350   AIC:                             4047.
Df Residuals:                        342   BIC:                             4078.
Df Model:                              7                                         
Covariance Type:               nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------