# Predicting the price of the places.

In [2]:
import pandas as pd

merged_score = pd.read_csv('/workspaces/Coworking/src/results/MergedPlacesScore.csv')

center_distance = pd.read_csv('/workspaces/Coworking/src/results/PlacesCenterDistance.csv')

df= pd.merge(merged_score, center_distance[['Place Name', 'distance_from_center']], left_on='name', right_on='Place Name', how='left')

df = df.drop(columns=['Place Name'])

df.head()

Unnamed: 0,name,Country,Neightboorhood,Population,Median Household Income,Percentage of population between 16 and 64 years,Transport,Day Pass,Month Pass,Address,Latitude,Longitude,Rating,User Rating Count,Weighted Rating,Score,distance_from_center
0,"WORKVILLE - Flexible Office Space, Conference ...",USA,Midtown,58789,192347.0,0.7,6,49.0,300.0,"1412 Broadway 21st Floor, New York, NY 10018, USA",40.753615,-73.986561,5.0,349,1745.0,1.0,4.820739
1,Rise New York,USA,Midtown,58789,192347.0,0.7,6,0.0,250.0,"43 W 23rd St, New York, NY 10010, USA",40.742465,-73.991176,4.7,258,1212.6,0.879777,3.524241
2,The Farm SoHo NYC - Coworking Office Space and...,USA,SoHo,11675,175209.0,0.7,4,29.0,179.0,"447 Broadway 2nd floor, New York, NY 10013, USA",40.720573,-74.001415,4.5,257,1156.5,0.850917,0.946117
3,OASIS by Workville - Conference Center & Corpo...,USA,Midtown,58789,192347.0,0.7,6,49.0,350.0,"315 W 35th St Floor 11 - 15, New York, NY 1000...",40.753345,-73.993604,5.0,162,810.0,0.839255,4.622594
4,The New Work Project,USA,Williansburg,138980,97432.0,0.6,2,40.0,370.0,"97 N 10th St #2A, Brooklyn, NY 11249, USA",40.721002,-73.958085,5.0,126,630.0,0.808309,4.149734


In [3]:
df.to_csv('/workspaces/Coworking/src/results/MergedPlacesScoreDistance.csv', index=False)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 17 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   name                                              19 non-null     object 
 1   Country                                           19 non-null     object 
 2   Neightboorhood                                    19 non-null     object 
 3   Population                                        19 non-null     int64  
 4   Median Household Income                           19 non-null     float64
 5   Percentage of population between 16 and 64 years  19 non-null     float64
 6   Transport                                         19 non-null     int64  
 7   Day Pass                                          19 non-null     float64
 8   Month Pass                                        19 non-null     float64
 9   Address                

In [5]:
# Drop the Address column as it doesn't add predictive value
df = df.drop(columns=["Address"])

# One-Hot Encode categorical columns like 'Country' and 'Neightboorhood'
df_encoded = pd.get_dummies(df, columns=["Country", "Neightboorhood"], drop_first=True)


In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Load the numerical data

# Define features (X) and target variable (y)
X = df_encoded.drop(columns=["Day Pass", 'name'])  # All columns except target
y = df_encoded["Day Pass"]

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Ridge Regression model
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)
ridge_preds = ridge_model.predict(X_test)

# Train Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# Evaluate models
ridge_mae = mean_absolute_error(y_test, ridge_preds)
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_preds))

rf_mae = mean_absolute_error(y_test, rf_preds)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_preds))

print(f'RidgeMAE=',ridge_mae, 'RidgeRMSE=',ridge_rmse, 'RandonForestMAE=',rf_mae, 'RandonForestRMSE=',rf_rmse)


RidgeMAE= 19.564053513624636 RidgeRMSE= 23.976474032068868 RandonForestMAE= 15.936125000000015 RandonForestRMSE= 21.673783677117846


In [7]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for Ridge Regression
ridge_param_grid = {
    'alpha': [0.1, 1.0, 10.0, 100.0]
}

# Define the parameter grid for Random Forest Regressor
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30]
}

# Initialize GridSearchCV for Ridge Regression
ridge_grid_search = GridSearchCV(Ridge(), ridge_param_grid, cv=5, scoring='neg_mean_absolute_error')
ridge_grid_search.fit(X_train, y_train)

# Initialize GridSearchCV for Random Forest Regressor
rf_grid_search = GridSearchCV(RandomForestRegressor(random_state=42), rf_param_grid, cv=5, scoring='neg_mean_absolute_error')
rf_grid_search.fit(X_train, y_train)

# Get the best parameters and best score for Ridge Regression
best_ridge_params = ridge_grid_search.best_params_
best_ridge_score = -ridge_grid_search.best_score_

# Get the best parameters and best score for Random Forest Regressor
best_rf_params = rf_grid_search.best_params_
best_rf_score = -rf_grid_search.best_score_

print(f'Best Ridge Params: {best_ridge_params}, Best Ridge MAE: {best_ridge_score}')
print(f'Best RF Params: {best_rf_params}, Best RF MAE: {best_rf_score}')

Best Ridge Params: {'alpha': 10.0}, Best Ridge MAE: 3.580453545510944
Best RF Params: {'max_depth': None, 'n_estimators': 100}, Best RF MAE: 4.953726666666663


In [8]:
# Train the best Random Forest model using the best parameters
best_rf_model = RandomForestRegressor(**best_rf_params, random_state=42)
best_rf_model.fit(X_train, y_train)

# Predict the Day Pass values using the best model
best_rf_preds = best_rf_model.predict(X_test)

# Evaluate the best model
best_rf_mae = mean_absolute_error(y_test, best_rf_preds)
best_rf_rmse = np.sqrt(mean_squared_error(y_test, best_rf_preds))

print(f'Best Random Forest MAE: {best_rf_mae}')
print(f'Best Random Forest RMSE: {best_rf_rmse}')

Best Random Forest MAE: 15.936125000000015
Best Random Forest RMSE: 21.673783677117846


In [9]:
df["Predicted_Price_RF"] = best_rf_model.predict(X) 

In [18]:
import joblib

# Save the model
joblib.dump(best_rf_model, "/workspaces/Coworking/src/results/random_forest_model.pkl")

['/workspaces/Coworking/src/results/random_forest_model.pkl']

In [13]:
user_max_price = 100  # Example value
user_min_rating = 4.0  # Example value
user_max_distance = 10  # Example in km

filtered_spaces = df[
    (df["Predicted_Price_RF"] <= user_max_price) &
    (df["Rating"] >= user_min_rating) &
    (df["distance_from_center"] <= user_max_distance)
]

filtered_spaces = filtered_spaces.sort_values(by="Predicted_Price_RF")

filtered_spaces.head()


Unnamed: 0,name,Country,Neightboorhood,Population,Median Household Income,Percentage of population between 16 and 64 years,Transport,Day Pass,Month Pass,Latitude,Longitude,Rating,User Rating Count,Weighted Rating,Score,distance_from_center,Predicted_Price_RF
15,Pasela Co-work Higashishinjuku,Japan,Higashishinjuku,25000,46506.82,0.65,3,20.67,310.05,35.698054,139.709368,4.2,248,1041.6,0.814829,4.837686,23.0036
12,EslabON Coworking,Spain,Opañel,33883,44714.49,0.7,4,21.74,259.42,40.38691,-3.723964,4.9,55,269.5,0.762744,3.734462,23.6981
14,Basis Point Shinbashi,Japan,Shinbashi,100000,67176.52,0.65,4,25.84,413.39,35.666838,139.759638,4.3,328,1410.4,0.902,1.775414,27.4953
10,Freeland,Spain,Adelfas,18832,73676.81,0.65,5,28.99,217.39,40.398585,-3.671745,4.8,162,777.6,0.898047,3.390389,28.3187
13,Area Coworking,Spain,Delicias,28575,65785.51,0.7,4,28.99,347.83,40.397134,-3.691715,4.9,37,181.3,0.737628,2.412662,28.839


In [14]:
normalized_price = (user_max_price - filtered_spaces["Predicted_Price_RF"]) / user_max_price
normalized_rating = (filtered_spaces["Rating"] - user_min_rating) / (5 - user_min_rating)
normalized_distance = (user_max_distance - filtered_spaces["distance_from_center"]) / user_max_distance
normalize_score = (normalized_price + normalized_rating + normalized_distance) / 3
normalize_user_count = (filtered_spaces["User Rating Count"] - filtered_spaces["User Rating Count"].min()) / (filtered_spaces["User Rating Count"].max() - filtered_spaces["User Rating Count"].min())

In [15]:
w_transport = 0.1  # Example weight for transport quality

normalized_transport = filtered_spaces["Transport"] / filtered_spaces["Transport"].max()

predicted_score = 0.4 * normalized_price + 0.4 * normalized_rating + 0.2 * normalized_distance
predicted_score += w_transport * normalized_transport

filtered_spaces["Predicted Score"] = predicted_score

filtered_spaces = filtered_spaces.sort_values(by="Predicted Score", ascending=False)

In [16]:
filtered_spaces.head()

Unnamed: 0,name,Country,Neightboorhood,Population,Median Household Income,Percentage of population between 16 and 64 years,Transport,Day Pass,Month Pass,Latitude,Longitude,Rating,User Rating Count,Weighted Rating,Score,distance_from_center,Predicted_Price_RF,Predicted Score
13,Area Coworking,Spain,Delicias,28575,65785.51,0.7,4,28.99,347.83,40.397134,-3.691715,4.9,37,181.3,0.737628,2.412662,28.839,0.836391
12,EslabON Coworking,Spain,Opañel,33883,44714.49,0.7,4,21.74,259.42,40.38691,-3.723964,4.9,55,269.5,0.762744,3.734462,23.6981,0.830518
6,OneCoWork Plaça Catalunya | Barcelona Coworking,Spain,Ciutat Vella,45671,18133.33,0.75,5,36.23,340.58,41.386887,2.172505,4.8,310,1488.0,0.911691,0.331584,35.401,0.821764
7,OneCoWork Catedral | Barcelona Coworking,Spain,Ciutat Vella,45671,18133.33,0.75,5,36.23,384.06,41.384991,2.176764,4.8,205,984.0,0.830505,0.733386,35.4638,0.813477
11,Coworking LOOM Huertas,Spain,Cortes,10816,57982.61,0.76,10,43.48,398.55,40.414036,-3.700693,4.6,97,446.2,0.779349,0.404649,31.2795,0.806789


In [17]:
filtered_spaces.to_csv('/workspaces/Coworking/src/results/RecommendedSpaces.csv', index=False)