## Setting directory and loading packages

In [1]:
import pandas as pd
import os # use this to access your environment variables
import numpy as np
import statsmodels.api as sm
# For matplotlib
import matplotlib.pyplot as plt
# For seaborn
import seaborn as sns

In [171]:
os.chdir("C:/Users/Laven/Documents/Data_Analysis/Capstone_Project/")


## Loading Data

In [143]:
bars_and_restaurants_df_no_duplicates_utf8 = pd.read_csv('bars_and_restaurants_df_no_duplicates_utf8.csv')

In [6]:
bars_in_montreal_collected_unique_utf8 = pd.read_csv('bars_in_montreal_collected_unique_utf8.csv')

In [7]:
restaurants_in_montreal_collected_unique_utf8 = pd.read_csv('restaurants_in_montreal_collected_unique_utf8.csv')


In [9]:
complete_citybikes_df = pd.read_csv('complete_citybikes_df.csv')

In [11]:
montreal_subway_df = pd.read_csv('montreal_subway_stations.csv')

In [12]:
montreal_light_rail = pd.read_csv('montreal_light_rail.csv')

In [137]:
montreal_transport_stations = pd.read_csv('montreal_transport_stations.csv')


## Defining Functions To Calculate Distance

In [13]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in kilometers between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    r = 6371 # Radius of earth in kilometers
    return c * r

In [14]:
def find_closest_points(list1, list2):
    closest_points = []
    for lat1, lon1 in list1:
        min_distance = float('inf')
        closest_point = None
        for lat2, lon2 in list2:
            distance = haversine(lon1, lat1, lon2, lat2)
            if distance < min_distance:
                min_distance = distance
                closest_point = (lat2, lon2)
        closest_points.append(((lat1, lon1), closest_point, min_distance))
    return closest_points

In [15]:
def count_nearby_stations(establishment_coords, station_coords, max_distance = 1):
    # This will hold the count of nearby stations for each establishment
    nearby_stations_count = []
    
    for est in establishment_coords:
        count = 0
        for station in station_coords:
            distance = haversine(est[1], est[0], station[1], station[0])
            if distance <= max_distance:
                count += 1
        nearby_stations_count.append(count)
        
    return nearby_stations_count

## Getting My Lists Of Coordinate Tuples

In [144]:
establishments_coords = list(zip(bars_and_restaurants_df_no_duplicates_utf8['Latitude'], bars_and_restaurants_df_no_duplicates_utf8['Longitude']))


In [145]:
bars_coords = list(zip(bars_in_montreal_collected_unique_utf8['Latitude'], bars_in_montreal_collected_unique_utf8['Longitude']))

In [146]:
restaurants_coords = list(zip(restaurants_in_montreal_collected_unique_utf8['Latitude'], restaurants_in_montreal_collected_unique_utf8['Longitude']))

In [147]:
citybikes_coords = list(zip(complete_citybikes_df['Latitude'], complete_citybikes_df['Longitude']))

In [148]:
subway_coords = list(zip(montreal_subway_df['Latitude'], montreal_subway_df['Longitude']))


In [149]:
light_rail_coords = list(zip(montreal_light_rail['Latitude'], montreal_light_rail['Longitude']))


In [150]:
transport_station_coords = list(zip(montreal_transport_stations['Latitude'], montreal_transport_stations['Longitude']))




## Adding Transit Counts

In [151]:
bars_and_restaurants_df_no_duplicates_utf8['Nearby_CityBikes_Stations'] = count_nearby_stations(establishments_coords, citybikes_coords, 1)


In [152]:
bars_and_restaurants_df_no_duplicates_utf8['Nearby_Subway_Stations'] = count_nearby_stations(establishments_coords, subway_coords, 1)


In [153]:
bars_and_restaurants_df_no_duplicates_utf8['Nearby_Light_Rail_Stations'] = count_nearby_stations(establishments_coords, light_rail_coords, 1)


In [156]:
bars_and_restaurants_mv_reg = bars_and_restaurants_df_no_duplicates_utf8


In [155]:
bars_and_restaurants_mv_reg['Nearby_Stations'] = count_nearby_stations(establishments_coords, transport_station_coords, 1)

In [159]:
bars_and_restaurants_mv_reg.to_csv('bars_and_restaurants_mv_reg')

In [187]:
bars_and_restaurants_mv_reg = pd.read_csv('bars_and_restaurants_mv_reg')

In [277]:
bars_and_restaurants_mv_reg.head(50)

Unnamed: 0.1,Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash,Type,Nearby_CityBikes_Stations,Nearby_Subway_Stations,Nearby_Light_Rail_Stations,Nearby_Stations,Is Outstanding,Exorbitant In Price,Nearby_Rail_Stations
0,0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.5525,3.9,2.0,f25dy,Bar,15,3,0,18,0,0,3
1,1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1.0,f25dv,Bar,32,9,10,51,0,0,19
2,2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.51557,-73.564079,4.2,2.0,f25dy,Bar,17,7,1,25,0,0,8
3,3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.51173,-73.561981,3.8,2.0,f25dy,Bar,26,8,1,35,0,0,9
4,4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2.0,f25dy,Bar,18,6,0,24,0,0,6
5,5,Peel Pub - Sports Bar/Bar Sportif,"1196 Peel St, Montreal, Quebec H3B 2T6",45.499403,-73.571943,3.8,1.0,f25dv,Bar,28,7,10,45,0,0,17
6,6,Bar Macao,"1057 St Laurent Blvd, Montreal, Quebec H2Z 1J6",45.508327,-73.560176,3.5,1.0,f25dy,Bar,31,7,1,39,0,0,8
7,7,BreWskey Pub,"380 Rue Saint-Paul E, Montréal, QC H2Y 1H3",45.509398,-73.551386,4.5,2.0,f25dy,Bar,12,3,0,15,1,0,3
8,8,Bar Tipsy Cow,"3575 Av du Parc #4110, Montréal, QC H2X 3P9",45.510275,-73.575045,3.9,,f25dv,Bar,18,4,2,24,0,0,6
9,9,LE VIEUX/ THE OLD DUBLIN PUB & RESTURANT,"636 Rue Cathcart, Montréal, QC H3B 1L9",45.503008,-73.568646,4.0,2.0,f25dv,Bar,32,9,10,51,0,0,19


In [128]:
bars_in_montreal_collected_unique_utf8['Nearby_CityBikes_Stations'] = count_nearby_stations(bars_coords, citybikes_coords, 1)


In [129]:
bars_in_montreal_collected_unique_utf8['Nearby_Subway_Stations'] = count_nearby_stations(bars_coords, subway_coords, 1)


In [130]:
bars_in_montreal_collected_unique_utf8['Nearby_Light_Rail_Stations'] = count_nearby_stations(bars_coords, light_rail_coords, 1)


In [160]:
bars_in_montreal_collected_unique_utf8['Nearby_Stations'] = count_nearby_stations(bars_coords, transport_station_coords, 1)

In [161]:
bars_mv_reg = bars_in_montreal_collected_unique_utf8

In [173]:
bars_mv_reg.to_csv('bars_mv_reg')

In [163]:
bars_mv_reg.head(50)

Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash,Nearby_CityBikes_Stations,Nearby_Subway_Stations,Nearby_Light_Rail_Stations,Nearby_Stations
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.5525,3.9,2.0,f25dy,15,3,0,18
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1.0,f25dv,32,9,10,51
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.51557,-73.564079,4.2,2.0,f25dy,17,7,1,25
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.51173,-73.561981,3.8,2.0,f25dy,26,8,1,35
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2.0,f25dy,18,6,0,24
5,Peel Pub - Sports Bar/Bar Sportif,"1196 Peel St, Montreal, Quebec H3B 2T6",45.499403,-73.571943,3.8,1.0,f25dv,28,7,10,45
6,Bar Macao,"1057 St Laurent Blvd, Montreal, Quebec H2Z 1J6",45.508327,-73.560176,3.5,1.0,f25dy,31,7,1,39
7,BreWskey Pub,"380 Rue Saint-Paul E, Montréal, QC H2Y 1H3",45.509398,-73.551386,4.5,2.0,f25dy,12,3,0,15
8,Bar Tipsy Cow,"3575 Av du Parc #4110, Montréal, QC H2X 3P9",45.510275,-73.575045,3.9,,f25dv,18,4,2,24
9,LE VIEUX/ THE OLD DUBLIN PUB & RESTURANT,"636 Rue Cathcart, Montréal, QC H3B 1L9",45.503008,-73.568646,4.0,2.0,f25dv,32,9,10,51


In [132]:
restaurants_in_montreal_collected_unique_utf8['Nearby_CityBikes_Stations'] = count_nearby_stations(restaurants_coords, citybikes_coords, 1)


In [133]:
restaurants_in_montreal_collected_unique_utf8['Nearby_Subway_Stations'] = count_nearby_stations(restaurants_coords, subway_coords, 1)


In [134]:
restaurants_in_montreal_collected_unique_utf8['Nearby_Light_Rail_Stations'] = count_nearby_stations(restaurants_coords, light_rail_coords, 1)


In [164]:
restaurants_in_montreal_collected_unique_utf8['Nearby_Stations'] = count_nearby_stations(restaurants_coords, transport_station_coords, 1)

In [165]:
restaurants_mv_reg = restaurants_in_montreal_collected_unique_utf8

In [174]:
restaurants_mv_reg.to_csv('restaurants_mv_reg')

In [180]:
restaurants_mv_reg = pd.read_csv('restaurants_mv_reg')

In [181]:
restaurants_mv_reg.head(50)

Unnamed: 0.1,Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash,Nearby_CityBikes_Stations,Nearby_Subway_Stations,Nearby_Light_Rail_Stations,Nearby_Stations
0,0,Restaurant Branzino,"50 René-Lévesque Blvd W, Montreal, Quebec H2Z 1A2",45.508147,-73.56165,3.9,,f25dy,30,8,6,44
1,1,Bar et Restaurant La Catrina - Vieux-Port,"119 De la Commune St W, Montreal, Quebec H2Y 2C7",45.50382,-73.553983,4.1,2.0,f25dy,19,3,1,23
2,2,COMMODORE restaurant Montréal,"355 Blvd. De Maisonneuve Ouest, Montreal, Queb...",45.507151,-73.569114,3.9,,f25dv,26,7,9,42
3,3,Restaurant Keung Kee,"70 Rue De la Gauchetière O, Montréal, QC H2Z 1C1",45.507253,-73.560407,3.9,2.0,f25dy,30,8,7,45
4,4,Beijing Restaurant,"92 Rue De la Gauchetière O, Montréal, QC H2Z 1C1",45.50693,-73.56056,4.1,2.0,f25dy,28,8,8,44
5,5,Restaurant Kamúy,"1485 Jeanne-Mance St, Montreal, Quebec H2X 1Z9",45.507866,-73.567355,4.4,,f25dv,28,9,8,45
6,6,Restaurant Boustan,"19 St Catherine St E, Montreal, Quebec H2X 1K3",45.510356,-73.563572,4.1,1.0,f25dy,27,9,2,38
7,7,Restaurants Lafleur Place des arts,"268 Saint-Catherine St W, Montreal, Quebec H2X...",45.506916,-73.566146,3.5,1.0,f25dv,27,10,10,47
8,8,Poulet Rouge,"1750 Saint Denis St, Montreal, Quebec H2X 3K6",45.515736,-73.565138,4.4,2.0,f25dv,17,7,1,25
9,9,Loaded Pierogi,"1748 Saint Denis St, Montreal, Quebec H2X 3K6",45.515661,-73.565007,4.0,,f25dv,17,7,1,25


## Multivariable Regression Analysis

### First do the analysis for all establishments

In [278]:
bars_and_restaurants_mv_reg = bars_and_restaurants_mv_reg.dropna(subset=['Nearby_CityBikes_Stations', 'Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations', 'Rating'])


In [279]:
X = bars_and_restaurants_mv_reg[['Nearby_CityBikes_Stations', 'Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations']]
y = bars_and_restaurants_mv_reg['Rating']


In [280]:
# Add a constant
X = sm.add_constant(X)

In [281]:
# Create the model
model = sm.OLS(y, X).fit()

In [283]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)

                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     5.637
Date:                Sun, 24 Mar 2024   Prob (F-statistic):           0.000752
Time:                        22:49:27   Log-Likelihood:                -4281.4
No. Observations:                3191   AIC:                             8571.
Df Residuals:                    3187   BIC:                             8595.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [285]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('multivariable_regression_results_b_r.csv', 'w') as f:
    f.write(summary_as_csv)

#### This is a very low R squared value. I will check if eliminating subways stations, which apparently is not a statistically significant factor, improves results

In [286]:
X = bars_and_restaurants_df_no_duplicates_utf8[['Nearby_CityBikes_Stations', 'Nearby_Light_Rail_Stations']]
y = bars_and_restaurants_df_no_duplicates_utf8['Rating']



In [287]:
# Add a constant
X = sm.add_constant(X)

In [288]:
# Create the model
model = sm.OLS(y, X).fit()

In [289]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)

                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     7.997
Date:                Sun, 24 Mar 2024   Prob (F-statistic):           0.000343
Time:                        22:52:25   Log-Likelihood:                -4281.8
No. Observations:                3191   AIC:                             8570.
Df Residuals:                    3188   BIC:                             8588.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [290]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('multivariable_regression_results_b_r_no_subway.csv', 'w') as f:
    f.write(summary_as_csv)

#### Removing subway stations as an independent variable did not make the results more significant

### Now do regression for just bars.

In [291]:
bars_in_montreal_dropna = bars_mv_reg.dropna(subset=['Nearby_CityBikes_Stations', 'Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations', 'Rating'])

In [292]:
X = bars_in_montreal_dropna[['Nearby_CityBikes_Stations', 'Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations']]
y = bars_in_montreal_dropna['Rating']


In [293]:
# Add a constant
X = sm.add_constant(X)

In [294]:
# Create the model
model = sm.OLS(y, X).fit()

In [296]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)

                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.016
Method:                 Least Squares   F-statistic:                     3.879
Date:                Sun, 24 Mar 2024   Prob (F-statistic):            0.00923
Time:                        22:53:35   Log-Likelihood:                -455.75
No. Observations:                 523   AIC:                             919.5
Df Residuals:                     519   BIC:                             936.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [297]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('multivariable_regression_results_b.csv', 'w') as f:
    f.write(summary_as_csv)

In [298]:
X = bars_in_montreal_dropna[['Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations']]
y = bars_in_montreal_dropna['Rating']

In [299]:
# Add a constant
X = sm.add_constant(X)

In [300]:
# Create the model
model = sm.OLS(y, X).fit()

In [301]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)


                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     5.827
Date:                Sun, 24 Mar 2024   Prob (F-statistic):            0.00314
Time:                        22:54:27   Log-Likelihood:                -455.75
No. Observations:                 523   AIC:                             917.5
Df Residuals:                     520   BIC:                             930.3
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [302]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('multivariable_regression_results_b_no_bikes.csv', 'w') as f:
    f.write(summary_as_csv)

### Now do regression for just restaurants.

In [303]:
restaurants_mv_reg = restaurants_mv_reg.dropna(subset=['Nearby_CityBikes_Stations', 'Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations', 'Rating'])

In [304]:
X = restaurants_mv_reg[['Nearby_CityBikes_Stations', 'Nearby_Subway_Stations', 'Nearby_Light_Rail_Stations']]
y = restaurants_mv_reg['Rating']

In [305]:
# Add a constant
X = sm.add_constant(X)

In [306]:
# Create the model
model = sm.OLS(y, X).fit()

In [307]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)


                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     4.245
Date:                Sun, 24 Mar 2024   Prob (F-statistic):            0.00531
Time:                        22:55:04   Log-Likelihood:                -3825.4
No. Observations:                2772   AIC:                             7659.
Df Residuals:                    2768   BIC:                             7682.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [308]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('multivariable_regression_results_r.csv', 'w') as f:
    f.write(summary_as_csv)

#### Dropping Subway Stations As It Is Not Statistically Significant

In [320]:
X = restaurants_mv_reg[['Nearby_CityBikes_Stations', 'Nearby_Light_Rail_Stations']]
y = restaurants_mv_reg['Rating']

In [321]:
# Add a constant
X = sm.add_constant(X)

In [322]:
# Create the model
model = sm.OLS(y, X).fit()

In [323]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)


                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     5.332
Date:                Sun, 24 Mar 2024   Prob (F-statistic):            0.00488
Time:                        22:59:23   Log-Likelihood:                -3826.4
No. Observations:                2772   AIC:                             7659.
Df Residuals:                    2769   BIC:                             7677.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [325]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('multivariable_regression_results_r_no_subway.csv', 'w') as f:
    f.write(summary_as_csv)

#### Dropping Light Rail Stations As Well

In [326]:
X = restaurants_mv_reg[['Nearby_CityBikes_Stations']]
y = restaurants_mv_reg['Rating']

In [327]:
# Add a constant
X = sm.add_constant(X)

In [328]:
# Create the model
model = sm.OLS(y, X).fit()

In [329]:
# Print out the statistics
model_summary = model.summary()
print(model_summary)

                            OLS Regression Results                            
Dep. Variable:                 Rating   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     7.269
Date:                Sun, 24 Mar 2024   Prob (F-statistic):            0.00706
Time:                        23:00:40   Log-Likelihood:                -3828.1
No. Observations:                2772   AIC:                             7660.
Df Residuals:                    2770   BIC:                             7672.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

In [330]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('simple_regression_results_r_citybikes.csv', 'w') as f:
    f.write(summary_as_csv)

## I will now perform a logistic regression analysis to see if the number of nearby transit stations is predictive of whether or not an establishment is rated as "Outstanding" (rating of 4.5 or above) or is exhorbiant in price (has a price level of 4).

In [331]:
bars_and_restaurants_mv_reg['Exorbitant In Price'] = (bars_and_restaurants_mv_reg['Price Level'] >= 4.0).astype(int)

In [332]:
bars_and_restaurants_mv_reg['Is Outstanding'] = (bars_and_restaurants_mv_reg['Rating'] >= 4.5).astype(int)

In [333]:
bars_and_restaurants_mv_reg['Nearby_Rail_Stations'] = (bars_and_restaurants_mv_reg['Nearby_Subway_Stations'] + bars_and_restaurants_mv_reg['Nearby_Light_Rail_Stations'])

In [334]:
bars_and_restaurants_mv_reg

Unnamed: 0.1,Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash,Type,Nearby_CityBikes_Stations,Nearby_Subway_Stations,Nearby_Light_Rail_Stations,Nearby_Stations,Is Outstanding,Exorbitant In Price,Nearby_Rail_Stations
0,0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2.0,f25dy,Bar,15,3,0,18,0,0,3
1,1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1.0,f25dv,Bar,32,9,10,51,0,0,19
2,2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2.0,f25dy,Bar,17,7,1,25,0,0,8
3,3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2.0,f25dy,Bar,26,8,1,35,0,0,9
4,4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2.0,f25dy,Bar,18,6,0,24,0,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3186,3186,Les Enfants Terribles — Outremont,"1257 Bernard Ave, Outremont, Quebec H2V 1V8",45.519924,-73.609135,3.9,3.0,f25du,Restaurant,2,1,0,3,0,0,1
3187,3187,Pho Bep Cuisine Vietnamienne,"102 Rue Saint-Viateur O, Montréal, QC H2T 2L1",45.524414,-73.600046,4.5,,f25dv,Restaurant,11,1,0,12,1,0,1
3188,3188,Yuukai sushi,"5407 Park Ave, Montreal, Quebec H2V 4G9",45.522069,-73.601220,4.5,2.0,f25dv,Restaurant,8,0,0,8,1,0,0
3189,3189,Pizzeria Magpie Mile End,"16 Maguire St, Montreal, Quebec H2T 1B8",45.524703,-73.595522,4.4,2.0,f25dv,Restaurant,14,2,0,16,0,0,2


In [194]:
bars_mv_reg['Exorbitant In Price'] = (bars_mv_reg['Price Level'] >= 4.0).astype(int)

In [195]:
bars_mv_reg['Is Outstanding'] = (bars_mv_reg['Rating'] >= 4.0).astype(int)

In [242]:
bars_mv_reg['Nearby_Rail_Stations'] = (bars_mv_reg['Nearby_Subway_Stations'] + bars_mv_reg['Nearby_Light_Rail_Stations'])

In [243]:
bars_mv_reg

Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash,Nearby_CityBikes_Stations,Nearby_Subway_Stations,Nearby_Light_Rail_Stations,Nearby_Stations,Exorbitant In Price,Is Outstanding,Nearby_Rail_Stations
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2.0,f25dy,15,3,0,18,0,0,3
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1.0,f25dv,32,9,10,51,0,1,19
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2.0,f25dy,17,7,1,25,0,1,8
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2.0,f25dy,26,8,1,35,0,0,9
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2.0,f25dy,18,6,0,24,0,1,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518,M Resto-Bar,"3625 Rue Talon, Sainte-Catherine, QC J5C 1T7",45.387405,-73.553055,4.4,2.0,f25dn,0,0,0,0,0,1,0
519,BLVD Bar & Gril,"100 Rue Kepler, Châteauguay, QC J6K 2X9",45.360540,-73.712815,4.5,2.0,f25d4,0,0,0,0,0,1,0
520,Barbossa,"3956 A St Laurent Blvd, Montreal, Quebec H2W 1Y3",45.516651,-73.578985,4.1,2.0,f25dv,10,2,0,12,0,1,2
521,KARAOKE BAR 3 MINOTS,"3812 St Laurent Blvd, Montreal, Quebec H2W 1X6",45.515533,-73.576472,3.4,1.0,f25dv,13,1,0,14,0,0,1


In [197]:
restaurants_mv_reg['Exorbitant In Price'] = (restaurants_mv_reg['Price Level'] >= 4.0).astype(int)

In [198]:
restaurants_mv_reg['Is Outstanding'] = (restaurants_mv_reg['Rating'] >= 4.0).astype(int)

In [245]:
restaurants_mv_reg['Nearby_Rail_Stations'] = (restaurants_mv_reg['Nearby_Subway_Stations'] + restaurants_mv_reg['Nearby_Light_Rail_Stations'])

In [246]:
restaurants_mv_reg

Unnamed: 0.1,Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash,Nearby_CityBikes_Stations,Nearby_Subway_Stations,Nearby_Light_Rail_Stations,Nearby_Stations,Exorbitant In Price,Is Outstanding,Nearby_Rail_Stations
0,0,Restaurant Branzino,"50 René-Lévesque Blvd W, Montreal, Quebec H2Z 1A2",45.508147,-73.561650,3.9,,f25dy,30,8,6,44,0,0,14
1,1,Bar et Restaurant La Catrina - Vieux-Port,"119 De la Commune St W, Montreal, Quebec H2Y 2C7",45.503820,-73.553983,4.1,2.0,f25dy,19,3,1,23,0,1,4
2,2,COMMODORE restaurant Montréal,"355 Blvd. De Maisonneuve Ouest, Montreal, Queb...",45.507151,-73.569114,3.9,,f25dv,26,7,9,42,0,0,16
3,3,Restaurant Keung Kee,"70 Rue De la Gauchetière O, Montréal, QC H2Z 1C1",45.507253,-73.560407,3.9,2.0,f25dy,30,8,7,45,0,0,15
4,4,Beijing Restaurant,"92 Rue De la Gauchetière O, Montréal, QC H2Z 1C1",45.506930,-73.560560,4.1,2.0,f25dy,28,8,8,44,0,1,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2767,2767,Les Enfants Terribles — Outremont,"1257 Bernard Ave, Outremont, Quebec H2V 1V8",45.519924,-73.609135,3.9,3.0,f25du,2,1,0,3,0,0,1
2768,2768,Pho Bep Cuisine Vietnamienne,"102 Rue Saint-Viateur O, Montréal, QC H2T 2L1",45.524414,-73.600046,4.5,,f25dv,11,1,0,12,0,1,1
2769,2769,Yuukai sushi,"5407 Park Ave, Montreal, Quebec H2V 4G9",45.522069,-73.601220,4.5,2.0,f25dv,8,0,0,8,0,1,0
2770,2770,Pizzeria Magpie Mile End,"16 Maguire St, Montreal, Quebec H2T 1B8",45.524703,-73.595522,4.4,2.0,f25dv,14,2,0,16,0,1,2


### Logistic Regression For Bars And Restaurants For All Transit

In [335]:
X = bars_and_restaurants_mv_reg[['Nearby_Stations']]  # Independent variable
y = bars_and_restaurants_mv_reg['Is Outstanding']     # Dependent variable

In [336]:
X = sm.add_constant(X)

In [337]:
logit_model = sm.Logit(y, X)

In [338]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.590370
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         Is Outstanding   No. Observations:                 3191
Model:                          Logit   Df Residuals:                     3189
Method:                           MLE   Df Model:                            1
Date:                Sun, 24 Mar 2024   Pseudo R-squ.:               9.468e-05
Time:                        23:02:30   Log-Likelihood:                -1883.9
converged:                       True   LL-Null:                       -1884.0
Covariance Type:            nonrobust   LLR p-value:                    0.5503
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              -0.9747      0.049    -19.981      0.000      -1.070      -0.879
Nearby_Station

In [346]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_outstanding_all_stations_b_r.csv', 'w') as f:
    f.write(summary_as_csv)

In [340]:
X = bars_and_restaurants_mv_reg[['Nearby_Stations']]  # Independent variable
y = bars_and_restaurants_mv_reg['Exorbitant In Price']     # Dependent variable

In [341]:
X = sm.add_constant(X)

In [342]:
logit_model = sm.Logit(y, X)

In [343]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.033484
         Iterations 10
                            Logit Regression Results                           
Dep. Variable:     Exorbitant In Price   No. Observations:                 3191
Model:                           Logit   Df Residuals:                     3189
Method:                            MLE   Df Model:                            1
Date:                 Sun, 24 Mar 2024   Pseudo R-squ.:                 0.08122
Time:                         23:04:44   Log-Likelihood:                -106.85
converged:                        True   LL-Null:                       -116.29
Covariance Type:             nonrobust   LLR p-value:                 1.385e-05
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              -6.0005      0.371    -16.184      0.000      -6.727      -5.274
Nearb

In [347]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_exorbitant_all_stations_b_r.csv', 'w') as f:
    f.write(summary_as_csv)

In [348]:
X = bars_mv_reg[['Nearby_Stations']]  # Independent variable
y = bars_mv_reg['Is Outstanding']     # Dependent variable

In [349]:
X = sm.add_constant(X)

In [350]:
logit_model = sm.Logit(y, X)

In [351]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.574374
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         Is Outstanding   No. Observations:                  523
Model:                          Logit   Df Residuals:                      521
Method:                           MLE   Df Model:                            1
Date:                Sun, 24 Mar 2024   Pseudo R-squ.:                0.008008
Time:                        23:07:23   Log-Likelihood:                -300.40
converged:                       True   LL-Null:                       -302.82
Covariance Type:            nonrobust   LLR p-value:                   0.02765
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.8073      0.135      5.969      0.000       0.542       1.072
Nearby_Station

In [352]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_outstanding_all_stations_b.csv', 'w') as f:
    f.write(summary_as_csv)

In [358]:
X = restaurants_mv_reg[['Nearby_Stations']]  # Independent variable
y = restaurants_mv_reg['Is Outstanding']  # Dependent variable

In [359]:
X = sm.add_constant(X)

In [360]:
logit_model = sm.Logit(y, X)

In [361]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.614678
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         Is Outstanding   No. Observations:                 2772
Model:                          Logit   Df Residuals:                     2770
Method:                           MLE   Df Model:                            1
Date:                Sun, 24 Mar 2024   Pseudo R-squ.:               0.0008510
Time:                        23:09:44   Log-Likelihood:                -1703.9
converged:                       True   LL-Null:                       -1705.3
Covariance Type:            nonrobust   LLR p-value:                   0.08845
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.7752      0.050     15.620      0.000       0.678       0.872
Nearby_Station

In [362]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_outstanding_all_stations_r.csv', 'w') as f:
    f.write(summary_as_csv)

### Logistic Regression For Bars And Restaurants For All Rail

In [363]:
X = bars_and_restaurants_mv_reg[['Nearby_Rail_Stations']]  # Independent variable
y = bars_and_restaurants_mv_reg['Is Outstanding']     # Dependent variable

In [364]:
X = sm.add_constant(X)

In [365]:
logit_model = sm.Logit(y, X)

In [366]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.590333
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         Is Outstanding   No. Observations:                 3191
Model:                          Logit   Df Residuals:                     3189
Method:                           MLE   Df Model:                            1
Date:                Sun, 24 Mar 2024   Pseudo R-squ.:               0.0001581
Time:                        23:16:53   Log-Likelihood:                -1883.8
converged:                       True   LL-Null:                       -1884.0
Covariance Type:            nonrobust   LLR p-value:                    0.4402
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                   -0.9373      0.048    -19.727      0.000      -1.030      -0.844

In [367]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_outstanding_all_rail_b_r.csv', 'w') as f:
    f.write(summary_as_csv)

In [368]:
X = bars_and_restaurants_mv_reg[['Nearby_Rail_Stations']]  # Independent variable
y = bars_and_restaurants_mv_reg['Exorbitant In Price']     # Dependent variable

In [369]:
X = sm.add_constant(X)

In [370]:
logit_model = sm.Logit(y, X)

In [371]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.033249
         Iterations 10
                            Logit Regression Results                           
Dep. Variable:     Exorbitant In Price   No. Observations:                 3191
Model:                           Logit   Df Residuals:                     3189
Method:                            MLE   Df Model:                            1
Date:                 Sun, 24 Mar 2024   Pseudo R-squ.:                 0.08766
Time:                         23:18:05   Log-Likelihood:                -106.10
converged:                        True   LL-Null:                       -116.29
Covariance Type:             nonrobust   LLR p-value:                 6.322e-06
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                   -5.9298      0.351    -16.903      0.000      -6.617   

In [373]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_exorbitant_all_rail_b_r.csv', 'w') as f:
    f.write(summary_as_csv)

In [374]:
X = bars_mv_reg[['Nearby_Rail_Stations']]  # Independent variable
y = bars_mv_reg['Is Outstanding']     # Dependent variable

In [375]:
X = sm.add_constant(X)

In [376]:
logit_model = sm.Logit(y, X)

In [377]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.575835
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         Is Outstanding   No. Observations:                  523
Model:                          Logit   Df Residuals:                      521
Method:                           MLE   Df Model:                            1
Date:                Sun, 24 Mar 2024   Pseudo R-squ.:                0.005484
Time:                        23:19:10   Log-Likelihood:                -301.16
converged:                       True   LL-Null:                       -302.82
Covariance Type:            nonrobust   LLR p-value:                   0.06839
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.8738      0.125      6.996      0.000       0.629       1.119

In [378]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_outstanding_all_rail_b.csv', 'w') as f:
    f.write(summary_as_csv)

In [379]:
X = restaurants_mv_reg[['Nearby_Rail_Stations']]  # Independent variable
y = restaurants_mv_reg['Is Outstanding']     # Dependent variable

In [380]:
X = sm.add_constant(X)

In [381]:
logit_model = sm.Logit(y, X)

In [382]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.615201
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         Is Outstanding   No. Observations:                 2772
Model:                          Logit   Df Residuals:                     2770
Method:                           MLE   Df Model:                            1
Date:                Sun, 24 Mar 2024   Pseudo R-squ.:               6.589e-07
Time:                        23:21:09   Log-Likelihood:                -1705.3
converged:                       True   LL-Null:                       -1705.3
Covariance Type:            nonrobust   LLR p-value:                    0.9622
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.8214      0.049     16.710      0.000       0.725       0.918

In [383]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_outstanding_all_rail_r.csv', 'w') as f:
    f.write(summary_as_csv)

In [384]:
X = restaurants_mv_reg[['Nearby_Rail_Stations']]  # Independent variable
y = restaurants_mv_reg['Exorbitant In Price']     # Dependent variable

In [385]:
X = sm.add_constant(X)

In [386]:
logit_model = sm.Logit(y, X)

In [387]:
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.036784
         Iterations 10
                            Logit Regression Results                           
Dep. Variable:     Exorbitant In Price   No. Observations:                 2772
Model:                           Logit   Df Residuals:                     2770
Method:                            MLE   Df Model:                            1
Date:                 Sun, 24 Mar 2024   Pseudo R-squ.:                  0.1025
Time:                         23:24:38   Log-Likelihood:                -101.97
converged:                        True   LL-Null:                       -113.61
Covariance Type:             nonrobust   LLR p-value:                 1.396e-06
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                   -5.8124      0.349    -16.640      0.000      -6.497   

In [388]:
summary = result.summary()
summary_as_csv = summary.as_csv()

with open('logistic_is_exorbitant_all_rail_r.csv', 'w') as f:
    f.write(summary_as_csv)