# Nearest Open Pubs in UK

In [1]:
import os
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
os.chdir(r"D:\Ritwik's Download\Innomatics Research Lab")

In [3]:
df = pd.read_csv("open_pubs.csv", header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh
...,...,...,...,...,...,...,...,...,...
51326,597119,Wrexham & District War Memorial Club Ltd,"Wrexham War Memorial Club Farndon Street, Wrex...",LL13 8DE,333909,350438,53.047100,-2.987319,Wrexham
51327,597127,Wrexham Lager Social Club,"1 - 3 Union Road, Wrexham, Wrexham",LL13 7SR,333028,350563,53.048123,-3.000485,Wrexham
51328,597130,Wrexham Rail Sports & Social Club,"Sports And Social Club, 44 Brook Street, Wrexh...",LL13 7LU,333259,350213,53.044998,-2.996966,Wrexham
51329,597131,Wrexham Rugby Club,"Wrexham Rugby Club Bryn Estyn Road, Wrexham, W...",LL13 9TY,335808,351078,53.053094,-2.959124,Wrexham


In [4]:
df.columns = ['fsa_id','name','address','postcode','easting','northing','latitude','longitude','local_authority']
df

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh
...,...,...,...,...,...,...,...,...,...
51326,597119,Wrexham & District War Memorial Club Ltd,"Wrexham War Memorial Club Farndon Street, Wrex...",LL13 8DE,333909,350438,53.047100,-2.987319,Wrexham
51327,597127,Wrexham Lager Social Club,"1 - 3 Union Road, Wrexham, Wrexham",LL13 7SR,333028,350563,53.048123,-3.000485,Wrexham
51328,597130,Wrexham Rail Sports & Social Club,"Sports And Social Club, 44 Brook Street, Wrexh...",LL13 7LU,333259,350213,53.044998,-2.996966,Wrexham
51329,597131,Wrexham Rugby Club,"Wrexham Rugby Club Bryn Estyn Road, Wrexham, W...",LL13 9TY,335808,351078,53.053094,-2.959124,Wrexham


In [5]:
df.name.unique().shape

(36335,)

In [6]:
len(df.postcode.unique())

45886

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51331 entries, 0 to 51330
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   fsa_id           51331 non-null  int64 
 1   name             51331 non-null  object
 2   address          51331 non-null  object
 3   postcode         51331 non-null  object
 4   easting          51331 non-null  int64 
 5   northing         51331 non-null  int64 
 6   latitude         51331 non-null  object
 7   longitude        51331 non-null  object
 8   local_authority  51331 non-null  object
dtypes: int64(3), object(6)
memory usage: 3.5+ MB


### We can see there are some missing values in columns which are represented by $\N$ so we need to replace the missing value to Null

**Replace The Missing Value**

In [8]:
def clean_missing(x):
    if x == r'\N':
        return None
    else:
        return x

In [9]:
df.columns

Index(['fsa_id', 'name', 'address', 'postcode', 'easting', 'northing',
       'latitude', 'longitude', 'local_authority'],
      dtype='object')

In [10]:
df.fsa_id = df.fsa_id.apply(clean_missing)
df.name = df.name.apply(clean_missing)
df.address = df.address.apply(clean_missing)
df.postcode = df.postcode.apply(clean_missing)
df.easting = df.easting.apply(clean_missing)
df.northing = df.northing.apply(clean_missing)
df.latitude = df.latitude.apply(clean_missing)
df.longitude = df.longitude.apply(clean_missing)
df.local_authority = df.local_authority.apply(clean_missing)

In [11]:
df.isnull().sum()

fsa_id               0
name                 0
address              0
postcode             0
easting              0
northing             0
latitude           767
longitude          767
local_authority      0
dtype: int64

In [12]:
df.dropna(axis=0, inplace=True)

In [13]:
df.shape

(50564, 9)

In [14]:
df.isnull().sum()

fsa_id             0
name               0
address            0
postcode           0
easting            0
northing           0
latitude           0
longitude          0
local_authority    0
dtype: int64

In [15]:
df.dtypes

fsa_id              int64
name               object
address            object
postcode           object
easting             int64
northing            int64
latitude           object
longitude          object
local_authority    object
dtype: object

In [16]:
df.latitude = df.latitude.astype("float")
df.longitude = df.longitude.astype("float")

In [17]:
df

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
5,97,Brewers Arms,"The Brewers Arms, Bower House Tye, Polstead, C...",CO6 5BZ,598743,240655,52.028694,0.895650,Babergh
6,102,Bristol Arms,"Bristol Hill, Shotley, IPSWICH",IP9 1PU,624624,233550,51.955042,1.267642,Babergh
...,...,...,...,...,...,...,...,...,...
51326,597119,Wrexham & District War Memorial Club Ltd,"Wrexham War Memorial Club Farndon Street, Wrex...",LL13 8DE,333909,350438,53.047100,-2.987319,Wrexham
51327,597127,Wrexham Lager Social Club,"1 - 3 Union Road, Wrexham, Wrexham",LL13 7SR,333028,350563,53.048123,-3.000485,Wrexham
51328,597130,Wrexham Rail Sports & Social Club,"Sports And Social Club, 44 Brook Street, Wrexh...",LL13 7LU,333259,350213,53.044998,-2.996966,Wrexham
51329,597131,Wrexham Rugby Club,"Wrexham Rugby Club Bryn Estyn Road, Wrexham, W...",LL13 9TY,335808,351078,53.053094,-2.959124,Wrexham


In [223]:
df.name.value_counts()[:50]

The Red Lion                 193
The Royal Oak                170
Red Lion                     130
The Crown Inn                112
Royal Oak                    102
The Crown                    101
The White Hart               101
The New Inn                   93
The Plough Inn                92
The Wheatsheaf                89
The Plough                    84
Kings Arms                    83
The White Horse               82
The Ship Inn                  81
Royal British Legion          80
New Inn                       80
Crown Inn                     76
The Swan                      76
The Bell Inn                  73
The Kings Head                71
The Queens Head               68
The Kings Arms                67
The Royal British Legion      62
Queens Head                   58
The Bell                      58
Railway Inn                   56
The Swan Inn                  56
Ship Inn                      55
Kings Head                    55
White Hart                    54
The White 

In [24]:
loc = df.local_authority.value_counts()[:15]
loc

County Durham               680
Leeds                       611
Cornwall                    557
Wiltshire                   544
Kirklees                    524
City of Edinburgh           506
Birmingham                  494
Buckinghamshire             485
Bradford                    478
Northumberland              446
East Riding of Yorkshire    432
Shropshire                  415
Bristol, City of            408
Dorset                      405
Fife                        404
Name: local_authority, dtype: int64

In [25]:
fig = px.bar(df, x = loc.index, y=loc.values, color = loc.index)
fig.update_layout(
    title = "<b> Numbers of Pubs in Locality Wise (TOP 10) </b>",
    xaxis_title = "Name Local Authorities",
    yaxis_title = "Count of Pubs",
    legend_title = "Local_Authority",
    paper_bgcolor = "lightgreen",
    plot_bgcolor = "cyan"
    
)
fig.show()

In [142]:
nam = df.name.value_counts()[:10]
nam

The Red Lion      193
The Royal Oak     170
Red Lion          130
The Crown Inn     112
Royal Oak         102
The Crown         101
The White Hart    101
The New Inn        93
The Plough Inn     92
The Wheatsheaf     89
Name: name, dtype: int64

In [19]:
import folium

In [20]:
mymap = folium.Map(location=[23.2164, 88.3529], zoom_start=12)


folium.Marker(location=[23.2164, 88.3529], icon=folium.Icon(icon='beer', prefix='fa', color='green'), popup='Your Location').add_to(mymap)

mymap

### User input section to find the TOP5 Pubs

In [145]:
df.latitude.max()

60.764969

In [197]:
df.latitude.min()

49.892485

In [146]:
df.longitude.min()

-7.384525

In [198]:
df.longitude.max()

1.757763

In [200]:
lat = float(input())
long = float(input())
ans = np.array((lat,long))
ans

59.6589
-5.63978


array([59.6589 , -5.63978])

In [201]:
arr = np.transpose(np.array([df.latitude, df.longitude]))
ans - arr[0]

array([ 7.688521, -6.61912 ])

**Finding The Distance**

In [202]:
dis = np.sqrt(np.sum((arr-ans)**2, axis=1))
dis = dis.tolist()

In [203]:
newdis = [round(i, 4) for i in dis]
newdis

[10.1453,
 10.2054,
 9.9319,
 10.0465,
 10.3471,
 9.9342,
 10.1712,
 9.8939,
 10.0735,
 10.0735,
 10.0137,
 9.8468,
 9.9312,
 10.167,
 10.0355,
 10.1255,
 9.9944,
 9.9507,
 9.8987,
 9.9433,
 9.9296,
 10.2106,
 10.2609,
 10.1918,
 10.324,
 9.9324,
 9.9012,
 9.9329,
 10.0232,
 10.0763,
 9.8496,
 9.9273,
 10.1621,
 9.9325,
 9.9564,
 10.2182,
 9.8563,
 10.2803,
 9.9211,
 10.0748,
 10.067,
 9.9549,
 9.9335,
 9.9288,
 10.207,
 9.9985,
 9.9481,
 10.2355,
 10.0736,
 9.8947,
 10.1901,
 9.9208,
 9.8904,
 10.2806,
 9.9325,
 9.8916,
 10.0716,
 10.2421,
 10.0086,
 10.1648,
 9.994,
 10.1427,
 10.0908,
 9.8952,
 10.0737,
 9.9707,
 9.9449,
 9.8935,
 10.2505,
 9.8409,
 10.2081,
 10.0124,
 10.1802,
 9.9295,
 10.2099,
 10.0289,
 9.9312,
 10.1714,
 10.1087,
 10.1453,
 10.1283,
 10.1288,
 10.1297,
 10.1283,
 10.1299,
 10.1746,
 10.0973,
 10.0601,
 10.0534,
 10.1619,
 10.1313,
 10.1266,
 10.0609,
 10.1391,
 10.1682,
 10.1309,
 10.1205,
 10.1266,
 10.1553,
 10.1271,
 10.0928,
 10.1204,
 10.06,
 10.1343,
 10.

In [204]:
data = df.copy()

In [205]:
data["Distance"] = newdis

In [206]:
data.head(3)

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority,Distance
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh,10.1453
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh,10.2054
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh,9.9319


In [207]:
top_5_pubs = data.sort_values(by='Distance').head(5)
top_5_pubs

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority,Distance
44632,515685,Ness F C Social Club,"Ness F.C. Social Club, Machair, Fivepenny, Ness",HS2 0XB,152853,963481,58.489238,-6.242497,Na h-Eileanan Siar,1.3158
46400,539271,Am Fuaran Bar,"Altandhu, Achiltibuie",IV26 2YR,198435,912602,58.056551,-5.417909,Highland,1.6176
44633,515760,Stornoway Golf Club,"Stornoway Golf Club, Castle Grounds, Stornoway...",HS2 0XP,141939,933664,58.215919,-6.395793,Na h-Eileanan Siar,1.629
44630,515662,Masonic Hall,"Lodge Fortrose Masonic Hall, 57 Kenneth Street...",HS1 2DS,142360,933050,58.210666,-6.387969,Na h-Eileanan Siar,1.6301
44627,515616,Lewis & Harris Rangers Sports & Social Club,"19 Inaclete Road, Stornoway, Isle Of Lewis",HS1 2RB,142759,932567,58.206565,-6.380664,Na h-Eileanan Siar,1.6304


### To check if my location is at the land

In [193]:
from geopy.point import Point
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import requests

# Create a function to check if a point is on land
def is_on_land(lat, lon):
    try:
        # Create a geopy Point object for the given lat/lon
        point = Point(lat, lon)
        
        # Use Nominatim geocoder to reverse geocode the point and get the location name
        geolocator = Nominatim(user_agent="my_app")
        location = geolocator.reverse(f"{lat}, {lon}")
        
        # Use the location name to query the OpenStreetMap Nominatim API and get the place type
        url = f"https://nominatim.openstreetmap.org/search?q={location}&format=json&limit=1"
        response = requests.get(url).json()
        place_type = response[0]['type']
        
        # Return True if the place type is land
        return place_type == "land"
    
    except GeocoderTimedOut:
        # If geocoding fails, return False
        return False

# Test the function with a sample point
lat, lon = lat, long
is_land = is_on_land(lat, lon)
print(is_land)


False


In [176]:
import folium

In [214]:
m = folium.Map(location=[lat, long], zoom_start=13)
folium.Marker(location=[lat, long], icon = folium.Icon(icon='star', color='red'), popup='Your Location').add_to(m)

for i, row in top_5_pubs.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"<strong>{row['name']}</strong><br>{row['postcode']}<br>{row['local_authority']}<br>{row['Distance']} kelometers away",
        icon=folium.Icon(icon='beer', prefix='fa', color='green')
    ).add_to(m)

display(m)

### Now want to see the total pubs in one local authority

In [220]:
data = data[data["local_authority"] == "County Durham"]
len(data)

680

In [218]:
m = folium.Map(location=[data.latitude.mean(), data.longitude.mean()], zoom_start=13)
folium.Marker(location=[data.latitude.mean(), data.longitude.mean()], icon = folium.Icon(icon='star', color='red'), popup='Your Location', size = 155).add_to(m)

for i, row in data.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"<strong>{row['name']}</strong><br>{row['postcode']}<br>{row['local_authority']}<br>{row['Distance']} kelometers away",
        icon=folium.Icon(icon='beer', prefix='fa', color='green')
    ).add_to(m)

display(m)

In [219]:
df.to_csv("Cleaned_data.csv")