# Nearest Pub (Streamlit)

In [1]:
# Importing important libraries
import pandas as pd
import numpy as np

In [2]:
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


In [4]:
# Loading dataset
df_gc = pd.read_csv("/drive/My Drive/Colab Notebooks/nearestpub_streamlit/open_pubs.csv")

In [5]:
# Naming columns as our data has no column names
df_gc.columns = ['fsa_id', 'name', 'address', 'postcode', 'easting', 'northing', 'latitude', 'longitude', 'local_authority']

In [6]:
# first 5 rows
df_gc.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
1,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
2,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
3,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh
4,97,Brewers Arms,"The Brewers Arms, Bower House Tye, Polstead, C...",CO6 5BZ,598743,240655,52.028694,0.895650,Babergh


In [7]:
# shape of dataframe
df_gc.shape

(51330, 9)

In [8]:
# Checking if null values
df_gc.isnull().sum()

fsa_id             0
name               0
address            0
postcode           0
easting            0
northing           0
latitude           0
longitude          0
local_authority    0
dtype: int64

In [9]:
# Datatype of all columns
df_gc.dtypes

fsa_id              int64
name               object
address            object
postcode           object
easting             int64
northing            int64
latitude           object
longitude          object
local_authority    object
dtype: object

Datatype of Latitude and Longitude is showing object type because there are Null Values present in it as string '\N'.

In [10]:
# Replacing Missing Values ('\NA') with np.NaN (Null Values)
df_gc = df_gc.replace('\\N', np.NaN)

In [11]:
# First 5 rows
df_gc.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
1,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
2,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,,,Babergh
3,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,,,Babergh
4,97,Brewers Arms,"The Brewers Arms, Bower House Tye, Polstead, C...",CO6 5BZ,598743,240655,52.028694,0.89565,Babergh


In [12]:
# Checking of Null Values
df_gc.isnull().sum()

fsa_id               0
name                 0
address              0
postcode             0
easting              0
northing             0
latitude           767
longitude          767
local_authority      0
dtype: int64

In [13]:
# Dropping Null Values
df_gc = df_gc.dropna()

In [14]:
# First 5 rows
df_gc.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
1,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
4,97,Brewers Arms,"The Brewers Arms, Bower House Tye, Polstead, C...",CO6 5BZ,598743,240655,52.028694,0.89565,Babergh
5,102,Bristol Arms,"Bristol Hill, Shotley, IPSWICH",IP9 1PU,624624,233550,51.955042,1.267642,Babergh
6,122,Caffeine Lounge,"14 Borehamgate Shopping Precinct, King Street,...",CO10 2ED,587527,241247,52.037903,0.732687,Babergh


In [15]:
# Dataframe shape
df_gc.shape

(50563, 9)

In [16]:
# Checking for Null Values
df_gc.isnull().sum()

fsa_id             0
name               0
address            0
postcode           0
easting            0
northing           0
latitude           0
longitude          0
local_authority    0
dtype: int64

In [17]:
# Checking datatypes of the columns
df_gc.dtypes

fsa_id              int64
name               object
address            object
postcode           object
easting             int64
northing            int64
latitude           object
longitude          object
local_authority    object
dtype: object

In [18]:
# Changing latitude and longitude columns datatype from object to float
df_gc[['latitude', 'longitude']] = df_gc[['latitude', 'longitude']].astype('float64')

In [19]:
# Checking the datatype of columns
df_gc.dtypes

fsa_id               int64
name                object
address             object
postcode            object
easting              int64
northing             int64
latitude           float64
longitude          float64
local_authority     object
dtype: object

In [20]:
# Dropping Unnecessary columns according to our Objective
df_gc.drop(df_gc[['easting', 'northing']], axis = 1, inplace = True)

In [21]:
# Dataframe shape
df_gc.shape

(50563, 7)

In [22]:
# First 5 rows
df_gc.head()

Unnamed: 0,fsa_id,name,address,postcode,latitude,longitude,local_authority
0,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,51.958698,1.057832,Babergh
1,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,52.038595,0.729915,Babergh
4,97,Brewers Arms,"The Brewers Arms, Bower House Tye, Polstead, C...",CO6 5BZ,52.028694,0.89565,Babergh
5,102,Bristol Arms,"Bristol Hill, Shotley, IPSWICH",IP9 1PU,51.955042,1.267642,Babergh
6,122,Caffeine Lounge,"14 Borehamgate Shopping Precinct, King Street,...",CO10 2ED,52.037903,0.732687,Babergh


In [23]:
# Total unique count of fsa_id and name column
print("Total Count of unique fsa_id: ", len(df_gc['fsa_id'].unique()))
print("Total Count of unique name: ", len(df_gc['name'].unique()))

Total Count of unique fsa_id:  50563
Total Count of unique name:  35809


In [24]:
print("Total Count of unique local_authority: ", len(df_gc['local_authority'].unique()))

Total Count of unique local_authority:  360


**Observation:** There are different Pubs with same name in the United Kingdom.

In [25]:
df_gc[['latitude', 'longitude']].head()

Unnamed: 0,latitude,longitude
0,51.958698,1.057832
1,52.038595,0.729915
4,52.028694,0.89565
5,51.955042,1.267642
6,52.037903,0.732687
