# Cafe and restaurant - API and Wrangling.

In [2]:
import requests
import pandas as pd
import os
import numpy as np

## API function

In [3]:
def fetch_data(base_url, dataset, api_key, num_records, max_offset, offset=0):
    '''
    Download the 2022 census records of a dataset from the API, page by page.

    Pages of ``num_records`` rows are requested from
    ``{base_url}{dataset}/records`` starting at ``offset``. Paging stops when
    the API returns a short page, returns no ``results`` key, or the next
    offset would be greater than ``max_offset``.

    Parameters
    ----------
    base_url : str
        Catalog URL the dataset name is appended to (expected to end with '/').
    dataset : str
        Dataset identifier.
    api_key : str
        Key sent as the ``api_key`` query parameter.
    num_records : int
        Page size sent as ``limit``; must be positive.
    max_offset : int
        Largest offset that will be requested.
    offset : int, default 0
        Offset of the first page.

    Returns
    -------
    pandas.DataFrame
        One row per downloaded record (empty if nothing was returned).

    Raises
    ------
    ValueError
        If ``num_records`` is not positive (the loop could never advance).
    RuntimeError
        If a request fails or returns an HTTP error status.
    '''
    if num_records <= 0:
        raise ValueError(f'num_records must be positive, got {num_records}')

    url = f'{base_url}{dataset}/records'
    all_records = []

    while offset <= max_offset:
        # Let requests build and percent-encode the query string.
        params = {
            'limit': num_records,
            'offset': offset,
            'refine': 'census_year:"2022"',
            'api_key': api_key,
        }

        try:
            result = requests.get(url, params=params, timeout=10)
            result.raise_for_status()
            records = result.json().get('results')
        except requests.exceptions.RequestException as e:
            # NOTE(review): the underlying message may contain the request URL,
            # including the api_key - avoid sharing the traceback verbatim.
            raise RuntimeError(f'API request failed: {e}') from e

        if records is None:
            break

        all_records.extend(records)

        # A short page means the last record has been reached.
        if len(records) < num_records:
            break

        offset += num_records

    return pd.DataFrame(all_records)



## Creating the Dataset
#### - API key is obtained from the MOP
#### - API for dataset is displayed below:
https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/cafes-and-restaurants-with-seating-capacity/records?limit=20&refine=census_year%3A%222022%22
#### - Fetching the dataset
#### - Display the dataset

In [4]:
# API deconstructed below.
# The API key is a credential, so it is read from the MOP_API_KEY environment
# variable instead of being stored in the notebook. If the variable is unset
# the key is an empty string and the request is sent without a usable key.
API_KEY = os.environ.get('MOP_API_KEY', '')
BASE_URL = "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/"
DATASET = 'cafes-and-restaurants-with-seating-capacity'
NUM_RECORD = 20      # rows requested per API call (page size)
MAX_OFFSET = 3031    # highest offset that fetch_data is allowed to request

In [6]:
# Download every 2022 record of the dataset into a single DataFrame
df = fetch_data(
    base_url=BASE_URL,
    dataset=DATASET,
    api_key=API_KEY,
    num_records=NUM_RECORD,
    max_offset=MAX_OFFSET,
)

In [7]:
# Preview the first two rows of the fetched dataset
df.head(2)

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
0,2022,1110,620301,620301,120 Pearl River Road DOCKLANDS VIC 3008,Docklands,Yassas,"Shop 14A, Ground 120 Pearl River Road DOCKLAND...",4511,Cafes and Restaurants,Seats - Indoor,54,144.936589,-37.811859,"{'lon': 144.93658926722165, 'lat': -37.811859459}"
1,2022,1112,103980,103980,Flinders Wharf Apartments 40-66 Siddeley Stree...,Docklands,Them Authentic Vietnamese Cuisine & Rolls,"Part Unit 13, Ground 60 Siddeley Street DOCKLA...",4511,Cafes and Restaurants,Seats - Outdoor,34,144.952479,-37.822884,"{'lon': 144.95247888392848, 'lat': -37.8228836..."


## Steps for cleaning the dataset:
#### - Check for Null values
#### - General information such as Shape and description
#### - Checking for Duplicates

In [9]:
# Count the missing (NaN/None) values in each column
df.isna().sum()

census_year                     0
block_id                        0
property_id                     0
base_property_id                0
building_address                0
clue_small_area                 0
trading_name                    0
business_address                0
industry_anzsic4_code           0
industry_anzsic4_description    0
seating_type                    0
number_of_seats                 0
longitude                       0
latitude                        0
location                        0
dtype: int64

In [11]:
# Shape of the dataset as (rows, columns)
df.shape

(3031, 15)

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,block_id,number_of_seats,longitude,latitude
count,3031.0,3031.0,3031.0,3031.0
mean,346.588255,57.149786,144.960496,-37.812743
std,437.984272,139.872363,0.011168,0.009101
min,1.0,2.0,144.904228,-37.849719
25%,52.5,16.0,144.955504,-37.817309
50%,95.0,33.0,144.962183,-37.813296
75%,644.0,68.0,144.96724,-37.808975
max,2546.0,4920.0,144.990561,-37.777494


In [13]:
# Flag every row that shares (property_id, trading_name) with another row.
# keep=False marks all members of a duplicate group, not only the repeats.
bool_series = df.duplicated(subset=['property_id', 'trading_name'], keep=False)
int(bool_series.sum())

1896

### - Using duplicated(keep=False) returns every row of each duplicate group, which makes it possible to inspect the groups in further checks
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.duplicated.html

In [14]:
# The count above suggests a large number of potential duplicates.
# Show the first 10 flagged rows, ordered by trading name.
df.loc[bool_series].sort_values('trading_name').head(10)

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
2027,2022,34,110669,110669,353-359 Little Collins Street MELBOURNE VIC 3000,Melbourne (CBD),11 Inch Pizza,"Shop 5B, 353 Little Collins Street MELBOURNE V...",4512,Takeaway Food Services,Seats - Indoor,14,144.962995,-37.815698,"{'lon': 144.96299531095, 'lat': -37.8156982802..."
2028,2022,34,110669,110669,353-359 Little Collins Street MELBOURNE VIC 3000,Melbourne (CBD),11 Inch Pizza,"Shop 5B, 353 Little Collins Street MELBOURNE V...",4512,Takeaway Food Services,Seats - Outdoor,4,144.962995,-37.815698,"{'lon': 144.96299531095, 'lat': -37.8156982802..."
518,2022,255,109366,109366,127-135 Pelham Street CARLTON VIC 3053,Carlton,127 Cafe Go,127 Pelham Street CARLTON VIC 3053,4511,Cafes and Restaurants,Seats - Outdoor,8,144.963945,-37.80277,"{'lon': 144.96394524391638, 'lat': -37.8027699..."
519,2022,255,109366,109366,127-135 Pelham Street CARLTON VIC 3053,Carlton,127 Cafe Go,127 Pelham Street CARLTON VIC 3053,4511,Cafes and Restaurants,Seats - Indoor,48,144.963945,-37.80277,"{'lon': 144.96394524391638, 'lat': -37.8027699..."
1208,2022,785,110535,110535,263-329 Lorimer Street PORT MELBOURNE VIC 3207,Port Melbourne,14 Days Of Cheese,"Suite 1, Pier 35 263-329 Lorimer Street PORT M...",4511,Cafes and Restaurants,Seats - Indoor,60,144.904228,-37.825364,"{'lon': 144.90422836884795, 'lat': -37.8253639..."
1209,2022,785,110535,110535,263-329 Lorimer Street PORT MELBOURNE VIC 3207,Port Melbourne,14 Days Of Cheese,"Suite 1, Pier 35 263-329 Lorimer Street PORT M...",4511,Cafes and Restaurants,Seats - Outdoor,30,144.904228,-37.825364,"{'lon': 144.90422836884795, 'lat': -37.8253639..."
1203,2022,773,577280,577280,72-82 Lorimer Street DOCKLANDS VIC 3008,Docklands,18 Pence Lane Coffee & Food,82 River Esplanade DOCKLANDS VIC 3008,4511,Cafes and Restaurants,Seats - Outdoor,40,144.944971,-37.824709,"{'lon': 144.94497132452153, 'lat': -37.8247087..."
2335,2022,773,577280,577280,72-82 Lorimer Street DOCKLANDS VIC 3008,Docklands,18 Pence Lane Coffee & Food,82 River Esplanade DOCKLANDS VIC 3008,4511,Cafes and Restaurants,Seats - Indoor,20,144.944971,-37.824709,"{'lon': 144.94497132452153, 'lat': -37.8247087..."
1393,2022,35,109267,109267,220-226 Collins Street MELBOURNE VIC 3000,Melbourne (CBD),1932 Cafe & Restaurant,"Shop 10, Ground 93-107 Swanston Street MELBOUR...",4511,Cafes and Restaurants,Seats - Outdoor,24,144.966103,-37.815297,"{'lon': 144.9661030243, 'lat': -37.81529690772..."
1394,2022,35,109267,109267,220-226 Collins Street MELBOURNE VIC 3000,Melbourne (CBD),1932 Cafe & Restaurant,"Shop 10, Ground 93-107 Swanston Street MELBOUR...",4511,Cafes and Restaurants,Seats - Indoor,14,144.966103,-37.815297,"{'lon': 144.9661030243, 'lat': -37.81529690772..."


### - The display indicates that the duplicates are due to the seating type; for instance, a business can have both indoor and outdoor seating.
### - The step is to check duplicates in regards to seating_type

In [16]:
# Number of flagged rows (one per seating record) for each trading name.
# groupby already orders the group keys, so no explicit sort is needed, and a
# single .loc call selects rows and columns together.
check_seating = df.loc[bool_series, ['trading_name', 'seating_type']].groupby('trading_name').count()

In [17]:
# Preview the seating-record count of the first five trading names
check_seating.head(5)

Unnamed: 0_level_0,seating_type
trading_name,Unnamed: 1_level_1
11 Inch Pizza,2
127 Cafe Go,2
14 Days Of Cheese,2
18 Pence Lane Coffee & Food,2
1932 Cafe & Restaurant,2


In [19]:
# Trading names with more than two seating records
check_seating[check_seating['seating_type'].gt(2)]

Unnamed: 0_level_0,seating_type
trading_name,Unnamed: 1_level_1
A Treat of France,4
Assembly Store,4
Benny's Bakery Cafe,4
Bluebag,4
Cafenatics,6
Degani Bakery Cafe,6
Earl Canteen,4
Goz City,4
Grill'd,4
In A Rush Espresso,4


### - The next step is to check duplicates in regards to seating_type and trading_name

In [20]:
# All records for the trading name 'Universal Restaurant'
df[df['trading_name'].eq("Universal Restaurant")]

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
516,2022,254,106090,106090,135-137 Lygon Street CARLTON VIC 3053,Carlton,Universal Restaurant,135-137 Lygon Street CARLTON VIC 3053,4511,Cafes and Restaurants,Seats - Indoor,60,144.966076,-37.80391,"{'lon': 144.9660759838103, 'lat': -37.8039101353}"
517,2022,254,106090,106090,135-137 Lygon Street CARLTON VIC 3053,Carlton,Universal Restaurant,135-137 Lygon Street CARLTON VIC 3053,4511,Cafes and Restaurants,Seats - Outdoor,60,144.966076,-37.80391,"{'lon': 144.9660759838103, 'lat': -37.8039101353}"
1857,2022,254,106091,106091,139-141 Lygon Street CARLTON VIC 3053,Carlton,Universal Restaurant,139-141 Lygon Street CARLTON VIC 3053,4511,Cafes and Restaurants,Seats - Indoor,100,144.966087,-37.803848,"{'lon': 144.9660867199495, 'lat': -37.8038476141}"
2085,2022,254,106091,106091,139-141 Lygon Street CARLTON VIC 3053,Carlton,Universal Restaurant,139-141 Lygon Street CARLTON VIC 3053,4511,Cafes and Restaurants,Seats - Outdoor,90,144.966087,-37.803848,"{'lon': 144.9660867199495, 'lat': -37.8038476141}"


In [22]:
# First five records for the trading name 'Subway'
df[df['trading_name'].eq("Subway")].head(5)

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
101,2022,51,105301,105301,175-177 King Street MELBOURNE VIC 3000,Melbourne (CBD),Subway,Part Ground 175-177 King Street MELBOURNE VIC ...,4512,Takeaway Food Services,Seats - Indoor,16,144.955232,-37.816025,"{'lon': 144.95523196469128, 'lat': -37.8160250..."
123,2022,54,103180,103180,187-193 Elizabeth Street MELBOURNE VIC 3000,Melbourne (CBD),Subway,Part Ground 187-193 Elizabeth Street MELBOURNE...,4512,Takeaway Food Services,Seats - Outdoor,8,144.962708,-37.813888,"{'lon': 144.9627082819642, 'lat': -37.81388772..."
407,2022,114,562692,562692,465 Elizabeth Street MELBOURNE VIC 3000,Melbourne (CBD),Subway,Ground 465 Elizabeth Street MELBOURNE VIC 3000,4512,Takeaway Food Services,Seats - Indoor,18,144.960008,-37.808065,"{'lon': 144.9600075589612, 'lat': -37.8080653997}"
814,2022,710,108625,108625,407B-407D St Kilda Road MELBOURNE VIC 3004,Melbourne (Remainder),Subway,407B St Kilda Road MELBOURNE VIC 3004,4512,Takeaway Food Services,Seats - Outdoor,8,144.976195,-37.83627,"{'lon': 144.97619479875, 'lat': -37.8362698264..."
950,2022,1101,110843,110843,Spencer Outlet Centre 163-261 Spencer Street D...,Docklands,Subway,Ground 221 Spencer Street DOCKLANDS VIC 3008,4512,Takeaway Food Services,Seats - Outdoor,9,144.950564,-37.814509,"{'lon': 144.9505641426, 'lat': -37.81450897357..."


### The last step is to check for duplicates for the features:
    - property_id
    - trading_name
    - business_address
    - number_of_seats

In [25]:
# Flag every row whose property, trading name, business address and seat
# count all match another row (keep=False marks all members of the group).
check_3 = df.duplicated(
    subset=['property_id', 'trading_name', 'business_address', 'number_of_seats'],
    keep=False,
)
int(check_3.sum())

96

In [28]:
# Seating-record count per trading name among the rows flagged by check_3,
# showing the first 10 trading names in alphabetical order.
# groupby already orders the group keys, so no explicit sort is needed.
df.loc[check_3, ['trading_name', 'seating_type']].groupby('trading_name').count().head(10)

Unnamed: 0_level_0,seating_type
trading_name,Unnamed: 1_level_1
Anchor Eatery,2
Baguette Studios,2
Benny's Bakery Cafe,2
Bowery To Williamsburg,2
Breslin Bar And Grill,2
Brioche by Philip,2
Campari House,2
Casa Del Gelato,2
Chai 'N' Chilli,2
Chocolateria San Churro QV,2


In [29]:
# All records for the trading name 'Anchor Eatery'
df[df['trading_name'].eq('Anchor Eatery')]

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
1205,2022,773,627016,627016,73-91 South Wharf Drive DOCKLANDS VIC 3008,Docklands,Anchor Eatery,196 River Esplanade DOCKLANDS VIC 3008,4512,Takeaway Food Services,Seats - Indoor,7,144.935919,-37.822384,"{'lon': 144.9359189686012, 'lat': -37.82238445..."
2338,2022,773,627016,627016,73-91 South Wharf Drive DOCKLANDS VIC 3008,Docklands,Anchor Eatery,196 River Esplanade DOCKLANDS VIC 3008,4512,Takeaway Food Services,Seats - Outdoor,7,144.935919,-37.822384,"{'lon': 144.9359189686012, 'lat': -37.82238445..."


In [30]:
# All records for the trading name 'Urban Deli'
df[df['trading_name'].eq('Urban Deli')]

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
1458,2022,47,602997,103597,SX121 121 Exhibition Street MELBOURNE VIC 3000,Melbourne (CBD),Urban Deli,"Shop 8, Gnd & Mezz 121 Exhibition Street MELBO...",4511,Cafes and Restaurants,Seats - Outdoor,100,144.970109,-37.812814,"{'lon': 144.97010937553796, 'lat': -37.8128141..."
1459,2022,47,602997,103597,SX121 121 Exhibition Street MELBOURNE VIC 3000,Melbourne (CBD),Urban Deli,"Shop 8, Gnd & Mezz 121 Exhibition Street MELBO...",4511,Cafes and Restaurants,Seats - Indoor,100,144.970109,-37.812814,"{'lon': 144.97010937553796, 'lat': -37.8128141..."


## Conclusions:
### - Null values:
    - No Null values in Dataset.
### - Shape and Describe
    - The Dataset contains 15 features where not all are required
    - Further processing is required to trim down the Dataset
### - Duplicates
    - The Dataset does not include any true duplicates.
    - Each duplicate step presented information about the Dataset for instance:
        - Step 1 indicated the duplicate was due to inside and outside seating.
        - Step 2 indicated some businesses were big enough to have multiple addresses.
        - Step 3 identified a few businesses which have the same amount of inside and outside seating.
        - For Steps 2 and 3, the data was double-checked online through Google.

## Processing the Dataset
    - In this section, the dataset is going to be trimmed down to 5 features
    - We are also visualising some tables

In [8]:
# Preview the full dataset again before selecting features
df.head(2)

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_address,clue_small_area,trading_name,business_address,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,longitude,latitude,location
0,2022,1110,620301,620301,120 Pearl River Road DOCKLANDS VIC 3008,Docklands,Yassas,"Shop 14A, Ground 120 Pearl River Road DOCKLAND...",4511,Cafes and Restaurants,Seats - Indoor,54,144.936589,-37.811859,"{'lon': 144.93658926722165, 'lat': -37.811859459}"
1,2022,1112,103980,103980,Flinders Wharf Apartments 40-66 Siddeley Stree...,Docklands,Them Authentic Vietnamese Cuisine & Rolls,"Part Unit 13, Ground 60 Siddeley Street DOCKLA...",4511,Cafes and Restaurants,Seats - Outdoor,34,144.952479,-37.822884,"{'lon': 144.95247888392848, 'lat': -37.8228836..."


In [9]:
# Keep only the five features used in the rest of the analysis
data = df[[
    'clue_small_area',
    'industry_anzsic4_description',
    'number_of_seats',
    'longitude',
    'latitude',
]]

In [11]:
# Export the trimmed dataset to data_cafe.csv in the working directory.
# The row index is written as well (pandas' default), as an unnamed first column.
data.to_csv('data_cafe.csv')

In [33]:
# Shape of the full dataset, for comparison with the trimmed one
df.shape

(3031, 15)

In [172]:
# Shape of the trimmed dataset: same rows, only the selected features
data.shape

(3031, 5)

In [233]:
# Mean longitude and latitude of the records in each small area.
# The string alias 'mean' is used because passing np.mean to groupby.aggregate
# is deprecated in recent pandas versions; the resulting table is the same.
data.loc[:,['clue_small_area','longitude','latitude']].groupby(['clue_small_area']).aggregate(['mean'])

Unnamed: 0_level_0,longitude,latitude
Unnamed: 0_level_1,mean,mean
clue_small_area,Unnamed: 1_level_2,Unnamed: 2_level_2
Carlton,144.966083,-37.800962
Docklands,144.945761,-37.818099
East Melbourne,144.982352,-37.813271
Kensington,144.927898,-37.792378
Melbourne (CBD),144.963589,-37.81369
Melbourne (Remainder),144.978776,-37.838646
North Melbourne,144.950279,-37.801953
Parkville,144.956069,-37.792397
Port Melbourne,144.916466,-37.825409
South Yarra,144.981562,-37.834691


In [244]:
# Number of seating records (non-null number_of_seats values) for each
# combination of small area and industry - a row count, not a seat total.
table2 = (
    data[['clue_small_area', 'industry_anzsic4_description', 'number_of_seats']]
    .groupby(by=['clue_small_area', 'industry_anzsic4_description'])
    .count()
)
table2

Unnamed: 0_level_0,Unnamed: 1_level_0,number_of_seats
clue_small_area,industry_anzsic4_description,Unnamed: 2_level_1
Carlton,Accommodation,6
Carlton,Bakery Product Manufacturing (Non-factory based),6
Carlton,Cafes and Restaurants,286
Carlton,Motion Picture Exhibition,1
Carlton,"Pubs, Taverns and Bars",16
...,...,...
West Melbourne (Residential),Accommodation,3
West Melbourne (Residential),Cafes and Restaurants,49
West Melbourne (Residential),"Pubs, Taverns and Bars",11
West Melbourne (Residential),"Sports and Physical Recreation Venues, Grounds and Facilities Operation",2
