https://github.com/tanjimanasreen/GoogleMaps-Restaurant-Scraper/blob/main/Restaurant_Scrapper.ipynb

**`Installing required libraries`**

In [1]:
%%capture

!pip install python-google-places
!pip install langdetect
!pip install bnlp_toolkit
!wget https://www.omicronlab.com/download/fonts/kalpurush.ttf
!wget https://www.omicronlab.com/download/fonts/Siyamrupali.ttf
!pip install folium
!pip install geopandas

**`Importing required Libraries`**

In [1]:
import re
import time
import html
import folium
import unicodedata
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from wordcloud import WordCloud
from googleplaces import GooglePlaces, types, lang
from IPython.display import Markdown, display
from langdetect import detect
from folium.plugins import MarkerCluster  #marketcluster plugin
from folium.plugins import MousePosition  # MousePosition plugin
from folium.features import DivIcon  #DivIcon plugin   

**`Read the dataset`**

https://www.kaggle.com/mushfiqurrobin/network-coverage

In [2]:
df = pd.read_csv(r'J:\Coverage.csv')
df.head()

Unnamed: 0,Index,Upazila_or_Thana,District,Area_Zip_Code,Latitude,Longitude,Operator,Active_Network_Available,Network_Level,Satisfaction_Score,Real_Time_Experience
0,0,Kawkhali,Pirojpur,8510,22.626128,90.059277,GP,4G,Low,3,4.0
1,1,Mathbaria,Pirojpur,8560,22.287592,89.959272,GP,4G,Low,3,4.0
2,2,Nazirpur,Pirojpur,8540,22.729367,89.96088,GP,4G,Low,3,5.0
3,3,Nesarabad,Pirojpur,8522,22.7472,90.1036,GP,4G,Moderate,6,6.0
4,4,Pirojpur Sadar,Pirojpur,8500,22.5778,89.99,GP,4G,Good,10,7.5


In [3]:
area = df[['District', 'Upazila_or_Thana']]
area.head()

Unnamed: 0,District,Upazila_or_Thana
0,Pirojpur,Kawkhali
1,Pirojpur,Mathbaria
2,Pirojpur,Nazirpur
3,Pirojpur,Nesarabad
4,Pirojpur,Pirojpur Sadar


In [4]:
# Checking the Missing Values in total and percentage calculation

total = area.isnull().sum().sort_values(ascending=False)
percent = (area.isnull().sum()/area.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent*100], axis=1, keys=['total', 'percent'])
display(missing_data.head(5))

Unnamed: 0,total,percent
District,0,0.0
Upazila_or_Thana,0,0.0


In [5]:
# Checking Duplicate Rows 
area.duplicated().any()

True

In [6]:
area.duplicated().sum()

21800

In [7]:
## as there are duplicate values available, will be removing the duplicates

area.drop_duplicates(keep="first", inplace=True)
area.reset_index(drop=True, inplace=True)

area.to_csv("LocationsData.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  area.drop_duplicates(keep="first", inplace=True)


In [8]:
API_key = "AIzaSyBwcVq58Byk58xXHQDQ-5FSNi1TOyJQ2aA"
google_places = GooglePlaces(API_key)


In [9]:
scraped_restaurants = []
radius = 100

# Converting the list of Upazilla/Thana and District into a combined string
locations = []
areaList = area.values.tolist()

for area in areaList:
    locationNames = ', '.join([str(item) for item in area])
    locations.append(locationNames)

print(locations)

['Pirojpur, Kawkhali', 'Pirojpur, Mathbaria', 'Pirojpur, Nazirpur', 'Pirojpur, Nesarabad', 'Pirojpur, Pirojpur Sadar', 'Pirojpur, Zianagar', 'Brahmanbaria, Akhaura', 'Brahmanbaria, Ashuganj', 'Brahmanbaria, Brahmanbaria Sadar', 'Brahmanbaria, Bancharampur', 'Brahmanbaria, Bijoynagar', 'Brahmanbaria, Kasba', 'Brahmanbaria, Nabinagar', 'Brahmanbaria, Nasirnagar', 'Brahmanbaria, Sarail', 'Bandarban, Alikadam', 'Bandarban, Bandarban Sadar', 'Bandarban, Lama', 'Bandarban, Naikhongchhari', 'Bandarban, Rowangchari', 'Bandarban, Ruma', 'Bandarban, Thanchi', 'Chandpur, Chandpur Sadar', 'Chandpur, Faridganj', 'Chandpur, Haimchar', 'Chandpur, Hajiganj', 'Chandpur, Kachua', 'Chandpur, Matlab (Dakshin)', 'Chandpur, Matlab (Uttar)', 'Chandpur, Shahrasti', 'Chittagong, Anwara', 'Chittagong, Banskhali', 'Chittagong, Boalkhali', 'Chittagong, Chandanish', 'Chittagong, Fatikchari', 'Chittagong, Karnaphuli', 'Chittagong, Lohagara', 'Chittagong, Mirsharai', 'Chittagong, Patiya', 'Chittagong, Rangunia', 'Ch

In [10]:
for location in locations:
    print(".............", location, "...............")
    location_result = google_places.nearby_search(location=location, keyword='Restaurant', radius=radius) 
    if location_result:
        for place in location_result.places:
            place.get_details()
            place_id = place.details.get('place_id')
            name = place.name
            latitude = place.geo_location.get('lat')
            longitude = place.geo_location.get('lng')
            rating = place.rating
            number_of_reviews = place.details.get('user_ratings_total')
            affluence = place.details.get('price_level')
            address = place.formatted_address

            scraped_restaurants.append([place_id, name, latitude, longitude, rating, number_of_reviews, affluence, address])
            # print(place.details)

            # print(restaurant_data)
            print(".................. Scrapped Restaurants: ", len(scraped_restaurants))
            time.sleep(5) 

            while location_result.has_next_page_token:
                query_result = google_places.nearby_search(location=location, keyword='Restaurant',
                radius=radius, pagetoken=location_result.next_page_token)
                
                for place in location_result.places:
                    place.get_details()
                    place_id = place.details.get('place_id')
                    name = place.name
                    latitude = place.geo_location.get('lat')
                    longitude = place.geo_location.get('lng')
                    rating = place.rating
                    number_of_reviews = place.details.get('user_ratings_total')
                    affluence = place.details.get('price_level')
                    address = place.formatted_address

                    scraped_restaurants.append([place_id, name, latitude, longitude, rating, number_of_reviews, affluence, address])
                     # print(place.details)
                    # print(restaurant_data)  
                    print(".................. Scrapped Restaurants: ", len(scraped_restaurants))
                    time.sleep(5) 
                    

            time.sleep(5)

# Dumping the data into a DataFrame
df_restaurant = pd.DataFrame(restaurant_data, columns=['place_id', 'name', 'latitude', 'longitude', 'rating', 'number_of_reviews', 'affluence', 'address'])

df_restaurant.to_csv("restaurants.csv", index=False, encoding='utf-8')

............. Pirojpur, Kawkhali ...............


URLError: <urlopen error [Errno 11001] getaddrinfo failed>

read more

**Documentations :**
- https://medium.com/swlh/scraping-google-maps-using-selenium-3cec08eb6a92
- https://towardsdatascience.com/creating-a-dataset-using-an-api-with-python-dcc1607616d
- https://outscraper.com/scrape-google-maps-in-python/
- https://towardsdatascience.com/foods-around-me-google-maps-data-scraping-with-python-google-colab-588986c63db3
- https://app.outscraper.com/api-docs
- Scrape Google Maps Data Legally via Official API : https://stevesie.com/apps/google-maps-api