In [34]:
# Calculate PB Value
import math
import re

PAGINATION_PARAMETERS_REGEX = re.compile(
    r"""
    \A                                      # Start of string
    (?:\s*)                                 # Initial possible whitespace
    @(?P<latitude>[-+]?\d{1,2}(?:[.,]\d+)?)  # Latitude: @10.78472
    (?:\s*,\s*)                             # Separator between latitude and longitude
    (?P<longitude>[-+]?\d{1,3}(?:[.,]\d+)?)  # Longitude: @-110
    (?:\s*,\s*)                             # Separator between longitude and zoom
    (?P<zoom>\d{1,2}(?:[.,]\d+)?)z           # Zoom: 9.22
    $                                      # End of string
    """, re.VERBOSE

)

EARTH_RADIUS_IN_METERS = 6371010
TILE_SIZE = 256
SCREEN_PIXEL_HEIGHT = 768
RADIUS_X_PIXEL_HEIGHT = 27.3611 * EARTH_RADIUS_IN_METERS * SCREEN_PIXEL_HEIGHT

def altitude(zoom, latitude):
    return str((RADIUS_X_PIXEL_HEIGHT * math.cos((latitude * math.pi) / 180)) / ((2 ** zoom) * TILE_SIZE))

def pagination(location_lat_long, start_offset):
    extracted_parameters = PAGINATION_PARAMETERS_REGEX.match(location_lat_long)

    if not extracted_parameters:
        return ""

    return (
        "!4m8!1m3!1d" +
        altitude(float(extracted_parameters['zoom']), float(extracted_parameters['latitude'])) +
        "!2d" +
        extracted_parameters['longitude'] +
        "!3d" +
        extracted_parameters['latitude'] +
        "!3m2!1i1024!2i768!4f13.1!7i20!8i" +
        (start_offset or "0") +
        "!10b1!12m25!1m1!18b1!2m3!5m1!6e2!20e3!6m16!4b1!23b1!26i1!27i1!41i2!45b1!49b1!63m0!67b1!73m0!74i150000!75b1!89b1!105b1!109b1!110m0!10b1!16b1!19m4!2m3!1i360!2i120!4i8!20m65!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m50!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!22m3!1s!2z!7e81!24m55!1m15!13m7!2b1!3b1!4b1!6i1!8b1!9b1!20b0!18m6!3b1!4b1!5b1!6b1!13b0!14b0!2b1!5m5!2b1!3b1!5b1!6b1!7b1!10m1!8e3!14m1!3b1!17b1!20m4!1e3!1e6!1e14!1e15!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!89b1!26m4!2m3!1i80!2i92!4i8!30m28!1m6!1m2!1i0!2i0!2m2!1i458!2i768!1m6!1m2!1i974!2i0!2m2!1i1024!2i768!1m6!1m2!1i0!2i0!2m2!1i1024!2i20!1m6!1m2!1i0!2i748!2m2!1i1024!2i768!34m16!2b1!3b1!4b1!6b1!8m4!1b1!3b1!4b1!6b1!9b1!12b1!14b1!20b1!23b1!25b1!26b1!37m1!1e81!42b1!46m1!1e9!47m0!49m1!3b1!50m53!1m49!2m7!1u3!4s!5e1!9s!10m2!3m1!1e1!2m7!1u2!4s!5e1!9s!10m2!2m1!1e1!2m7!1u16!4s!5e1!9s!10m2!16m1!1e1!2m7!1u16!4s!5e1!9s!10m2!16m1!1e2!3m11!1u16!2m4!1m2!16m1!1e1!2s!2m4!1m2!16m1!1e2!2s!3m1!1u2!3m1!1u3!4BIAE!2e2!3m1!3b1!59B!65m0!69i540"
    )

In [35]:
location_lat_long = "@47.841147,10.652438,11z"  # New coordinates for the desired area
start_offset = '20'  # Starting offset, adjust as needed
place_name = 'landmark'  # Changed from 'Martial Arts Club' to 'hiking trails'

# Headers remain unchanged
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Cookie': "place_your_cookie_here",  # Remember to replace this with an actual cookie if necessary
    'Referer': 'https://www.google.com/',
    'Accept' : '*/*',
    'Accept-Language' : 'en-US,en;q=0.9'
}

# Params are updated with the new `pb` value from the `pagination` function call
params = {
    'tbm': 'map',
    'authuser': '0',
    'hl': 'en',
    'pb': pagination(location_lat_long, start_offset),
    'q': place_name,
    'tch': '1',
    'ech': '5',
}

In [36]:
import requests
url = "https://www.google.com/search"
response = requests.get(url, params=params, headers=headers)
messy_data = response.text

In [50]:
# for place in places_data[1:5]:  # Print details for a few more entries and more of the data structure
#     print(json.dumps(place, indent=4))

In [43]:
# Extract latitude and longitude with checks for data existence and structure
latitude = [i[14][2] if len(i) > 14 and i[14] and len(i[14]) > 2 and isinstance(i[14][2], (float, int)) else None for i in places_data[1:]]
longitude = [i[14][3] if len(i) > 14 and i[14] and len(i[14]) > 3 and isinstance(i[14][3], (float, int)) else None for i in places_data[1:]]

# Update the dictionary with these values
places_dict['latitude'] = latitude
places_dict['longitude'] = longitude

In [55]:
latitude = [i[14][18][2] if len(i) > 14 and i[14] and len(i[14]) > 18 and i[14][18] and len(i[14][18]) > 2 else None for i in places_data[1:]]
longitude = [i[14][18][3] if len(i) > 14 and i[14] and len(i[14]) > 18 and i[14][18] and len(i[14][18]) > 3 else None for i in places_data[1:]]

# Print out to check if extraction is now correct
print("Sample latitudes:", latitude[:5])
print("Sample longitudes:", longitude[:5])

Sample latitudes: ['d', 't', 'l', 'a', 'r']
Sample longitudes: ['r', 'e', 'v', 'r', 'g']


In [38]:
# Example print statements to check the content before conversion
print("Latitude sample:", latitude[:5])
print("Longitude sample:", longitude[:5])

Latitude sample: [['Unnamed Road', '87724 Ottobeuren'], ['87650 Baisweil'], ['Webergasse 14', '87435 Kempten (Allgäu)'], ['Pfarrer-Burger-Weg 3', '87733 Markt Rettenbach'], ['87499 Wildpoldsried']]
Longitude sample: [None, None, None, None, None]


In [49]:
latitude

['d',
 't',
 'l',
 'a',
 'r',
 'h',
 'l',
 'e',
 'n',
 'f',
 's',
 's',
 'r',
 's',
 'h',
 'd',
 'r',
 't',
 'e',
 'n']

In [46]:
import json
data = json.loads(messy_data[:-6])
javascript_array_str = data['d'][5:]
python_list = json.loads(javascript_array_str)
places_data = python_list[0][1]

name = [i[14][11] for i in places_data[1:]]
Addresses = [i[14][2] for i in places_data[1:]]
website = [i[14][7] for i in places_data[1:]]
phone_number = [i[14][178] for i in places_data[1:]]
open_close_timing = [i[14][34] for i in places_data[1:]]
reviews_rating = [i[14][4] for i in places_data[1:]]

latitude = [i[14][18][2] if len(i) > 14 and i[14] and len(i[14]) > 18 and i[14][18] and len(i[14][18]) > 2 else None for i in places_data[1:]]
longitude = [i[14][18][3] if len(i) > 14 and i[14] and len(i[14]) > 18 and i[14][18] and len(i[14][18]) > 3 else None for i in places_data[1:]]

places_dict = {
    "name": [i for i in name],
    "address": [i[0] for i in Addresses],
    "website": [i[0] if i is not None else 'None' for i in website],
    "phone_number": [i for i in phone_number],
    "open_close_timing": [i for i in open_close_timing],
    "reviews_rating": [i for i in reviews_rating]
}
places_dict['phone_number'] = [data[0][0] if data is not None and data[0] is not None else 'None' for data in places_dict['phone_number']]

# Adding these to your dictionary
places_dict['latitude'] = latitude
places_dict['longitude'] = longitude

timings = []
for list_index in range(len(places_dict['open_close_timing'])):
  temp_dic = {}
  if places_dict['open_close_timing'][list_index]:
    if places_dict['open_close_timing'][list_index][1]:
      for i in range(7):
        temp_dic[places_dict['open_close_timing'][list_index][1][i][0]] = places_dict['open_close_timing'][list_index][1][i][1][0].replace('\u202f', ' ')
  else:
    temp_dic['days'] = None
  timings.append(temp_dic)
places_dict['open_close_timing'] = [i for i in timings]

def place_url(url):
  pattern = r"placeid=([^\&]+)"
  match = re.search(pattern, url)
  return 'https://www.google.com/maps/place/?q=place_id:' + match.group(1)

places_dict['ratings'] = [i[7] if i is not None else None for i in places_dict['reviews_rating']]
places_dict['reviews'] = [i[3][1] if i is not None else None for i in places_dict['reviews_rating']]
places_dict['gmap_link'] = [place_url(x) if x is not None else None for x in [i[3][0] if i is not None else None for i in places_dict['reviews_rating']]]
if 'reviews_rating' in places_dict.keys():
  del places_dict['reviews_rating']
else:
  print('It"s Already deleted')

In [47]:
import pandas as pd
df = pd.DataFrame(places_dict)
new_order = ['name', 'phone_number', 'ratings', 'reviews', 'website', 'address', 'latitude', 'longitude', 'gmap_link', 'open_close_timing']
df = df[new_order] 
df['address'] = df.address.astype(str).str.replace('[','').str.replace(']','')
df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce')  # Convert to floats, setting errors='coerce' will convert non-convertible values to NaN
df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce')
df = df.astype(object)
df.ratings = df.ratings.astype(str)
df.open_close_timing = df.open_close_timing.astype(str)

In [48]:
df

Unnamed: 0,name,phone_number,ratings,reviews,website,address,latitude,longitude,gmap_link,open_close_timing
0,Hydraulischer Widder,,,,,Unnamed Road,,,,{}
1,Alter Friedhof,,5.0,2 reviews,,87650 Baisweil,,,https://www.google.com/maps/place/?q=place_id:...,{'days': None}
2,Pulverturm,,4.2,5 reviews,https://de.m.wikipedia.org/wiki/Stadtbefestigu...,Webergasse 14,,,https://www.google.com/maps/place/?q=place_id:...,"{'Thursday': 'Open 24 hours', 'Friday': 'Open ..."
3,Pfarrhof Frechenrieden,08392 281,,,,Pfarrer-Burger-Weg 3,,,,{'days': None}
4,Burg Wolkenberg,,4.5,102 reviews,http://www.burgfreunde-wolkenberg.de/,87499 Wildpoldsried,,,https://www.google.com/maps/place/?q=place_id:...,{'days': None}
5,Bahnhof Wildpoldsried,,,,,Bahnhofstraße 23,,,,{'days': None}
6,"Ellighofen, Kriegerdenkmal",,,,,St.-Stephanus-Straße 3,,,,{'days': None}
7,Ehemaliger Gasthof Post,,5.0,1 review,,Rottenbucher Str. 9,,,https://www.google.com/maps/place/?q=place_id:...,{'days': None}
8,Denkmal Iller Unglück,,4.3,22 reviews,,87439 Kempten (Allgäu),,,https://www.google.com/maps/place/?q=place_id:...,{'days': None}
9,Hofstelle Mit Brunnen,,3.8,5 reviews,,87651,,,https://www.google.com/maps/place/?q=place_id:...,"{'Thursday': 'Open 24 hours', 'Friday': 'Open ..."
