In [5]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Store previous yoga centers to check for duplicates across rows (tracking by placeId)
previous_centers = {}

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and up to 20 yoga centers
    header = ["City", "State", "Country"]
    for i in range(1, 21):  # Adjusted for 20 centers
        header.extend([f"center{i}name", f"center{i}website", f"center{i}review"])

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        # Initialize counters for found centers
        centers_found = 0
        added_centers = 0
        print(f"Searching for centers in {city}, {state}, {country}")

        # Process the first search response (city, state, country)
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Skip the place if it already exists in the same row (check by placeId)
                if place_id and place_id in seen_center_ids:
                    continue
                # Add to seen centers set to track uniqueness in this row and globally across previous rows
                if place_id:
                    seen_center_ids.add(place_id)

                # Only include centers that have a website
                if website != "N/A":
                    # Construct the review URL using place_id if available
                    review_url = f"https://www.google.com/maps/place/?q=place_id:{place_id}" if place_id else "N/A"

                    # Add center details if the website is present
                    print(f"Found center with website: {name}, {website}, {review_url}")
                    row_data.extend([name, website, review_url])
                    centers_found += 1
                    added_centers += 1

                # Stop adding centers after the first 20 valid ones
                if centers_found >= 20:
                    break

        # If there are less than 20 centers from the city search, perform the country search
        if centers_found < 20:
            print(f"Only {centers_found} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Process the country search response
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    if centers_found >= 20:
                        break  # Stop after reaching 20 centers

                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Skip if already added in this or previous searches
                    if place_id and place_id in seen_center_ids:
                        continue
                    if place_id:
                        seen_center_ids.add(place_id)

                    # Only include centers that have a website
                    if website != "N/A":
                        # Construct the review URL using place_id if available
                        review_url = f"https://www.google.com/maps/place/?q=place_id:{place_id}" if place_id else "N/A"

                        # Add center details if the website is present
                        print(f"Found center with website: {name}, {website}, {review_url}")
                        row_data.extend([name, website, review_url])
                        centers_found += 1
                        added_centers += 1

        # Fill in the missing center details with "N/A" if there are less than 20 valid centers
        while centers_found < 20:
            row_data.extend(["N/A", "N/A", "N/A"])
            centers_found += 1

        # Append row data to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {centers_found} centers found, "
              f"{added_centers} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (4).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found center with website: Ahola Thoddoo, http://www.aholathoddoo.com/, https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Found center with website: Holiday Cottage Thoddoo, http://www.holidaycottage.com.mv/, https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Only 2 centers found in Thoddoo, Alif Alif Atoll, Maldives.
 Trying to find more from Maldives
Found center with website: Kingsway Thoddoo, https://www.hotelscheck-in.com/kingswaythoddoomaldives/en/, https://www.google.com/maps/place/?q=place_id:ChIJ0Z4y0atFQDsROj7CrRzurP0
Found center with website: Maldives Fitness Yoga trips, https://t.me/zumbamaldives, https://www.google.com/maps/place/?q=place_id:ChIJf4i67IYRFiQRbWGu1jfcHvg
Found center with website: Amazing View Guest House, https://www.amazingviewthoddoo.com/, https://www.google.com/maps/place/?q=place_id

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Store previous yoga centers to check for duplicates across rows (tracking by placeId)
previous_centers = {}

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and up to 20 yoga centers
    header = ["City", "State", "Country"]
    for i in range(1, 21):  # Adjusted for 20 centers
        header.extend([f"center{i}name", f"center{i}website", f"center{i}review"])

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        # Initialize counters for found centers
        centers_found = 0
        added_centers = 0
        print(f"Searching for centers in {city}, {state}, {country}")

        # Process the first search response (city, state, country)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Only include centers that have a website
                if website != "N/A":
                    # Add center to the list (we'll check later if it's unique)
                    city_centers.append((name, website, place_id))

                # Stop adding centers after the first 20 valid ones
                if len(city_centers) >= 20:
                    break

        # If there are less than 20 centers from the city search, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Process the country search response
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Only include centers that have a website
                    if website != "N/A":
                        # Add center to the list (we'll check later if it's unique)
                        country_centers.append((name, website, place_id))

                    # Stop adding centers after reaching 20
                    if len(country_centers) + len(city_centers) >= 20:
                        break

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Only add the first 20 unique centers with websites
            seen_place_ids = set()
            for name, website, place_id in all_centers:
                if place_id and place_id not in seen_place_ids:
                    seen_place_ids.add(place_id)
                    row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])
                    centers_found += 1
                    added_centers += 1

                if centers_found >= 20:
                    break

        # Fill in the missing center details with "N/A" if there are less than 20 valid centers
        while centers_found < 20:
            row_data.extend(["N/A", "N/A", "N/A"])
            centers_found += 1

        # Append row data to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {centers_found} centers found, "
              f"{added_centers} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (5).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Only 5 centers found in Thoddoo, Alif Alif Atoll, Maldives.
 Trying to find more from Maldives
Row 1 processed: 20 centers found, 5 centers added to the row.
Processing row 2/5...
Searching for centers in Dhigurah, Alif Dhaal Atoll, Maldives
Only 5 centers found in Dhigurah, Alif Dhaal Atoll, Maldives.
 Trying to find more from Maldives
Row 2 processed: 20 centers found, 5 centers added to the row.
Processing row 3/5...
Searching for centers in Dhiffushi, Kaafu Atoll, Maldives
Only 11 centers found in Dhiffushi, Kaafu Atoll, Maldives.
 Trying to find more from Maldives
Row 3 processed: 20 centers found, 11 centers added to the row.
Processing row 4/5...
Searching for centers in Fulidhoo, Vaavu Atoll, Maldives
Only 2 centers found in Fulidhoo, Vaavu Atoll, Maldives.
 Trying to find more from Maldives
Row 4 processed: 20 centers found, 2 centers added to the row.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and up to 20 yoga centers
    header = ["City", "State", "Country"]
    for i in range(1, 21):  # Adjusted for 20 centers
        header.extend([f"center{i}name", f"center{i}website", f"center{i}review"])

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        # Initialize counters for found centers
        centers_found = 0
        added_centers = 0
        print(f"Searching for centers in {city}, {state}, {country}")

        # Process the first search response (city, state, country)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Only include centers that have a website
                if website != "N/A":
                    # Add center to the list (we'll check later if it's unique)
                    city_centers.append((name, website, place_id))

                # Stop adding centers after the first 20 valid ones
                if len(city_centers) >= 20:
                    break

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

        # If there are less than 20 centers from the city search, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Process the country search response
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Only include centers that have a website
                    if website != "N/A":
                        # Add center to the list (we'll check later if it's unique)
                        country_centers.append((name, website, place_id))

                    # Stop adding centers after reaching 20
                    if len(country_centers) + len(city_centers) >= 20:
                        break

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Only add the first 20 unique centers with websites
            seen_place_ids = set()
            for name, website, place_id in all_centers:
                if place_id and place_id not in seen_place_ids:
                    seen_place_ids.add(place_id)
                    row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])
                    centers_found += 1
                    added_centers += 1

                if centers_found >= 20:
                    break

        # Fill in the missing center details with "N/A" if there are less than 20 valid centers
        while centers_found < 20:
            row_data.extend(["N/A", "N/A", "N/A"])
            centers_found += 1

        # Append row data to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {centers_found} centers found, "
              f"{added_centers} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (6).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 4 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Only 4 centers found in Thoddoo, Alif Alif Atoll, Maldives.
 Trying to find more from Maldives
Found 4 centers in Maldives during country search.
Row 1 processed: 20 centers found, 4 centers added to the row.
Processing row 2/5...
Searching for centers in Dhigurah, Alif Dhaal Atoll, Maldives
Found 5 centers in Dhigurah, Alif Dhaal Atoll, Maldives during city search.
Only 5 centers found in Dhigurah, Alif Dhaal Atoll, Maldives.
 Trying to find more from Maldives
Found 5 centers in Maldives during country search.
Row 2 processed: 20 centers found, 7 centers added to the row.
Processing row 3/5...
Searching for centers in Dhiffushi, Kaafu Atoll, Maldives
Found 2 centers in Dhiffushi, Kaafu Atoll, Maldives during city search.
Only 2 centers found in Dhiffushi, Kaafu Atoll, Maldives.
 Trying to

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [8]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and up to 20 yoga centers
    header = ["City", "State", "Country"]
    for i in range(1, 21):  # Adjusted for 20 centers
        header.extend([f"center{i}name", f"center{i}website", f"center{i}review"])

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        # Initialize counters for found centers
        centers_found = 0
        added_centers = 0
        print(f"Searching for centers in {city}, {state}, {country}")

        # Process the first search response (city, state, country)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Only include centers that have a website
                if website != "N/A":
                    # Add center to the list (we'll check later if it's unique)
                    city_centers.append((name, website, place_id))

                # Stop adding centers after the first 20 valid ones
                if len(city_centers) >= 20:
                    break

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")
        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than 20 centers from the city search, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Process the country search response
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Only include centers that have a website
                    if website != "N/A":
                        # Add center to the list (we'll check later if it's unique)
                        country_centers.append((name, website, place_id))

                    # Stop adding centers after reaching 20
                    if len(country_centers) + len(city_centers) >= 20:
                        break

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Only add the first 20 unique centers with websites
            seen_place_ids = set()
            for name, website, place_id in all_centers:
                if place_id and place_id not in seen_place_ids:
                    seen_place_ids.add(place_id)
                    row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])
                    centers_found += 1
                    added_centers += 1

                if centers_found >= 20:
                    break

        # Fill in the missing center details with "N/A" if there are less than 20 valid centers
        while centers_found < 20:
            row_data.extend(["N/A", "N/A", "N/A"])
            centers_found += 1

        # Append row data to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {centers_found} centers found, "
              f"{added_centers} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (7).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 4 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: Holiday Cottage Thoddoo, Website: http://www.holidaycottage.com.mv/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Center: Kingsway Thoddoo, Website: https://www.hotelscheck-in.com/kingswaythoddoomaldives/en/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJ0Z4y0atFQDsROj7CrRzurP0
Center: Maldives Fitness Yoga trips, Website: https://t.me/zumbamaldives, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJf4i67IYRFiQRbWGu1jfcHvg
Only 4 centers found in Thoddoo, Alif Alif Atoll, Maldives.
 Trying to find more from Maldives
Found 4 centers in Maldives during country search.
Ce

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and up to 20 yoga centers
    header = ["City", "State", "Country"]
    for i in range(1, 21):  # Adjusted for 20 centers
        header.extend([f"center{i}name", f"center{i}website", f"center{i}review"])

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        # Initialize counters for found centers
        centers_found = 0
        added_centers = 0
        print(f"Searching for centers in {city}, {state}, {country}")

        # Process the first search response (city, state, country)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Only include centers that have a website
                if website != "N/A":
                    # Add center to the list (we'll check later if it's unique)
                    city_centers.append((name, website, place_id))

                # Stop adding centers after the first 20 valid ones
                if len(city_centers) >= 20:
                    break

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")
        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than 20 centers from the city search, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Process the country search response
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Only include centers that have a website
                    if website != "N/A":
                        # Add center to the list (we'll check later if it's unique)
                        country_centers.append((name, website, place_id))

                    # Stop adding centers after reaching 20
                    if len(country_centers) + len(city_centers) >= 20:
                        break

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Only add the first 20 unique centers with websites
            seen_place_ids = set()
            for name, website, place_id in all_centers:
                if place_id and place_id not in seen_place_ids:
                    seen_place_ids.add(place_id)
                    row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])
                    centers_found += 1
                    added_centers += 1

                if centers_found >= 20:
                    break

        # Fill in the missing center details with "N/A" if there are less than 20 valid centers
        while centers_found < 20:
            row_data.extend(["N/A", "N/A", "N/A"])
            centers_found += 1

        # Append row data to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {centers_found} centers found, "
              f"{added_centers} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


In [9]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and up to 20 yoga centers
    header = ["City", "State", "Country"]
    for i in range(1, 21):  # Adjusted for 20 centers
        header.extend([f"center{i}name", f"center{i}website", f"center{i}review"])

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        # Initialize counters for found centers
        centers_found = 0
        added_centers = 0
        print(f"Searching for centers in {city}, {state}, {country}")

        # Process the first search response (city, state, country)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Only include centers that have a website
                if website != "N/A":
                    # Add center to the list (we'll check later if it's unique)
                    city_centers.append((name, website, place_id))

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")
        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than 20 centers from the city search, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Process the country search response
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Only include centers that have a website
                    if website != "N/A":
                        # Add center to the list (we'll check later if it's unique)
                        country_centers.append((name, website, place_id))

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Only add the first 20 unique centers with websites
            seen_place_ids = set()
            for name, website, place_id in all_centers:
                if place_id and place_id not in seen_place_ids:
                    seen_place_ids.add(place_id)
                    row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])
                    centers_found += 1
                    added_centers += 1

                if centers_found >= 20:
                    break

        # Fill in the missing center details with "N/A" if there are less than 20 valid centers
        while centers_found < 20:
            row_data.extend(["N/A", "N/A", "N/A"])
            centers_found += 1

        # Append row data to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {centers_found} centers found, "
              f"{added_centers} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (8).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 2 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: Holiday Cottage Thoddoo, Website: http://www.holidaycottage.com.mv/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Only 2 centers found in Thoddoo, Alif Alif Atoll, Maldives.
 Trying to find more from Maldives
Found 4 centers in Maldives during country search.
Center: Holiday Cottage Thoddoo, Website: http://www.holidaycottage.com.mv/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Center: Kingsway Thoddoo, Websit

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and yoga centers
    header = ["City", "State", "Country", "Center Name", "Website", "Review URL"]

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        print(f"Searching for centers in {city}, {state}, {country}")

        # Collect all centers found in city search
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Only add centers that have a website
                if website != "N/A":
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        city_centers.append((name, website, place_id))

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than needed centers, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Collect centers found in the country search
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Only add centers that have a website
                    if website != "N/A":
                        if place_id and place_id not in seen_center_ids:
                            seen_center_ids.add(place_id)
                            country_centers.append((name, website, place_id))

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Add all centers to the row
            for name, website, place_id in all_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        else:
            # Add city centers to the row if 20 or more centers are found in city search
            for name, website, place_id in city_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Append the row data with all found centers to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)



Saving locations.xlsx to locations (9).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 2 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Center: Holiday Cottage Thoddoo, Website: http://www.holidaycottage.com.mv/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Only 2 centers found in Thoddoo, Alif Alif Atoll, Maldives.
 Trying to find more from Maldives
Found 2 centers in Maldives during country search.
Center: Maldives Fitness Yoga trips, Website: https://t.me/zumbamaldives, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJf4i67IYRFiQRbWGu1jfcHvg
Center: Kingsway Thoddoo, Website: https://www.hotelscheck-in.com/kingswaythoddoomaldives/en/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJ0Z4y0atFQDsROj7CrRzurP0
Ro

ValueError: 6 columns passed, passed data had 18 columns

In [11]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Write headers for city, state, country, and yoga centers
    header = ["City", "State", "Country", "Center Name", "Website", "Review URL"]

    # Create a DataFrame to ensure the sheet is created properly
    df = pd.DataFrame(columns=header)  # Initial empty DataFrame with correct header

    # Write this empty DataFrame first to create the sheet with the correct structure
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        print(f"Searching for centers in {city}, {state}, {country}")

        # Collect all centers found in city search (including those with and without websites)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Add all centers, regardless of whether they have a website
                if place_id and place_id not in seen_center_ids:
                    seen_center_ids.add(place_id)
                    city_centers.append((name, website, place_id))

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than needed centers, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Collect centers found in the country search (including those with and without websites)
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Add all centers, regardless of whether they have a website
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        country_centers.append((name, website, place_id))

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Add all centers to the row
            for name, website, place_id in all_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        else:
            # Add city centers to the row if 20 or more centers were found in city search
            for name, website, place_id in city_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Append the row data with all found centers to the list of all rows
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)



Saving locations.xlsx to locations (10).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 7 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: V-FITNESS THODDOO, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJIWOwsANFQDsRfArGTW46-4A
Center: Thoddoo Island Life, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJp9bhqOBFQDsRd8CWC7WFZuM
Center: Thoddoo Inn, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJPXSN8CNEQDsRXQ5IXnNm8uI
Center: Veli Thoddoo Inn Maldives, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJTwI9RSZEQDsRblKOCc_yVYw
Center: Ithaa Thoddoo Inn`, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJGSmf_iZEQDsRq7wdU5fK3z4
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Center: Hol

ValueError: 6 columns passed, passed data had 42 columns

In [12]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Initialize base header for city, state, country
    base_header = ["City", "State", "Country"]

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        print(f"Searching for centers in {city}, {state}, {country}")

        # Collect all centers found in city search (including those with and without websites)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Add all centers, regardless of whether they have a website
                if place_id and place_id not in seen_center_ids:
                    seen_center_ids.add(place_id)
                    city_centers.append((name, website, place_id))

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than needed centers, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Collect centers found in the country search (including those with and without websites)
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Add all centers, regardless of whether they have a website
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        country_centers.append((name, website, place_id))

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Add all centers to the row
            for name, website, place_id in all_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        else:
            # Add city centers to the row if 20 or more centers were found in city search
            for name, website, place_id in city_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Ensure the header reflects the correct number of columns based on the number of centers
        center_columns = len(row_data) - 3  # Subtract 3 for city, state, and country columns
        num_centers = center_columns // 3  # Each center has 3 columns (name, website, review URL)

        # Update header dynamically
        dynamic_header = base_header.copy()
        for i in range(1, num_centers + 1):
            dynamic_header.extend([f"Center {i} Name", f"Center {i} Website", f"Center {i} Review URL"])

        # Ensure the row has the correct number of columns (matching the dynamic header)
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=dynamic_header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (11).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 13 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: Thoddoo Island Life, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJp9bhqOBFQDsRd8CWC7WFZuM
Center: Ari Heaven Thoddoo Maldives, Website: https://www.hotelscheck-in.com/ariheaventhodoomaldives/en/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJq3gXrSVEQDsRiw7M_mbYsp0
Center: V-FITNESS THODDOO, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJIWOwsANFQDsRfArGTW46-4A
Center: Athiri Veli, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJ2a4R-yZEQDsRgTJ3PEmKT5w
Center: Veli Thoddoo Inn Maldives, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJTwI9RSZEQDsRblKOCc_yVYw
Center: Evila Inn Thoddoo, Website: N/A, Review URL: https://www.google.com/maps/place/?q=plac

IndexError: At least one sheet must be visible

In [14]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Initialize base header for city, state, country
    base_header = ["City", "State", "Country"]

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Try the first search (city, state, country)
        response_json = None
        try:
            # API Connection
            conn = http.client.HTTPSConnection("google.serper.dev")
            payload = json.dumps({
                "q": query,
                "gl": "AR"  # Modify location as needed
            })
            headers = {
                'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                'Content-Type': 'application/json'
            }

            # Send request
            conn.request("POST", "/maps", payload, headers)
            res = conn.getresponse()
            data = res.read()
            response_json = json.loads(data)

            if 'statusCode' in response_json and response_json['statusCode'] == 403:
                print(f"Response from first search for {query}: {response_json}")
                raise Exception(f"Unauthorized access for {query}")

        except Exception as e:
            print(f"Error in first search for {query}: {e}")

        print(f"Searching for centers in {city}, {state}, {country}")

        # Collect all centers found in city search (including those with and without websites)
        city_centers = []
        if response_json and 'places' in response_json and len(response_json['places']) > 0:
            for place in response_json['places']:
                name = place.get("title", "N/A")
                website = place.get("website", "N/A")
                place_id = place.get("placeId", None)

                # Add all centers, regardless of whether they have a website
                if place_id and place_id not in seen_center_ids:
                    seen_center_ids.add(place_id)
                    city_centers.append((name, website, place_id))

        # Print the number of centers found during city search
        print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

        # Print details of the centers found in city search
        for center in city_centers:
            name, website, place_id = center
            print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

        # If there are less than needed centers, perform the country search
        if len(city_centers) < 20:
            print(f"Only {len(city_centers)} centers found in {city}, {state}, {country}.\n Trying to find more from {country}")

            # Retry search for the country if fewer than 20 centers were found
            query = f"yoga in {country}"  # Searching only by country if city search was insufficient

            # Send the same API request but for the country
            try:
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from second search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in second search for {query}: {e}")

            # Collect centers found in the country search (including those with and without websites)
            country_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Add all centers, regardless of whether they have a website
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        country_centers.append((name, website, place_id))

            # Print the number of centers found during country search
            print(f"Found {len(country_centers)} centers in {country} during country search.")
            # Print details of the centers found in country search
            for center in country_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Combine city and country centers
            all_centers = city_centers + country_centers

            # Add all centers to the row
            for name, website, place_id in all_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        else:
            # Add city centers to the row if 20 or more centers were found in city search
            for name, website, place_id in city_centers:
                row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Ensure the header reflects the correct number of columns based on the number of centers
        center_columns = len(row_data) - 3  # Subtract 3 for city, state, and country columns
        num_centers = center_columns // 3  # Each center has 3 columns (name, website, review URL)

        # Update header dynamically based on the number of centers
        dynamic_header = base_header.copy()
        for i in range(1, num_centers + 1):
            dynamic_header.extend([f"Center {i} Name", f"Center {i} Website", f"Center {i} Review URL"])

        # Ensure the row has the correct number of columns (matching the dynamic header)
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=dynamic_header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (13).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 7 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: V-FITNESS THODDOO, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJIWOwsANFQDsRfArGTW46-4A
Center: Veli Thoddoo Inn Maldives, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJTwI9RSZEQDsRblKOCc_yVYw
Center: Thoddoo Island Life, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJp9bhqOBFQDsRd8CWC7WFZuM
Center: Thoddoo Inn, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJPXSN8CNEQDsRXQ5IXnNm8uI
Center: Ithaa Thoddoo Inn`, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJGSmf_iZEQDsRq7wdU5fK3z4
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJcei0-MxFQDsRXj-FaFPLo4g
Center: Hol

IndexError: At least one sheet must be visible

In [15]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Initialize base header for city, state, country
    base_header = ["City", "State", "Country"]

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Keep a flag for pagination
        next_page_token = None

        while True:
            response_json = None
            try:
                # API Connection
                conn = http.client.HTTPSConnection("google.serper.dev")
                payload = json.dumps({
                    "q": query,
                    "gl": "AR",  # Modify location as needed
                    "next_page_token": next_page_token if next_page_token else ""
                })
                headers = {
                    'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                    'Content-Type': 'application/json'
                }

                # Send request
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from first search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in first search for {query}: {e}")

            print(f"Searching for centers in {city}, {state}, {country}")

            # Collect all centers found in city search (including those with and without websites)
            city_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Add all centers, regardless of whether they have a website
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        city_centers.append((name, website, place_id))

            # Print the number of centers found during city search
            print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

            # Print details of the centers found in city search
            for center in city_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Check for the next page (pagination)
            next_page_token = response_json.get("next_page_token", None)
            if not next_page_token:
                break  # No more pages to fetch, exit the loop

            # Wait for a while before requesting the next page (Google Maps API has a delay before the next page is available)
            print("Waiting for next page to be available...")
            time.sleep(3)  # Wait 3 seconds before requesting the next page

        # Add city centers to the row
        for name, website, place_id in city_centers:
            row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Ensure the header reflects the correct number of columns based on the number of centers
        center_columns = len(row_data) - 3  # Subtract 3 for city, state, and country columns
        num_centers = center_columns // 3  # Each center has 3 columns (name, website, review URL)

        # Update header dynamically based on the number of centers
        dynamic_header = base_header.copy()
        for i in range(1, num_centers + 1):
            dynamic_header.extend([f"Center {i} Name", f"Center {i} Website", f"Center {i} Review URL"])

        # Ensure the row has the correct number of columns (matching the dynamic header)
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=dynamic_header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (14).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 10 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: V-FITNESS THODDOO, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJIWOwsANFQDsRfArGTW46-4A
Center: Thoddoo Island Life, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJp9bhqOBFQDsRd8CWC7WFZuM
Center: Veli Thoddoo Inn Maldives, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJTwI9RSZEQDsRblKOCc_yVYw
Center: Thoddoo Inn, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJPXSN8CNEQDsRXQ5IXnNm8uI
Center: Holiday Cottage Thoddoo, Website: http://www.holidaycottage.com.mv/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChI

IndexError: At least one sheet must be visible

In [16]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Initialize base header for city, state, country
    base_header = ["City", "State", "Country"]

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Keep a flag for pagination
        next_page_token = None

        while True:
            response_json = None
            try:
                # API Connection
                conn = http.client.HTTPSConnection("google.serper.dev")
                payload = json.dumps({
                    "q": query,
                    "gl": "AR",  # Modify location as needed
                    "next_page_token": next_page_token if next_page_token else ""
                })
                headers = {
                    'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                    'Content-Type': 'application/json'
                }

                # Send request
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from first search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in first search for {query}: {e}")

            print(f"Searching for centers in {city}, {state}, {country}")

            # Collect all centers found in city search (including those with and without websites)
            city_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Add all centers, regardless of whether they have a website
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        city_centers.append((name, website, place_id))

            # Print the number of centers found during city search
            print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

            # Print details of the centers found in city search
            for center in city_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Check for the next page (pagination)
            next_page_token = response_json.get("next_page_token", None)
            if not next_page_token:
                break  # No more pages to fetch, exit the loop

            # Wait for a while before requesting the next page (Google Maps API has a delay before the next page is available)
            print("Waiting for next page to be available...")
            time.sleep(3)  # Wait 3 seconds before requesting the next page

        # Add city centers to the row
        for name, website, place_id in city_centers:
            row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Ensure the header reflects the correct number of columns based on the number of centers
        center_columns = len(row_data) - 3  # Subtract 3 for city, state, and country columns
        num_centers = center_columns // 3  # Each center has 3 columns (name, website, review URL)

        # Update header dynamically based on the number of centers
        dynamic_header = base_header.copy()
        for i in range(1, num_centers + 1):
            dynamic_header.extend([f"Center {i} Name", f"Center {i} Website", f"Center {i} Review URL"])

        # Ensure the row has the correct number of columns (matching the dynamic header)
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Once all rows are gathered, write the data to the Excel sheet
    df = pd.DataFrame(all_rows, columns=dynamic_header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)



Saving locations.xlsx to locations (15).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 11 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: V-FITNESS THODDOO, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJIWOwsANFQDsRfArGTW46-4A
Center: Thoddoo Island Life, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJp9bhqOBFQDsRd8CWC7WFZuM
Center: Veli Thoddoo Inn Maldives, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJTwI9RSZEQDsRblKOCc_yVYw
Center: Thoddoo Inn, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJPXSN8CNEQDsRXQ5IXnNm8uI
Center: Holiday Cottage Thoddoo, Website: http://www.holidaycottage.com.mv/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJhbYr-CZEQDsR_Htxwj2GZF0
Center: Ahola Thoddoo, Website: http://www.aholathoddoo.com/, Review URL: https://www.google.com/maps/place/?q=place_id:ChI

IndexError: At least one sheet must be visible

In [17]:
import requests
import pandas as pd
from datetime import datetime
import json
import time
from google.colab import files  # For uploading files in Colab
import http.client

# Upload the Excel file
uploaded = files.upload()

# Load the city, state, and country data from the uploaded Excel file
excel_filename = list(uploaded.keys())[0]  # This assumes you upload one file
sheet_name = "Sheet1"  # Change this if your sheet has a different name

# Read the city, state, and country data from the Excel file
cities_df = pd.read_excel(excel_filename, sheet_name=sheet_name)

# Get the current date and time for the output file name
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"/content/result{current_time}.xlsx"  # The output file name will be resultYYYYMMDD_HHMMSS.xlsx

# Create an Excel writer to write the results into an Excel file
with pd.ExcelWriter(csv_filename, engine='openpyxl') as writer:  # Change the engine to 'openpyxl'
    # Create a list to store all the rows for final writing to Excel
    all_rows = []

    # Initialize base header for city, state, country
    base_header = ["City", "State", "Country"]

    # Iterate through each row in the Excel data
    for index, row in cities_df.iterrows():
        total_rows = len(cities_df)  # Total number of rows in the sheet
        print(f"Processing row {index + 1}/{total_rows}...")  # Show the current row number and total rows

        city = row['city']
        state = row['state']
        country = row['country']

        # Construct the search query for yoga studios
        query = f"yoga in {city}, {state}, {country}"

        # Initialize row data with city, state, country
        row_data = [city, state, country]

        # Keep track of seen center IDs to avoid duplicates in this row
        seen_center_ids = set()

        # Keep a flag for pagination
        next_page_token = None

        while True:
            response_json = None
            try:
                # API Connection
                conn = http.client.HTTPSConnection("google.serper.dev")
                payload = json.dumps({
                    "q": query,
                    "gl": "AR",  # Modify location as needed
                    "next_page_token": next_page_token if next_page_token else ""
                })
                headers = {
                    'X-API-KEY': 'abfe315b7f9e4392041a4aff90c4ccd4ebcdfad4',  # Use your valid API key
                    'Content-Type': 'application/json'
                }

                # Send request
                conn.request("POST", "/maps", payload, headers)
                res = conn.getresponse()
                data = res.read()
                response_json = json.loads(data)

                if 'statusCode' in response_json and response_json['statusCode'] == 403:
                    print(f"Response from first search for {query}: {response_json}")
                    raise Exception(f"Unauthorized access for {query}")

            except Exception as e:
                print(f"Error in first search for {query}: {e}")

            print(f"Searching for centers in {city}, {state}, {country}")

            # Collect all centers found in city search (including those with and without websites)
            city_centers = []
            if response_json and 'places' in response_json and len(response_json['places']) > 0:
                for place in response_json['places']:
                    name = place.get("title", "N/A")
                    website = place.get("website", "N/A")
                    place_id = place.get("placeId", None)

                    # Add all centers, regardless of whether they have a website
                    if place_id and place_id not in seen_center_ids:
                        seen_center_ids.add(place_id)
                        city_centers.append((name, website, place_id))

            # Print the number of centers found during city search
            print(f"Found {len(city_centers)} centers in {city}, {state}, {country} during city search.")

            # Print details of the centers found in city search
            for center in city_centers:
                name, website, place_id = center
                print(f"Center: {name}, Website: {website}, Review URL: https://www.google.com/maps/place/?q=place_id:{place_id}")

            # Check for the next page (pagination)
            next_page_token = response_json.get("next_page_token", None)
            if not next_page_token:
                break  # No more pages to fetch, exit the loop

            # Wait for a while before requesting the next page (Google Maps API has a delay before the next page is available)
            print("Waiting for next page to be available...")
            time.sleep(3)  # Wait 3 seconds before requesting the next page

        # Add city centers to the row
        for name, website, place_id in city_centers:
            row_data.extend([name, website, f"https://www.google.com/maps/place/?q=place_id:{place_id}"])

        # Ensure the header reflects the correct number of columns based on the number of centers
        center_columns = len(row_data) - 3  # Subtract 3 for city, state, and country columns
        num_centers = center_columns // 3  # Each center has 3 columns (name, website, review URL)

        # Update header dynamically based on the number of centers
        dynamic_header = base_header.copy()
        for i in range(1, num_centers + 1):
            dynamic_header.extend([f"Center {i} Name", f"Center {i} Website", f"Center {i} Review URL"])

        # Ensure the row has the correct number of columns (matching the dynamic header)
        all_rows.append(row_data)

        # Print stats after each row
        print(f"Row {index + 1} processed: {len(row_data) // 3} centers added to the row.")

        # Be respectful of rate limits by pausing between requests if needed
        time.sleep(1)  # Adjust sleep time based on API rate limits

    # Write the dynamic header once, and then the data
    df = pd.DataFrame(all_rows, columns=dynamic_header)
    df.to_excel(writer, index=False, sheet_name='Yoga Centers')

print(f"Scraped results from {len(cities_df)} cities and saved to {csv_filename}")

# Use the following command to download the file to your local machine
files.download(csv_filename)


Saving locations.xlsx to locations (16).xlsx
Processing row 1/5...
Searching for centers in Thoddoo, Alif Alif Atoll, Maldives
Found 13 centers in Thoddoo, Alif Alif Atoll, Maldives during city search.
Center: Thoddoo Island Life, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJp9bhqOBFQDsRd8CWC7WFZuM
Center: Ari Heaven Thoddoo Maldives, Website: https://www.hotelscheck-in.com/ariheaventhodoomaldives/en/, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJq3gXrSVEQDsRiw7M_mbYsp0
Center: V-FITNESS THODDOO, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJIWOwsANFQDsRfArGTW46-4A
Center: Athiri Veli, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJ2a4R-yZEQDsRgTJ3PEmKT5w
Center: Veli Thoddoo Inn Maldives, Website: N/A, Review URL: https://www.google.com/maps/place/?q=place_id:ChIJTwI9RSZEQDsRblKOCc_yVYw
Center: Evila Inn Thoddoo, Website: N/A, Review URL: https://www.google.com/maps/place/?q=plac

IndexError: At least one sheet must be visible