In [19]:
import os

# Read the Google API key from the environment so it is never hard-coded in the notebook.
# os.environ.get returns None when the variable is unset; later cells pass this value
# unchanged to the Knowledge Graph and Places request helpers.
GKGraph_API_KEY = os.environ.get('GKGraph_API_KEY')

In [3]:
# KnowledgeGraphTool
import requests

class KnowledgeGraphTool:
    """Minimal client for the Google Knowledge Graph Search API (entities:search endpoint)."""

    def __init__(self, api_key, timeout=10):
        """
        Initializes the KnowledgeGraphTool with the provided API key.

        Parameters:
        api_key (str): The API key for accessing the Google Knowledge Graph API.
        timeout (float): Seconds to wait for the API before giving up (default is 10).
        """
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = "https://kgsearch.googleapis.com/v1/entities:search"

    def search(self, query):
        """
        Sends a search request to the Google Knowledge Graph API with the given query.

        Only the single best match is requested (limit=1).

        Parameters:
        query (str): The user's query to search for in the Knowledge Graph.

        Returns:
        dict: The JSON response from the API if the request is successful.

        Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status code.
        requests.RequestException: On connection problems or when the timeout expires.
        """
        params = {
            'query': query,
            'key': self.api_key,
            'limit': 1,
            'indent': True
        }
        # A timeout keeps the notebook cell from hanging forever on a stalled connection.
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        # Raise for error statuses first so the method never falls through and returns None.
        response.raise_for_status()
        return response.json()

    def extract_info(self, response):
        """
        Extracts and returns the relevant information from the API response.

        Parameters:
        response (dict): The JSON response from the Knowledge Graph API. None or an
                         empty dict is treated as "nothing found".

        Returns:
        dict: A dictionary containing the name, description, and detailed description
              from the first result (missing fields are reported as 'N/A').
              Returns None if no relevant information is found.
        """
        items = (response or {}).get('itemListElement') or []
        if not items:
            return None

        element = items[0].get('result')
        if not element:
            # An entry without a usable 'result' payload carries nothing to report.
            return None

        detailed = element.get('detailedDescription') or {}
        return {
            'name': element.get('name', 'N/A'),
            'description': element.get('description', 'N/A'),
            'detailed_description': detailed.get('articleBody', 'N/A')
        }

def main():
    """
    Interactive entry point for the Knowledge Graph lookup.

    Builds a KnowledgeGraphTool from the module-level API key, asks the user for a query,
    runs the search, and prints the name, description and detailed description of the
    first match, or a "not found" message when nothing usable comes back.
    """
    client = KnowledgeGraphTool(GKGraph_API_KEY)
    user_query = input("Enter your query: ")
    info = client.extract_info(client.search(user_query))

    # Guard clause: nothing to report for an empty/None extraction result.
    if not info:
        print("No information found.")
        return

    print(f"Name: {info['name']}")
    print(f"Description: {info['description']}")
    print(f"Detailed Description: {info['detailed_description']}")

# Run the interactive Knowledge Graph lookup when this code is the entry point.
# In a notebook kernel __name__ is "__main__", so executing the cell prompts for a query
# right away (see the recorded output below).
if __name__ == "__main__":
    main()


Enter your query: Tim hortons
Name: Tim Hortons
Description: Restaurant chain
Detailed Description: Tim Hortons Inc., known colloquially as Tim's, Timmies, or Timmy's, is a Canadian multinational coffeehouse and restaurant chain with headquarters in Toronto; it serves coffee, donuts, sandwiches, breakfast egg muffins and other fast-food items. 


In [20]:
import requests

class NearbyPlacesTool:
    """Minimal client for the Google Places "nearby search" endpoint."""

    def __init__(self, api_key, timeout=10):
        """
        Initializes the NearbyPlacesTool with the provided API key.

        Parameters:
        api_key (str): The API key for accessing the Google Places API.
        timeout (float): Seconds to wait for the API before giving up (default is 10).
        """
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    def search(self, location, place_type, radius=1000):
        """
        Sends a search request to the Google Places API to find nearby places of a given type.

        Parameters:
        location (str): The location (latitude,longitude) around which to search for places.
        place_type (str): The type of place to search for (e.g., restaurant, cafe).
        radius (int): The radius in meters within which to search for places (default is 1000 meters).

        Returns:
        dict: The JSON response from the API if the request is successful.

        Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status code.
        requests.RequestException: On connection problems or when the timeout expires.
        """
        params = {
            'location': location,
            'type': place_type,
            'radius': radius,
            'key': self.api_key
        }
        # A timeout keeps the notebook cell from hanging forever on a stalled connection.
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        # Raise for error statuses first so the method never falls through and returns None.
        response.raise_for_status()
        # NOTE(review): the body's own "status" field is not inspected here; confirm whether
        # key/quota problems are reported there rather than through the HTTP status.
        return response.json()

    def extract_info(self, response):
        """
        Extracts and returns the relevant information from the API response.

        Parameters:
        response (dict): The JSON response from the Google Places API. None or a
                         response without 'results' yields an empty list.

        Returns:
        list: A list of dictionaries containing the name, address, and rating of each place
              (missing fields are reported as 'N/A').
        """
        results = (response or {}).get('results') or []
        return [
            {
                'name': place.get('name', 'N/A'),
                'address': place.get('vicinity', 'N/A'),
                'rating': place.get('rating', 'N/A')
            }
            for place in results
        ]

def main():
    """
    Interactive entry point for the nearby-places lookup.

    Prompts for a "latitude,longitude" location, a place type and a search radius, queries
    the Places API through NearbyPlacesTool, and prints name, address and rating of every
    place found. An empty or non-numeric radius falls back to 1000 meters instead of
    aborting the cell with a ValueError.
    """
    default_radius = 1000

    api_key = GKGraph_API_KEY
    tool = NearbyPlacesTool(api_key)
    location = input("Enter the location (latitude,longitude): ")
    place_type = input("Enter the type of place (e.g., restaurant, cafe): ")
    radius_text = input("Enter the radius in meters (default is 1000): ").strip()

    try:
        radius = int(radius_text) if radius_text else default_radius
    except ValueError:
        # Typos such as "5km" should not discard the location/type already entered.
        print(f"Invalid radius {radius_text!r}; using the default of {default_radius} meters.")
        radius = default_radius

    response = tool.search(location, place_type, radius)
    places_info = tool.extract_info(response)
    if places_info:
        for place in places_info:
            print(f"Name: {place['name']}")
            print(f"Address: {place['address']}")
            print(f"Rating: {place['rating']}\n")
    else:
        print("No nearby places found.")

# Run the interactive nearby-places lookup when this code is the entry point.
# `main` here is the function defined in this cell; it replaces the Knowledge Graph
# `main` defined earlier in the notebook, which used the same name.
if __name__ == "__main__":
    main()


Enter the location (latitude,longitude): 43.941888,-78.8955136
Enter the type of place (e.g., restaurant, cafe): cafe
Enter the radius in meters (default is 1000): 5000
Name: Tim Hortons
Address: 1251 Simcoe Street North, Oshawa
Rating: 3.6

Name: Tim Hortons
Address: 4051 Thickson Road North, Whitby
Rating: 3.7

Name: Coffee Culture Café & Eatery
Address: 555 Rossland Road East, Oshawa
Rating: 4.3

Name: Tim Hortons
Address: Durham College, Gordon Willey Building, Upper Level, Oshawa
Rating: 3.8

Name: Country Style
Address: Canadian Tire, 1333 Wilson Road North, Oshawa
Rating: 3.4

Name: Coffee Time
Address: 500 Rossland Road West, Oshawa
Rating: 4.1

Name: Tim Hortons
Address: 1361 Harmony Road North, Oshawa
Rating: 3.7

Name: Tim Hortons
Address: 1471 Harmony Road North, Oshawa
Rating: 3.6

Name: Tim Hortons
Address: 1311 Harmony Road North, Oshawa
Rating: 3.7

Name: Tim Hortons
Address: 575 Thornton Road North, Oshawa
Rating: 4

Name: Tim Hortons
Address: 520 Winchester Road East,

In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import json
import time

def scrape_tim_hortons(url):
    """
    Opens a Google Maps search-results page in Chrome and collects name/address pairs.

    Every `div[role="article"]` element is treated as one result card. A card is kept only
    if both the name and the address sub-elements can be located; cards where either lookup
    fails are skipped.

    Args:
        url (String): Google Maps search-results URL to load

    Returns:
        list: dictionaries with the keys 'name' and 'address', one per card that was read
    """
    # Set up the Selenium WebDriver (Chrome in this case)
    driver = webdriver.Chrome()  # You can specify the path to your ChromeDriver if it's not in your PATH
    services_data = []
    try:
        driver.get(url)

        # Give time for the page to load
        time.sleep(5)  # Adjust this sleep time as needed

        # Find the elements containing the service data
        # This part might need adjustments based on the actual HTML structure
        services = driver.find_elements(By.CSS_SELECTOR, 'div[role="article"]')

        for service in services:
            try:
                name = service.find_element(By.CSS_SELECTOR, 'div[jsan="7.tAeYtd"]').text
                address = service.find_element(By.CSS_SELECTOR, 'span[jsan="7.QsDR1c"]').text
            except Exception:
                # Best effort: skip cards that lack either field, but let
                # KeyboardInterrupt/SystemExit through so the cell can be stopped.
                continue
            services_data.append({
                'name': name,
                'address': address
            })
    finally:
        # Always close the browser, even when navigation or element lookup fails.
        driver.quit()

    return services_data

# Google Maps search-results page for "Tim Hortons"; the "@lat,lng,14z" segment of the URL
# carries the map centre and zoom level.
url = 'https://www.google.ca/maps/search/Tim+Hortons/@43.9419461,-78.8955136,14z/data=!3m1!4b1?entry=ttu'

# Launch the browser and collect the name/address pairs found on that page.
services_data = scrape_tim_hortons(url)

# Serialise the list of dictionaries as pretty-printed JSON (4-space indent).
services_json = json.dumps(services_data, indent=4)

# Show the result. The recorded run printed `[]`, i.e. no name/address pair was extracted.
print(services_json)


[]


In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import random
import string
import re
import json
import Wikipadia_Scrapper
import warnings
warnings.filterwarnings("ignore")


def generate_random_email():
    """
    Builds a throw-away e-mail address of the form <random username>@test.com.

    The username is 5 to 10 characters long (length picked at random) and is drawn, with
    replacement, from the ASCII letters and digits.

    Returns:
        String : random email address
    """
    alphabet = string.ascii_letters + string.digits

    # The length is drawn first and the characters second, so the random generator is
    # always consumed in the same order.
    local_part = ''.join(random.choices(alphabet, k=random.randint(5, 10)))

    # Fixed placeholder domain (change it here if a different domain is wanted).
    return "{}@{}".format(local_part, "test.com")


def extract_place_id(url):
    """
    Extracts the value of the `placeid=` parameter from a URL.

    The ID is matched as a run of ASCII letters, digits, hyphens and underscores.
    Underscores are included so that IDs containing one are not cut off at that character.

    Args:
        url (String): service url (None or "" is treated as "no ID")

    Returns:
        String: place unique id, or "" when the URL carries no `placeid=` parameter
    """
    if not url:
        return ""

    match = re.search(r'placeid=([a-zA-Z0-9_\-]+)', url)
    return match.group(1) if match else ""


def get_place_unique_ID(service_name, service_address, browser):
    """
    Drives the review-link generator website to obtain the generated link for a service.

    The function types "<service_name> <service_address>" into the site's search box, clicks
    the autocomplete suggestion, submits a throw-away e-mail address produced by
    generate_random_email(), and reads the value of the result field. The value is returned
    as-is; no place ID is parsed out of it here.

    Args:
        service_name (String): name of service
        service_address (String): address of service
        browser (Object): google chrome selenium driver

    Returns:
        String: value of the result input field, or "" if any step of the interaction fails
    """
    try:
        url = "https://reviewsonmywebsite.com/google-review-link-generator"
        browser.get(url)

        search_element = browser.find_element(
            By.XPATH, "/html/body/main/section[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/fieldset/input")
        search_element.send_keys(service_name + " " + service_address)
        # Give the autocomplete list time to appear before clicking a suggestion.
        time.sleep(2)
        autocomplete_element = browser.find_element(
            By.XPATH, "/html/body/main/section[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/fieldset/ul/li/a")
        autocomplete_element.click()

        generate_button_one = browser.find_element(
            By.XPATH, "/html/body/main/section[1]/div/div/div/div[2]/div[1]/div[1]/div[2]/button")
        generate_button_one.click()
        time.sleep(1)

        email_element = browser.find_element(
            By.XPATH, "/html/body/main/section[1]/div/div/div/div[2]/div[2]/div[3]/div/input")
        email_element.send_keys(generate_random_email())

        generate_button_two = browser.find_element(
            By.XPATH, "/html/body/main/section[1]/div/div/div/div[2]/div[2]/div[3]/button")
        generate_button_two.click()
        time.sleep(1)

        result_element = browser.find_element(
            By.XPATH, "/html/body/main/section[1]/div/div/div/div[2]/div[2]/div[4]/div/div/input")
        return result_element.get_attribute("value")
    except Exception:
        # Best effort: any failed step yields "", but KeyboardInterrupt/SystemExit are
        # no longer swallowed, so a long crawl can still be interrupted.
        return ""


def get_service_address(service_URL, browser):
    """
    Loads a service page and reads its address from the address button's aria-label.

    The label is cut after the first ": " and its final character is dropped; the suffix
    ', Canada' is then appended to the result.

    Args:
        service_URL (String): service url
        browser (Object): google chrome selenium driver

    Returns:
        String: address of service followed by ', Canada';
                None when the page has no address button with an aria-label (the dataset
                builder's dropna on "Service Address" relies on this to discard such rows);
                "" when loading or querying the page raises an error
    """
    try:
        browser.get(service_URL)
        address_buttons = browser.find_elements(
            By.XPATH, "//button[@data-item-id='address']")
        for button in address_buttons:
            label = button.get_attribute('aria-label')
            if label:
                # Skip past "<prefix>: " and drop the label's last character.
                return label[label.find(":") + 2:-1] + ', Canada'
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt/SystemExit.
        return ""
    # No labelled address button found.
    return None


def get_service_about_type(service_URL, browser):
    """
    Loads a service page and reads two pieces of text from it: the service type (category
    button) and the "about" summary. Both values are also printed as progress output.

    If anything fails along the way, the error is printed and BOTH values are reset to "",
    even when the service type had already been read successfully.

    Args:
        service_URL (String):  service url
        browser (Object):  google chrome selenium driver

    Returns:
        tuple: (about_service, service_type), two strings; ("", "") on failure
    """
    service_type = None
    about_service = None
    try:
        browser.get(service_URL)
        service_type_element = browser.find_element(By.XPATH, "/html/body/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[2]/span/span/button")
        service_type = service_type_element.text
        print("Service Type: " + service_type)

        time.sleep(0.1)

        # NOTE(review): the page is loaded a second time before the "about" text is read;
        # kept as-is because it is unclear whether the reload is a deliberate workaround.
        browser.get(service_URL)
        time.sleep(0.1)

        about_element = browser.find_element(
            By.XPATH, "/html/body/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[6]/button/div[2]/div[1]/div[1]")
        about_service = about_element.text
        print("About: " + about_service)

    except Exception as e:
        # Print the error once (the nested print used to emit an extra "None" line).
        print(e)
        about_service = ""
        service_type = ""

    return about_service, service_type


def get_service_name_URL(Service, coordinates, K=15):
    """
    Searches Google Maps for a service around the given coordinates and collects the
    aria-label (used as the service name) and href of every labelled link on the page.

    The page's anchors are scanned K times; each labelled anchor is scrolled into view while
    it is read. Duplicate URLs are dropped afterwards, and only URLs containing the literal
    text 'www.google.ca' are kept.

    Args:
        Service (String): type of service
        coordinates (String): coordinates of search, "latitude,longitude"
        K (int, optional): number of passes over the page's anchors. Defaults to 15.

    Returns:
        Panda's Dataframe: columns "Service Name" and "Service URL"; empty (but with both
        columns present) when nothing was collected
    """
    url = 'https://www.google.ca/maps/search/'
    service = Service.replace(" ", "+")
    temp_url = url + service + "/@" + coordinates + ",11.54z"

    services_list = []
    browser = webdriver.Chrome()
    try:
        browser.get(temp_url)

        for _ in range(K):
            try:
                for anchor in browser.find_elements(By.TAG_NAME, "a"):
                    service_name = anchor.get_attribute("aria-label")
                    service_url = anchor.get_attribute("href")
                    if service_name and service_url:
                        # Accessing this property scrolls the element into view.
                        anchor.location_once_scrolled_into_view
                        services_list.append({
                            "Service Name": service_name,
                            "Service URL": service_url,
                        })
            except Exception:
                # An anchor may disappear while the list is being read; abandon this pass
                # and try again on the next one.
                continue
    finally:
        # Close the browser even when navigation fails.
        browser.quit()

    # Naming the columns explicitly keeps the frame usable when nothing was collected
    # (otherwise "Service URL" would not exist and the next line would raise KeyError).
    df = pd.DataFrame(services_list, columns=["Service Name", "Service URL"])
    df = df.drop_duplicates(subset=["Service URL"])
    # Literal substring match: with the default regex mode the dots would match any character.
    is_google_link = df["Service URL"].str.contains('www.google.ca', regex=False).astype(bool)
    df = df[is_google_link]
    return df


def get_rate_popular_time(url):
    """
    Opens a service's Google Maps page in a fresh Chrome instance and reads its rating, its
    opening hours and its "popular times" labels.

    Every <div> aria-label containing "a.m" or "p.m" is collected, with narrow no-break
    spaces turned into plain spaces and ".." removed. If the first collected label contains
    "Hide open hours for the week", it is taken as the opening/closing text (with that
    trailer removed). The remaining labels are split into seven equal consecutive chunks,
    Monday first; labels left over after the seventh chunk are ignored.

    Args:
        url (String): google maps url of the service

    Returns:
        dict : popular time labels for each of the 7 days (empty lists when unavailable)
        String : opening and closing time text, or None when not found
        String : rating text as shown on the page, or None when not found
    """
    week_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    hours_trailer = "Hide open hours for the week"

    data = {day: [] for day in week_days}
    opening_closing_time = None
    # Defined before the try block so the error path below can always return it.
    rate = None
    browser = None

    try:
        browser = webdriver.Chrome()
        browser.get(url)

        time.sleep(1)
        address_buttons = browser.find_elements(
            By.XPATH, "//button[@data-item-id='address']")

        try:
            rate_element = browser.find_element(
                By.XPATH, "/html/body/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/span[1]/span[1]")
            rate = rate_element.text
        except Exception:
            rate = None

        popular_time_list = []
        # The time labels are only scanned on pages that expose a labelled address button.
        for button in address_buttons:
            if button.get_attribute('aria-label'):
                for div_tag in browser.find_elements(By.TAG_NAME, "div"):
                    label = div_tag.get_attribute('aria-label')
                    if label and ("a.m" in label or "p.m" in label):
                        popular_time_list.append(
                            label.replace("\u202f", " ").replace("..", ""))

        if popular_time_list:
            if hours_trailer in popular_time_list[0]:
                # ".." was already stripped above, so only the trailer text itself is
                # left to remove (matching ".. Hide ..." could never succeed).
                opening_closing_time = popular_time_list.pop(0).replace(
                    hours_trailer, "").strip()

            chunk = len(popular_time_list) // 7
            for position, day in enumerate(week_days):
                data[day] = popular_time_list[position * chunk:(position + 1) * chunk]

        return data, opening_closing_time, rate
    except Exception:
        return {day: [] for day in week_days}, None, rate
    finally:
        # One browser is started per call; always shut it down again.
        if browser is not None:
            browser.quit()


def merge_rate_popular_time_with_dataset_function(dataframe):
    """
    Returns a copy of the dataset extended with rating, popular-time and opening-hour columns.

    The input is first re-indexed to a fixed column order. For every row,
    get_rate_popular_time is called with the row's "Service URL"; the per-day popular-time
    lists are stored as JSON strings in one column per weekday, the rating goes into "Rate",
    and the opening/closing text (when one was found) goes into "Opening/Closing Time".

    Args:
        dataframe (Panda's Dataframe): dataframe that contains popular service name, address, url, place unique id, about, type

    Returns:
        Panda's Dataframe: updated dataframe with popular time, opening/closing time, and rates
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                     'Friday', 'Saturday', 'Sunday']
    base_columns = ["Service Type", "Service Name", "Service URL",
                    "About", "Service Address", "Service Place Unique ID"]

    df = dataframe.reindex(columns=base_columns)

    # Placeholder columns, appended in this order: Rate, the seven weekdays, opening hours.
    for new_column in ["Rate"] + weekday_names + ["Opening/Closing Time"]:
        df[new_column] = None

    # Column positions do not change inside the loop, so look them up once.
    rate_position = df.columns.get_loc("Rate")
    hours_position = df.columns.get_loc("Opening/Closing Time")
    weekday_positions = [(name, df.columns.get_loc(name)) for name in weekday_names]

    for row_number in range(len(df)):
        service_url = df.iloc[row_number]["Service URL"]
        busy_by_day, hours_text, rating = get_rate_popular_time(service_url)

        # Opening hours are written only when the page provided them.
        if hours_text is not None:
            df.iloc[row_number, hours_position] = hours_text

        for name, position in weekday_positions:
            df.iloc[row_number, position] = json.dumps(busy_by_day[name])

        df.iloc[row_number, rate_position] = rating

    return df


def build_dataset_helper_function(service, coordinates, K, browser):
    """
    Collects the base dataset for one service type.

    get_service_name_URL supplies the service names and URLs; each URL is then visited with
    the shared browser to read its address (get_service_address) and its about/type texts
    (get_service_about_type). The results are attached as the columns "Service Type",
    "About" and "Service Address", and rows whose address is missing (None/NaN) are dropped.
    """
    df = get_service_name_URL(Service=service, coordinates=coordinates, K=K)

    # One list per new column; the key order is the order in which the columns get attached.
    gathered = {"Service Type": [], "About": [], "Service Address": []}

    for record in df.to_dict("records"):
        page_url = record["Service URL"]
        # The address is read first and the about/type texts second, on the same browser.
        address_text = get_service_address(page_url, browser)
        about_text, type_text = get_service_about_type(page_url, browser)

        gathered["Service Address"].append(address_text)
        gathered["About"].append(about_text)
        gathered["Service Type"].append(type_text)

    for column_name, column_values in gathered.items():
        df[column_name] = column_values

    df.dropna(subset=["Service Address"], inplace=True)
    return df


def build_dataset_function(service, coordinates, K=15):
    """
    Builds the complete dataset for one service type.

    Steps:
      1. build_dataset_helper_function collects names, URLs, addresses, about texts and types.
      2. For every row, get_place_unique_ID fetches the generated review link and
         extract_place_id pulls the place ID out of it ("Service Place Unique ID" column).
      3. merge_rate_popular_time_with_dataset_function adds rating, popular times and
         opening hours.

    The Chrome instance shared by steps 1 and 2 is closed before step 3, and also when one
    of those steps raises.

    Args:
        service (String): type of service to search for
        coordinates (String): coordinates of search, "latitude,longitude"
        K (int, optional): passed through to the name/URL collection step. Defaults to 15.

    Returns:
        Panda's Dataframe: the merged dataset
    """
    browser = webdriver.Chrome()
    try:
        df = build_dataset_helper_function(
            service=service, coordinates=coordinates, K=K, browser=browser)

        places_unique_id_list = []
        for _, row in df.iterrows():
            review_link = get_place_unique_ID(
                service_name=row["Service Name"],
                service_address=row["Service Address"],
                browser=browser)
            places_unique_id_list.append(extract_place_id(review_link))

        df["Service Place Unique ID"] = places_unique_id_list
    finally:
        # Do not leave a Chrome process behind if any of the scraping steps fails.
        browser.quit()

    time.sleep(1)
    return merge_rate_popular_time_with_dataset_function(df)


# Main Loop
# Crawl: build one dataset per service, and queue every service type discovered in the
# results as a further service to crawl (the list grows while it is being iterated).
MAX_FINISHED_SERVICES = 1000  # stop building new datasets once this many are done
finished_services_list = []
# Path to the output text file
subservices_description = "./subservices_description.txt"

# Read services from a text file and add them to a list
services_privot = ["Starbucks", "Tim Horton's"]
with open("services.txt", "r") as file:
    for line in file:
        service = line.strip()
        # Blank lines would otherwise be queued as a service with an empty name.
        if service:
            services_privot.append(service)

for service in services_privot:
    should_build = (service not in finished_services_list
                    and len(finished_services_list) < MAX_FINISHED_SERVICES)

    if should_build:
        df = build_dataset_function(service, coordinates="43.6515,-79.3835", K=30)
        finished_services_list.append(service)
        for related_service in df["Service Type"].to_list():
            # Failed lookups yield "" (or a missing value); only queue real names.
            if (isinstance(related_service, str) and related_service.strip()
                    and related_service not in services_privot):
                services_privot.append(related_service)

        # Open the file in append mode
        with open(subservices_description, "a") as file:
            file.write(Wikipadia_Scrapper.generate_description(service) + "\n")  # Write the paragraph
            file.write("---\n")           # Write the separator

    print("Service: " + service)
    print("Pivot len: " + str(len(services_privot)))
    print("Finished Services List: " + str(len(finished_services_list)))

    if not should_build:
        # No dataset was built for this entry: `df` still belongs to the previous service
        # and must not be saved again under this service's name.
        continue

    # Drop rows with None values in 'id' column
    df = df.dropna(subset=["Service Place Unique ID"])

    # Drop rows with duplicate values in 'id' column, keeping only the first occurrence
    df = df.drop_duplicates(subset=["Service Place Unique ID"], keep='first')

    df = df.reset_index(drop=True)

    df.to_csv("./Datasets/"+service+"_Dataset.csv", index=False)

[]


In [22]:
import requests

def google_places_text_search(query, api_key, timeout=10):
    """
    Runs a free-text query against the Google Places "text search" endpoint.

    Args:
        query (str): search text, e.g. "ChainRestaurant New York"
        api_key (str): Google API key sent as the `key` parameter
        timeout (float, optional): seconds to wait for the API. Defaults to 10.

    Returns:
        dict: decoded JSON body when the API answers with HTTP 200
        None: for any other status code, when the request itself fails (connection error,
              timeout), or when the body is not valid JSON
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {
        'query': query,
        'key': api_key
    }

    try:
        # A timeout keeps the notebook cell from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # Keep the "None means failure" contract for transport and decoding errors too.
        return None
    return None

# The API key comes from the environment-backed GKGraph_API_KEY defined in the first cell;
# nothing needs to be pasted in here. Edit `query` below to change the search text.
api_key = GKGraph_API_KEY
# Earlier free-text query that was tried: 'I want to find middle eastern restaurant in NewYork'
query="ChainRestaurant New York"
result = google_places_text_search(query, api_key)

# google_places_text_search returns None on failure; an empty dict would also land in the
# else branch because only truthiness is checked.
if result:
    # One line per place: its name followed by its formatted address.
    for place in result['results']:
        print(place['name'], place['formatted_address'])
else:
    print("Error fetching data from Google Places API")


Raising Cane's Chicken Fingers 1501 Broadway, New York, NY 10036, United States
Chick-fil-A 1180 6th Ave, New York, NY 10036, United States
Chick-fil-A 675 8th Ave, New York, NY 10036, United States
The Smith 1150 Broadway, New York, NY 10001, United States
Arby's 611 8th Ave, New York, NY 10018, United States
Five Guys 43 W 55th St, New York, NY 10019, United States
Applebee's Grill + Bar 234 W 42nd St, New York, NY 10036, United States
Yard House 575 7th Ave, New York, NY 10018, United States
P.F. Chang's 113 University Pl, New York, NY 10003, United States
Chick-fil-A 711 Lexington Ave, New York, NY 10022, United States
Shake Shack Astor Place 20 3rd Ave, New York, NY 10003, United States
Chain Restaurant Total Rewards Association 330 W 38th St, New York, NY 10018, United States
Taco Bell 840 8th Ave, New York, NY 10019, United States
Jollibee 609 8th Ave, New York, NY 10018, United States
Tick Tock Diner NY 481 8th Ave, New York, NY 10001, United States
Junior's Restaurant & Bakery