In [32]:
import requests
import re
from urllib.parse import urlparse

In [2]:
import pandas as pd

In [34]:
def google_search(query, api_key, cse_id, **kwargs):
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        'q': query,
        'key': api_key,
        'cx': cse_id
    }
    params.update(kwargs)
    response = requests.get(url, params=params)
    return response.json()

In [16]:
import os

In [17]:
# Your API key and Programmable Search Engine ID
api_key = os.environ.get('GOOGLE_CUSTOM_SEARCH_KEY')
cse_id = os.environ.get('CUSTOM_SEARCH_ENGINE')

In [36]:
# Define a function to extract the date from the snippet
def extract_date(snippet):
    # Regular expression pattern to match dates in the format: "MMM dd, yyyy" or "yyyy"
    date_pattern = re.compile(r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?|\d{4}) \d{1,2}, \d{4}|\d{4}')
    match = date_pattern.search(snippet)
    return match.group(0) if match else "Date not found"


In [37]:
# Function to extract date from snippet using regex
def extract_date(snippet):
    # Regex pattern for various date formats
    date_pattern = re.compile(r'\b(?:\d{1,2} (?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?) \d{4}|'  # dd MMM yyyy
                               r'(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?) \d{1,2} \d{4}|'  # MMM dd yyyy
                               r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?|\d{4}) \d{1,2}, \d{4}|'  # MMM dd, yyyy
                               r'\d{4})')  # yyyy
    match = date_pattern.search(snippet)
    return match.group(0) if match else "Date not found"

In [38]:
# Define a function to extract the publisher from the URL
def extract_publisher(url):
    domain = urlparse(url).netloc
    # Remove 'www.' prefix if it exists
    if domain.startswith('www.'):
        domain = domain[4:]
    # Extract publisher name before the first dot
    publisher = domain.split('.')[0]
    return publisher.capitalize()

In [39]:
from bs4 import BeautifulSoup

def get_page_content(url):
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        paragraphs = soup.find_all('p')
        content = ' '.join([p.get_text() for p in paragraphs])
        return content
    else:
        return None

In [40]:
search_query = 'Top trending tourist spots in Pakistan'

In [41]:
# Example search
results = google_search(search_query, api_key, cse_id, num=4)

In [42]:
content = ''

# Process and print the results
for item in results.get('items', []):
    title = item.get('title', 'No title')
    snippet = item.get('snippet', 'No snippet')
    link = item.get('link', 'No link')
    
    # Extract the date from the snippet
    date = extract_date(snippet)
    publisher = extract_publisher(link)
    
    print(f"Title: {title}")
    print(f"Link: {link}")
    print(f"Date: {date}")
    print(f"Publisher: {publisher}")
    print(f"Content: ")
    try:
        content += get_page_content(link)
        # print(content)
    except:
        print("Failed to extract content")

    print('-' * 80)

Title: 11 best places to visit in Pakistan - Lonely Planet
Link: https://www.lonelyplanet.com/articles/best-places-to-visit-in-pakistan
Date: Aug 13, 2023
Publisher: Lonelyplanet
Content: 
Aug 13, 2023 • 6 min read The Himalayas in Pakistan have long been an irresistible place to visit for many travelers © Ratnakorn Piyasirisorost / Getty Images The Himalaya and Karakoram mountain ranges have long drawn adventurous tourists to northern Pakistan, but these famous summits are far from the only beautiful places to visit in the country. In fact, there are fascinating places to visit spread right across the country, from the cosmopolitan coastal metropolis of Karachi to the centuries-old Mughal city of Lahore. Our list of best places to visit takes in varied landscapes and architecture, thousands of years of history, and some of the best aspects of Pakistan’s vibrant contemporary culture. Together, it’s a reminder of why Pakistan is one of Asia’s most exciting tourist destinations. It's tim

In [6]:
spots = pd.read_csv('recommended_spots_updated.csv')

spots.head(10)

Unnamed: 0,place,score,image_path
0,Hunza,2.9,images/hunza_valley.jpg
1,Badshaahi,0.8,images/badshaahi_mosque.jpg
2,Wagah,0.7,images/wagah_border.jpg
3,Faisal,0.6,images/faisal_mosque.jpg
4,Margalla,3.5,images/margalla_hills.jpg
5,Fort,2.4,images/lahore_fort.jpg
6,Deosai,1.2,images/deosai_national_park.jpg
7,Dolmen,0.1,images/dolmen_mall.jpg
8,Port,0.4,images/port_grand.jpg
9,Daman-e-Koh,0.5,images/daman-e-koh.jpg


In [58]:
for word in content.split(' '):
    # Do something with each word
    for place in spots['place']:
        if word.lower() == place.lower():
            spots.loc[spots['place'] == place, ' score'] += 1
            print(word)
    # print(word)

Karakoram
Fort
Karakoram
Hunza
Islamabad
Karakoram
Fort
Karakoram
Minar
Minar
Islamabad
Margalla
Margalla
Islamabad
Margalla
Murree
K2
Karakoram
Hunza
Karakoram
Deosai


In [9]:
spots.head(10)

Unnamed: 0,place,score,image_path
0,Hunza,2.9,images/hunza_valley.jpg
1,Badshaahi,0.8,images/badshaahi_mosque.jpg
2,Wagah,0.7,images/wagah_border.jpg
3,Faisal,0.6,images/faisal_mosque.jpg
4,Margalla,3.5,images/margalla_hills.jpg
5,Fort,2.4,images/lahore_fort.jpg
6,Deosai,1.2,images/deosai_national_park.jpg
7,Dolmen,0.1,images/dolmen_mall.jpg
8,Port,0.4,images/port_grand.jpg
9,Daman-e-Koh,0.5,images/daman-e-koh.jpg


In [12]:
spots = spots.sort_values(' score', ascending=False).head(10)

In [13]:
spots.head(10)

Unnamed: 0,place,score,image_path
10,Karakoram,6.6,images/karakoram_highway.jpg
15,Islamabad,3.7,images/islamabad_zoo.jpg
4,Margalla,3.5,images/margalla_hills.jpg
0,Hunza,2.9,images/hunza_valley.jpg
20,Minar,2.7,images/lahore_fort.jpg
5,Fort,2.4,images/lahore_fort.jpg
14,Murree,1.8,images/murree_hills.jpg
18,K2,1.4,images/k2_base_camp.jpg
6,Deosai,1.2,images/deosai_national_park.jpg
13,Shangrila,0.9,images/shangrila_resort.jpg


In [15]:
#upload the data to the csv file
spots.to_csv('top_5.csv', index=False)