### Admin
- run `pip freeze > requirements.txt`
- build documentation
- submit to github

### Using Geonames API
- react side

### Ask Llama to check which acitvities to activate

In [83]:
destination_json = {'country': "United Kingdom",
               'city': "London",}

In [86]:
from decouple import config # obtains information inside .ini or .env
import replicate

# admin configuation 
replicate_api_token = config('replicate_api_token')

# prompt generation
pre_prompt = """You are a helpful, respectful and honest assistant. 
                Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. 
                Please ensure that your responses are socially unbiased and positive in nature.
                """

# This one only outputs individual places (without description)
prompt_input_0 = """Can you categorise {}, {} as one of the following:
                    - Kid friendly
                    - Pet friendly
                    - Wheelchair friendly
                    - Shopping
                    - Amusement parks
                    - Shopping
                    - Museums
                    - Parks and scenic plane
                    - Theatre & Cultural activities
                    - Historical sites
                    - Food galore

                    Once you categorised it, you only need to return the activities that exist in the city. 
                    For instance, if London does not have Amusement Parks, then exclude it from the list. 
                    If London is known for Shopping instead, then include it in a list of "Confirmed categories: Shopping...." as output.
                    """.format(destination_json['country'], destination_json['city'])



# getting the response
def generate_recommendation(replicate_api_token, pre_prompt, prompt_input):
    response = replicate.run(
        "meta/llama-2-13b-chat",
        input={
            "seed": 2,
            "top_k": 0,
            "top_p": 1,
            "prompt": prompt_input,
            "max_tokens": 100,  # decrease this to generate less texts
            "temperature": 0.25,
            "system_prompt": pre_prompt,
            "length_penalty": 0.5,
            "prompt_template": "<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{prompt} [/INST]",
            "presence_penalty": 0.5,
            "log_performance_metrics": False
        },
    )

    # formatting 1: turning it into a whole paragraph
    full_response = ''
    for item in response:
        full_response += item

    # formatting 2: only capturing those from "places listed:..." format
    def format_0(full_response):
        for item in full_response.split("\n\n"):
            if "*" in item: # they would be given in a "* Place - Description" format
                item = item.split("\n*")
                break

        for i in range(len(item)):
            item[i] = item[i].replace("*", "").strip()

        return {"activities": item}
            
    return format_0(full_response)
            

activities_recommendations_json = generate_recommendation(replicate_api_token, pre_prompt, prompt_input_0)
    

In [88]:
activities_recommendations_json

{'activities': ['Shopping',
  'Museums',
  'Parks and scenic plane',
  'Theatre & Cultural activities',
  'Historical sites',
  'Food galore']}

### Using Bing API Search key to web-scrape information online

[Tutorial 1](https://www.scrapingdog.com/blog/scrape-bing/#Scraping_URLs_from_Bing): 
Selenium doesn't work as it gives "no results" when web-scraped directly so we'll use Bing Search API

[Tutorial 2](https://www.youtube.com/watch?v=QxHDX5_SERA):
The one that we'll follow

Activities list:
- Kid friendly
- Pet friendly
- Wheelchair friendly
- Shopping
- Amusement parks
- Shopping
- Museums
- Parks and scenic plane
- Theatre & Cultural activities
- Historical sites
- Food galore => include dietary restrictions

Of up to 10 categories 

In [60]:
# taken from react-server, activities preferences
acitivities_json = [{'country': "United Kingdom",
               'city': "London",
               'category':["Kid friendly"]}] # "Amusement Parks", "Museums"

In [61]:
from decouple import config # obtains information inside .ini or .env
import requests
from bs4 import BeautifulSoup


# admin configuation 
subscription_key = config('subscription_key_search', default='')


# declaring bing end-point for search
def get_results_for(subscription_key, search_term):
    search_url = "https://api.bing.microsoft.com/v7.0/search"

    headers = {"Ocp-Apim-Subscription-Key": subscription_key}
    params = {"q": search_term, "textDecorations": True, "textFormat": "HTML"}  ##answerCount filter, count?

    response = requests.get(search_url, headers=headers, params=params)
    response.raise_for_status()
    # we limit our results to first relevant 5 searches
    search_results = response.json()["webPages"]["value"][:5]
    return search_results


# creating a pipeline for quering potential places to show the swipping mechanism
def create_search_term_recommendations(subscription_key, acitivities_json):
    # sites we want to omit as the robustness of data is bad
    omit = ["Tripadvisor"]

    results = []

    # unpacking the activities json to send to API
    for cat in range(len(acitivities_json[0]["category"])):
        search_term = "Search for {} locations in {} {}".format(acitivities_json[0]["category"][cat], 
                                                                acitivities_json[0]["city"], 
                                                                acitivities_json[0]["country"])

        bing_results = get_results_for(subscription_key, search_term)

        # process each of the 5 results by:
        for res in bing_results:
            # ensuring that the siteName isn't in the omitted sites
            if res["siteName"] not in omit:
                # serialise it into site-name & url
                #results.append({'siteName': res["siteName"],
                #                'url': res["url"]})
                results.append(res["url"])
                # only get the first url
                break

    return results

locations_recommendations_url = create_search_term_recommendations(subscription_key, acitivities_json)

In [63]:
locations_recommendations_url

['https://www.discoverwalks.com/blog/london/londons-best-kept-secrets-30-kid-friendly-activities-and-places-to-visit/']

In [19]:
# original one
locations_recommendations

[{'siteName': 'Discover Walks',
  'url': 'https://www.discoverwalks.com/blog/london/londons-best-kept-secrets-30-kid-friendly-activities-and-places-to-visit/'},
 {'siteName': 'visitlondon.com',
  'url': 'https://www.visitlondon.com/things-to-do/family-activities/101-things-to-do-with-kids-in-london'},
 {'siteName': 'girl gone london',
  'url': 'https://girlgonelondon.com/things-to-do-in-london-with-kids/'},
 {'siteName': 'Four Around The World',
  'url': 'https://fouraroundtheworld.com/things-to-do-in-london/'}]

### Using Llama 2 to summarise and bundle up the search results by Bing API

[Tutorial 1](https://www.youtube.com/watch?v=dBoQLktIkOo)

In [23]:
locations_recommendations_url = ["https://www.discoverwalks.com/blog/london/londons-best-kept-secrets-30-kid-friendly-activities-and-places-to-visit/",
                                  "https://www.visitlondon.com/things-to-do/openspace/best-parks-in-london"]


In [37]:
from decouple import config # obtains information inside .ini or .env
import replicate

# admin configuation 
replicate_api_token = config('replicate_api_token')

# prompt generation
pre_prompt = """You are a helpful, respectful and honest assistant. 
                Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. 
                Please ensure that your responses are socially unbiased and positive in nature.
                """

# This one only outputs individual places (without description)
prompt_input_1 = """Please summarise the following websites: 
                {}
                Create a combined list of destinations found from the sites above. 
                It must be in a single line separated by commas. 
                Format your output in the form of "Places listed: [place 1, place 2, place 3, ....]"
                """.format(locations_recommendations_url)

# Outputs individual places with description
prompt_input_2 = """Please summarise the following websites: 
                {}
                Create a combined list of destinations found from the sites above.
                Give a brief description for each destination presented. 
                It must be in a single line separated by commas. 
                Format your output in the form of "Places listed: 
                * place 1 - description of place 1
                * place 2 - description of place 2
                * place 3 - description of place 3
                ...."
                """.format(locations_recommendations_url)


# getting the response
def generate_location_recommendation(replicate_api_token, pre_prompt, prompt_input):
    response = replicate.run(
        "meta/llama-2-13b-chat",
        input={
            "seed": 2,
            "top_k": 0,
            "top_p": 1,
            "prompt": prompt_input,
            "max_tokens": 612,  # increase this to generate more results  ##SOURCE OF ERROR??
            "temperature": 0.75,
            "system_prompt": pre_prompt,
            "length_penalty": 1,
            "prompt_template": "<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{prompt} [/INST]",
            "presence_penalty": 0.5,
            "log_performance_metrics": False
        },
    )

    # formatting 1: turning it into a whole paragraph
    full_response = ''
    for item in response:
        full_response += item

    # formatting 2: only capturing those from "places listed:..." format
    def format_2(full_response):
        for item in full_response.split("\n"):
            if "Places listed:" in item:
                return item.strip("Places listed:").split(',')
    #return format_2(full_response)

    # format 3: same as format_2 but to be used with descriptions
    def format_3(full_response):
        for item in full_response.split("\n\n"):
            if "*" in item: # they would be given in a "* Place - Description" format
                #print(item.split("\n*"))
                return item.split("\n*")
            
    return format_3(full_response)
            

locations_recommendations_list = generate_location_recommendation(replicate_api_token, pre_prompt, prompt_input_2)
    

In [38]:
# for prompt 2
locations_recommendations_list

['* The London Eye - a iconic Ferris wheel offering stunning views of the city',
 ' The British Museum - a world-class museum featuring a vast collection of artifacts from around the world',
 ' The Tower of London - a historic castle and former prison, now home to the Crown Jewels',
 ' Hyde Park - a sprawling green space perfect for picnics, boating, and people-watching',
 ' Kew Gardens - a beautiful botanical garden featuring thousands of plant species and a treetop walkway',
 ' The Shard - a sleek skyscraper offering panoramic views of the city from its viewing platform',
 ' The National Gallery - a renowned art museum featuring works by artists such as Leonardo da Vinci, Michelangelo, and Vincent van Gogh',
 ' Covent Garden - a bustling square filled with street performers, boutiques, and restaurants',
 ' The Natural History Museum - a family-friendly museum featuring exhibits on dinosaurs, space, and the natural world',
 ' The Sky Garden - a free public garden on the 35th floor of 

In [53]:
# for prompt 1
locations_recommendations_list

['Hyde Park',
 " Regent's Park",
 ' Primrose Hill',
 ' Hampstead Heath',
 ' Green Park',
 " St. James's Park",
 ' Covent Garden',
 ' Kew Gardens',
 ' Richmond Park',
 ' Hampton Court Palace',
 ' Tower of London',
 ' London Zoo',
 ' Natural History Museum',
 ' British Museum',
 ' Science Museum',
 ' Diana Memorial Playground',
 ' Grant Museum of Zoology',
 ' London Bridge',
 ' Tower Bridge',
 ' Oxford Street',
 ' Bond Street',
 ' Camden Market',
 ' Brick Lane Market',
 ' Borough Market',
 ' and Southbank Centre.']

### Using Bing image query search API to get image photos and descriptions & formatting it to json

In [77]:
locations_recommendations_list = ['* The London Eye - a iconic Ferris wheel offering stunning views of the city',
 ' The British Museum - a world-class museum featuring a vast collection of artifacts from around the world',
 ' The Tower of London - a historic castle and former prison, now home to the Crown Jewels']

In [80]:
import requests

# admin configuation 
subscription_key = config('subscription_key_search', default='')


# declaring bing end-point for search
def get_results_for(subscription_key, search_term):
    search_url = "https://api.bing.microsoft.com/v7.0/images/search"

    headers = {"Ocp-Apim-Subscription-Key": subscription_key}
    params = {"q": search_term, "license": "Public", "imageType": "photo"}  ##ADD IMAGE SIZE?

    response = requests.get(search_url, headers=headers, params=params)
    response.raise_for_status()
    search_results = response.json()
    thumbnail_urls = [img["thumbnailUrl"] for img in search_results["value"][:1]] # only retrieve the first one
    return thumbnail_urls


def create_search_term_recommendations(subscription_key, locations_recommendations_list):
    results_json = []

    # unpacking the locations_recommendations_list to format it and send to API
    for index, loc in enumerate(locations_recommendations_list):
        
        loc = loc.split("-")
        location = loc[0].replace("*", "").strip()
        description = loc[1].replace(".", "").strip()
        description = description[0].upper() + description[1:]

        img_url = get_results_for(subscription_key, location)

        # package everything in json as: {}
        results_json.append({"index": index, 
                             "location": location, 
                             "description": description,
                             "image_url": img_url})
        

    return results_json

create_search_term_recommendations(subscription_key, locations_recommendations_list)

[{'index': 0,
  'location': 'The London Eye',
  'description': 'A iconic Ferris wheel offering stunning views of the city',
  'image_url': ['https://tse1.mm.bing.net/th?id=OIP.wUrEUGRR-TEkPow9CZOrtAHaE8&pid=Api']},
 {'index': 1,
  'location': 'The British Museum',
  'description': 'A world',
  'image_url': ['https://tse1.mm.bing.net/th?id=OIP.UPuM4rlAPQHNu2bKzyTLxwHaFj&pid=Api']},
 {'index': 2,
  'location': 'The Tower of London',
  'description': 'A historic castle and former prison, now home to the Crown Jewels',
  'image_url': ['https://tse3.mm.bing.net/th?id=OIP.U6aKBRt0ho2FBKXjuOo6-QHaE8&pid=Api']}]