In [1]:
import re
import json
import requests
import dateparser
import pandas as pd
from bs4            import BeautifulSoup
from pprint         import pprint
from IPython.display import Image
from IPython.core.display import HTML
from IPython import display
from pymongo import MongoClient
from bson.objectid import ObjectId
from pymongo.server_api import ServerApi
from random import randint

In [2]:
# Load the MongoDB credentials from a local JSON file so they never appear in
# the notebook itself. `json` comes from the imports cell at the top.
# The file is read as UTF-8 explicitly because open()'s default encoding is
# locale-dependent, which could mis-decode non-ASCII characters in a password.
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

username = config['username']
password = config['password']

In [2]:
def get_id(url):
    """Extract the numeric Mountain Project ID from an area or route URL.

    Args:
        url: A URL or path containing ``/area/<digits>`` or ``/route/<digits>``.

    Returns:
        The ID as a string of digits.

    Raises:
        ValueError: If ``url`` contains no area/route ID. The message includes
            the offending URL.
    """
    match = re.search(r'/(area|route)/(\d+)', url)

    if match is None:
        # f-string so the offending URL is actually interpolated into the message.
        raise ValueError(f'Unable to locate route/area ID in url - {url}')

    # Group 1 is the kind ("area" or "route"); group 2 is the numeric ID.
    return match.group(2)

def get_directory():
    """Scrape the Mountain Project route guide into a name -> link mapping.

    Downloads the route-guide page, looks inside the element whose id is
    ``route-guide`` and collects the first link found in each ``<strong>`` tag.

    Returns:
        dict mapping the link text (area name) to the link's ``href``.
    """
    # Make request to get the route guide page
    url = 'https://www.mountainproject.com/route-guide'
    content = requests.get(url).content
    soup = BeautifulSoup(content, features='html.parser')
    guide = soup.find(id='route-guide')

    directory = {}

    for strong in guide.find_all('strong'):
        anchor = strong.find('a')

        # A <strong> without a usable link is not an area entry; skip it
        # instead of crashing the whole scrape on a stray bold label.
        if anchor is None or not anchor.get('href'):
            continue

        directory[anchor.text] = anchor['href']

    return directory

def get_area(id):
    """Fetch an area from the Mountain Project v2 API.

    Args:
        id: Area ID; interpolated into the request URL as-is.

    Returns:
        The decoded JSON body of the response.
    """
    response = requests.get(f'https://www.mountainproject.com/api/v2/areas/{id}')
    return response.json()

def get_photos(route_id):
    """Collect the lazily-loaded photos shown on a route's web page.

    Only ``<img>`` tags that carry a ``data-src`` attribute are considered.

    Args:
        route_id: Route ID; interpolated into the page URL as-is.

    Returns:
        list of ``{'url': ..., 'alt': ...}`` dicts, in page order. ``url`` is
        the ``data-src`` value with ``smallMed`` replaced by ``medium``;
        ``alt`` is the image's alt text, or ``''`` when the tag has none.
    """
    url = f'https://www.mountainproject.com/route/{route_id}'
    content = requests.get(url).content
    soup = BeautifulSoup(content, features='html.parser')

    return [
        {
            'url': img['data-src'].replace('smallMed', 'medium'),
            # Not every <img> has an alt attribute; indexing would raise KeyError.
            'alt': img.get('alt', ''),
        }
        for img in soup.find_all('img')
        if 'data-src' in img.attrs
    ]

def get_route(id):
    """Fetch a route from the Mountain Project v2 API and attach its photos.

    Args:
        id: Route ID; used for both the API request and the photo lookup.

    Returns:
        The decoded JSON body with an added ``'photos'`` key holding the
        result of ``get_photos(id)``.
    """
    response = requests.get(f'https://www.mountainproject.com/api/v2/routes/{id}')
    route = response.json()
    route['photos'] = get_photos(id)
    return route

def get_routes_in(area_id, route_list, only_ids = True):
    """Recursively collect every route beneath an area into ``route_list``.

    Fetches the area, then walks its ``children``: a child whose ``type`` is
    ``'Route'`` is appended to ``route_list``, any other child is treated as a
    sub-area and descended into.

    Args:
        area_id: ID of the area to start from.
        route_list: List that is appended to in place; nothing is returned.
        only_ids: When True, append each route's ID as a string. When False,
            append the full record returned by ``get_route`` instead.
    """
    area = get_area(area_id)

    # An area payload without a 'children' key is treated as having none,
    # matching how populate_areas_in guards the same field.
    for child in area.get('children', []):
        child_id = str(child['id'])

        if child['type'] != 'Route':
            get_routes_in(child_id, route_list, only_ids)
        elif only_ids:
            route_list.append(child_id)
        else:
            route_list.append(get_route(child_id))

# DANGER: this function might take a couple billion years to run
def get_all_routes_ids():
    """Collect the ID of every route under every top-level directory entry.

    The 'International' entry of the route guide is excluded. Each remaining
    entry is crawled recursively, so this issues one request per area visited.

    Returns:
        list of route IDs (strings), in crawl order.
    """
    directory = get_directory()

    # pop() with a default: a guide page without an 'International' entry
    # should not abort the crawl with a KeyError.
    directory.pop('International', None)

    route_list = []

    for link in directory.values():
        get_routes_in(get_id(link), route_list)

    return route_list

In [None]:
# Crawl a single area and collect the full record (not just the ID) of every
# route beneath it; `routes` is filled in place by get_routes_in.
routes = []
area_id = '105867832'
get_routes_in(area_id, routes, only_ids = False)

In [None]:
# List every crawled route that has at least one photo.
for route in routes:
    if route['photos']:
        # Double quotes outside, single quotes inside: reusing the same quote
        # character within an f-string is a SyntaxError before Python 3.12.
        print(f"{route['title']}, {route['difficulty']}")

        # Display the first photo
        # display.display(HTML(f'<img src="{route["photos"][0]["url"]}">'))
        # Image(url = route['photos'][0]['url'])

In [3]:
# Build the Atlas connection string from the credentials loaded from config.json.
# NOTE(review): username/password are interpolated unescaped. PyMongo requires
# reserved characters in URI credentials to be percent-escaped — confirm the
# values in config.json are already escaped, or escape them here.
uri = f'mongodb+srv://{username}:{password}@cluster0.phznzut.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0'
client = MongoClient(uri, server_api = ServerApi('1'))

# Connectivity check. A failure is only printed, not raised, so the cells
# below still run (and will fail later) if the deployment is unreachable.
try:
    client.admin.command('ping')
    print('Pinged your deployment. You successfully connected to MongoDB!')
except Exception as e:
    print(e)

# Handles used by the populate_* functions: database 'mp' with one collection
# for areas and one for routes.
db = client['mp']
db_areas = db['areas']
db_routes = db['routes']

Pinged your deployment. You successfully connected to MongoDB!


In [14]:
def populate_areas_in(area_id):
    """Store an area and, recursively, all of its sub-areas in ``db_areas``.

    Fetches the area, prints its title as progress output, inserts it unless a
    document with the same ``id`` is already stored, then recurses into every
    child whose ``type`` is not ``'Route'``.

    Args:
        area_id: ID of the area to start from. Top-level callers pass the
            string returned by ``get_id``; recursive calls pass ``child['id']``
            unchanged.
    """
    area = get_area(area_id)
    print(area['title'])

    # Look the document up by the id it is actually stored under rather than by
    # the caller's argument: the argument may be a string while the payload's
    # own id is presumably numeric (TODO confirm against the API), and a type
    # mismatch would never match, inserting a duplicate on every re-run.
    stored_id = area.get('id', area_id)

    if db_areas.find_one({ 'id': stored_id }) is None:
        db_areas.insert_one(area)

    for child in area.get('children', []):
        if child['type'] != 'Route':
            populate_areas_in(child['id'])

def populate_areas():
    """Store every area reachable from the route guide, except 'International'.

    Each remaining directory entry is resolved to its area ID and handed to
    ``populate_areas_in``, which walks that entry's whole sub-area tree.
    """
    directory = get_directory()

    # pop() with a default: a guide page without an 'International' entry
    # should not abort the run with a KeyError.
    directory.pop('International', None)

    for link in directory.values():
        populate_areas_in(get_id(link))

In [None]:
# Crawl and store every non-International area. This walks the whole area tree
# with one HTTP request per area, so expect a long run.
populate_areas()

In [17]:
def populate_routes_from_areas():
    """Fetch and store every route listed under the stored leaf areas.

    Reads the areas in ``db_areas`` whose ``is_leaf`` field is True, and for
    each child not yet present in ``db_routes`` (matched on ``id``) fetches the
    route with ``get_route``, prints a progress line and inserts it.
    """
    leaf_areas = db_areas.aggregate([
        { '$match': { 'is_leaf': True } },
        # Sample size is the total area count, i.e. at least as many documents
        # as can match — presumably to visit the leaf areas in random order.
        { '$sample': { 'size': db_areas.count_documents({}) } },
    ])

    for area in leaf_areas:
        for child in area.get('children', []):
            child_id = child['id']

            # Already stored: skip the (slow) network fetch.
            if db_routes.find_one({ 'id': child_id }) is not None:
                continue

            route = get_route(str(child_id))
            # Double quotes outside, single quotes inside: reusing the same
            # quote within an f-string is a SyntaxError before Python 3.12.
            print(f"Adding {route['title']}, {route['difficulty']}")
            db_routes.insert_one(route)


In [None]:
# Fetch and store every route listed under the leaf areas already in MongoDB,
# skipping routes that are already stored.
populate_routes_from_areas()