# Disclaimer

This script scrapes its data because the Mountain Project API was deprecated in 2020.

Note that scraping data from websites may be against their terms of service, so it's important to check the site's policies before attempting to scrape data. Additionally, scraping large amounts of data or repeatedly scraping a site can put a strain on their servers and may be considered unethical. Always be respectful of the site's resources and policies when scraping data.



In [91]:
import pandas as pd
import plotly.express as px
import datetime as dt

pd.options.mode.chained_assignment = None  # Silence pandas' chained-assignment (SettingWithCopy) warning
# To make this virtualenv selectable as a Jupyter kernel, register it with:
# ipython kernel install --user --name=your_env_name

# Terminal transcript of registering the kernel and launching the notebook server.
# It is a bare string expression, so evaluating it has no effect.
"""
(venv) (main) ~/Documents/climb ++ ipython kernel install --user --name=venv
Installed kernelspec venv in /Users/me/Library/Jupyter/kernels/venv
(venv) (main) ~/Documents/climb ++
(venv) (main) ~/Documents/climb ++
(venv) (main) ~/Documents/climb ++
(venv) (main) ~/Documents/climb ++ jupyter-notebook
"""
pass

In [160]:
def insert_newlines(string, splitlength=50):
    """Break *string* into fixed-width chunks joined by newlines.

    The text is cut every ``splitlength`` characters regardless of word
    boundaries; the final chunk may be shorter.

    Args:
        string: Text to wrap. Any non-string value (e.g. ``None`` or a NaN
            float from a missing DataFrame cell) yields an empty string.
        splitlength: Maximum number of characters per line.

    Returns:
        The wrapped text, or ``""`` when ``string`` is not a string.

    Raises:
        ValueError: If ``splitlength`` is 0 (``range`` rejects a zero step).
    """
    # isinstance (rather than an exact type comparison) also accepts str subclasses.
    if not isinstance(string, str):
        return ""
    return '\n'.join(
        string[start:start + splitlength]
        for start in range(0, len(string), splitlength)
    )


In [196]:
# Tick-export CSV endpoint for the user whose ticks are charted below.
url = 'https://www.mountainproject.com/user/200169225/seth-drew/tick-export'
# pandas downloads and parses the CSV directly from the URL, so a separate
# HTTP request beforehand would only hit the site a second time.
df = pd.read_csv(url)


In [197]:
# Load the rating-code lookup table. Column names are supplied explicitly,
# so every row of the file is read as data.
df_codes = pd.read_csv(
    "grade_codes.csv",
    names=["Rating Code", "Rating Name"],
)
df_codes = df_codes.set_index("Rating Code")

# Plain dict: rating code -> standardized rating name.
std_rating_names_mapping = df_codes.loc[:, 'Rating Name'].to_dict()

In [198]:
# Overwrite each tick's rating with the standardized name for its rating code.
# Only the 'Rating Code' column is needed, so map over that column instead of
# building a row object per tick. Item assignment is used because attribute
# assignment does not create a column when one is missing.
# A code absent from the mapping still raises KeyError.
df['Rating'] = df['Rating Code'].map(lambda code: std_rating_names_mapping[code])

In [199]:
# DataFrame.query() expressions selecting rows by their numeric 'Rating Code'.
# NOTE(review): the bounds presumably match the code ranges in grade_codes.csv — confirm.
sport_code_range = '950 <= `Rating Code` <= 15000'
bouldering_code_range = '20000 <= `Rating Code` <= 22000'

In [200]:
# A tick counts as "hung" when either style column records 'Fell/Hung'.
fell_or_hung = (df['Lead Style'] == 'Fell/Hung') | (df['Style'] == 'Fell/Hung')

hungdf = df.loc[fell_or_hung]

# Everything else is a clean ascent; keep only the first tick per route name.
redpoint_df = df.loc[~fell_or_hung].drop_duplicates(subset=['Route'])

# Split the clean ascents by rating-code range. Only 'Rating Code' is
# filtered here; the 'Style' column is not consulted.
sportdf = redpoint_df.query(sport_code_range)
boulderingdf = redpoint_df.query(bouldering_code_range)


In [203]:
def plot(df):
    """Show a stacked bar chart of ticks per rating, colored by tick age.

    One bar segment is drawn per tick. Bars are ordered along the x axis by
    'Rating Code', and within a rating by how long ago the tick was logged.

    Args:
        df: Tick DataFrame with 'Date', 'Notes', 'Rating', 'Rating Code' and
            'Route' columns. It is not modified.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Work on a copy so the derived columns are not written back into the
    # caller's frame (which is often a slice of the full tick table).
    df = df.copy()

    df['Date'] = pd.to_datetime(df['Date'])
    age = pd.Timestamp.today() - df['Date']
    df['Days Ago'] = age.dt.days
    df['Months Ago'] = age / pd.Timedelta(days=30)  # fractional 30-day months
    # Plotly hover labels break lines on <br>, not on "\n".
    df['Note'] = df['Notes'].apply(
        lambda note: insert_newlines(note).replace('\n', '<br>')
    )
    df = df.sort_values(['Rating Code', 'Days Ago'])

    fig = px.bar(
        df,
        x='Rating',
        color='Months Ago',
        barmode='stack',
        title='Route by Rating, Stacked by Age',
        hover_data=['Route', "Days Ago", "Note"],
    )

    # Keep the x axis in rating-code order rather than order of first appearance.
    fig.update_layout(
        xaxis={'categoryorder': 'array', 'categoryarray': df['Rating'].tolist()}
    )

    fig.show()

In [204]:
# Chart the non-'Fell/Hung' ticks whose rating code is in the sport range.
plot(sportdf)

In [205]:
# Chart every tick recorded as 'Fell/Hung' (all rating codes).
plot(hungdf)

In [206]:
# Chart the non-'Fell/Hung' ticks whose rating code is in the bouldering range.
plot(boulderingdf)

# Reference

In [27]:
## Scraping search results that are rendered dynamically by JavaScript

In [28]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Set up the browser
browser = webdriver.Chrome()
try:
    browser.get("https://www.mountainproject.com/search?q=seth-drew&type=users")

    # Wait (up to 10 seconds) for the search results to render; a user avatar
    # image appearing in the DOM is the signal that they have loaded.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.XPATH, "//img[@class='user-img-avatar']")))

    # Get the data
    data = browser.page_source
finally:
    # Clean up: always close the browser, even if the wait times out or the
    # cell is interrupted, so no Chrome process is left running.
    browser.quit()

# Process the data as needed

KeyboardInterrupt: 

In [None]:
# Profile page of the user whose ticks are analysed in this notebook.
url = 'https://www.mountainproject.com/user/200169225'

# Make a GET request to the URL and retrieve the HTML content
# NOTE(review): `requests` is not imported anywhere in the visible cells — confirm
# it is imported before this cell is run.
response = requests.get(url)
html_content = response.content  # raw response body as bytes