In [201]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline

from bs4 import BeautifulSoup
import requests
import re
import pyinputplus as pyip
import requests
import pickle
import random
import math

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Allow user to select data source

def input_choice(choices, prompt='Select data source:\n'):
    """Show a menu of `choices` and return the option the user picks.

    Args:
        choices: list of option strings to offer.
        prompt: text shown above the menu. Defaults to the data-source
            question, so existing one-argument calls behave exactly as before;
            pass a different prompt when the menu asks about something else.

    Returns:
        Whatever ``pyip.inputMenu`` returns for the selected option.
    """
    return pyip.inputMenu(prompt=prompt, choices=choices)

# Load the tick export into `df`, either from a local CSV file or directly from a
# Mountain Project profile link. Each branch keeps asking until the read succeeds.
choices = ["csv", "link"]
resp = input_choice(choices)
if resp == choices[0]:
    while True:
        try:
            # Kept in its own name so the menu answer in `resp` is not overwritten by a
            # file path (which the second branch would then compare against "link").
            csv_path = pyip.inputFilepath(prompt='input csv file path\n', mustExist=True)
            df = pd.read_csv(csv_path)
        except Exception as err:
            # Best effort: show what went wrong and ask again.
            print(err)
            continue
        break
elif resp == choices[1]:
    while True:
        try:
            profile_link = input('input link to profile of format "https://www.mountainproject.com/user/..."\n')
            # Drop pasted whitespace and a trailing "/" so the export URL does not end up as ".../ /tick-export" or "...//tick-export".
            df = pd.read_csv(f"{profile_link.strip().rstrip('/')}/tick-export")
        except Exception as err:
            print(err)
            continue
        break

In [179]:
# NOTE(review): this unconditionally reloads `df` from the local ./ticks.csv export, so it
# replaces whatever the data-source prompt cell above loaded — confirm this is intended.
df = pd.read_csv('./ticks.csv')

# Ordered grade scales, easiest first; the position in each list is used to compare grades.
# YDS: 5.1-5.9 are plain numbers, 5.10-5.15 carry an a-d letter, and the scale is capped at 5.16a.
YDS_GRADES = (
    [f'5.{number}' for number in range(1, 10)]
    + [f'5.{number}{letter}' for number in range(10, 16) for letter in 'abcd']
    + ['5.16a']
)
# Bouldering V-scale, v1 through v17.
V_GRADES = [f'v{number}' for number in range(1, 18)]
# Protection / risk suffixes that can follow a grade.
RISK_GRADES = 'pg pg13 r x'.split()

In [180]:
# Transform all strings to lower case so we don't have to worry about it for operations. Can be turned to title case later for presentation.
# Done column by column with Series.map because DataFrame.applymap is deprecated in recent pandas releases.
df = df.apply(lambda col: col.map(lambda s: s.lower() if isinstance(s, str) else s))

# Remove all aid, ice, snow, TR-only, and trad/boulder route types as they are not relevant.
# na=False makes a missing Route Type count as "no match", so such rows are kept and every mask is strictly boolean.
route_type = df['Route Type']
is_aid_ice_snow = route_type.str.contains(r'aid|ice|snow', na=False)
is_tr_only = route_type.str.fullmatch(r'tr', na=False)  # must be a full match: a partial match would detect "trad" too!
is_trad_boulder = route_type.str.contains(r'trad', na=False) & route_type.str.contains(r'boulder', na=False)
# .copy() yields an independent frame, so the .loc assignments that follow do not write into a slice of the raw frame.
df = df[~(is_aid_ice_snow | is_tr_only | is_trad_boulder)].copy()

# "trad, sport" goes to "trad". If it uses gear it's trad!
is_trad_and_sport = df['Route Type'].str.contains(r'trad', na=False) & df['Route Type'].str.contains(r'sport', na=False)
df.loc[is_trad_and_sport, 'Route Type'] = 'trad'

# "x, alpine" and "x, tr" goes to "x" Alpine and tr tags are not useful.
def rem_route_el_from_list(ousted, seperator):
    """Drop the `ousted` tag from the 'Route Type' of every row of the global `df` that mentions it.

    Rows whose 'Route Type' matches `ousted` are split on `seperator`, the
    pieces equal to `ousted` are discarded, and what is left is re-joined
    with ", ". `df` is modified in place; nothing is returned.
    """
    def without_ousted(route_type):
        kept = []
        for piece in route_type.split(seperator):
            if piece != ousted:
                kept.append(piece)
        return ", ".join(kept)

    # `== True` turns the NaN produced for missing route types into False.
    mentions_ousted = df['Route Type'].str.contains(ousted) == True
    cleaned = df[mentions_ousted]['Route Type'].apply(without_ousted)
    df.loc[mentions_ousted, 'Route Type'] = cleaned

rem_route_el_from_list('alpine', ', ')
rem_route_el_from_list('tr', ', ')

# Extract route unique identifier from URL and create a new column for it.
# The identifier is taken as the fifth "/"-separated piece of the URL.
# Plain assignment appends the column when it is missing and overwrites it otherwise, so
# (unlike df.insert) re-running the cell does not raise; the .str accessors yield NaN for a
# missing or too-short URL instead of raising.
df['Route ID'] = df['URL'].str.split('/').str[4]


In [181]:
# The first choice is what type of climbing to analyze.

choices = ["roped", "sport", "trad", 'boulder']
# Asked through pyip.inputMenu directly so the prompt matches the question
# (input_choice's built-in prompt talks about the data source).
route_type_selection = pyip.inputMenu(prompt='Select route type to analyze:\n', choices=choices)
if route_type_selection == 'roped':
    # "roped" covers both rope disciplines.
    df = df[df['Route Type'].isin(['sport', 'trad'])]
else:
    df = df[df['Route Type'] == route_type_selection]

# Then ask if the user would like to ignore climbs below a certain grade.
# The grade scale must match the discipline. The previous test
# `route_type_selection == 'roped' or 'sport' or 'trad'` was always truthy, so boulder
# selections were filtered against YDS grades and then failed on V_GRADES.index(<YDS grade>).
grade_scale = V_GRADES if route_type_selection == 'boulder' else YDS_GRADES
min_grade_included = pyip.inputChoice(prompt='Select a minimum grade to include (blank includes all):\n', choices=grade_scale, blank=True)
if min_grade_included != '':
    # Slicing from the chosen grade to the end keeps that grade and every harder one.
    # NOTE(review): 'Rating' still holds the raw strings at this point (risk suffixes and split
    # grades such as "5.10a/b" are only normalised in later cells), so such rows match no
    # entry of the scale and are dropped here — confirm whether this filter should run later.
    df = df[df['Rating'].isin(grade_scale[grade_scale.index(min_grade_included):])]

Select data source:
* roped
* sport
* trad
* boulder
Select a minimum grade to include (blank includes all):


In [None]:
#Handle route length outliers
# Plausible length bounds (in ft) depend on the discipline. A membership test is required:
# `route_type_selection == 'roped' or 'sport' or 'trad'` is always truthy and used to
# overwrite the boulder bounds with the roped ones.
if route_type_selection == 'boulder':
    MIN_LENGTH = 0
    MAX_LENGTH = 55 #"Too Tall to Fall" is 50'
elif route_type_selection in ('roped', 'sport', 'trad'):
    MIN_LENGTH = 25
    MAX_LENGTH = 4500 #Trango towers are 4,300' tall

# Ask the user to correct every length at or beyond the bounds.
length_outliers = df[(df['Length'] <= MIN_LENGTH) | (df['Length'] >= MAX_LENGTH)]
for loop_count, (index, data) in enumerate(length_outliers.iterrows()):
    updated_length = pyip.inputNum(f"[{loop_count+1}/{length_outliers.shape[0]}] Input Correct Length for Route: {data['Route']} (Currently {data['Length']}ft):\n", min=MIN_LENGTH, max=MAX_LENGTH)
    df.at[index, 'Length'] = updated_length

#Fill empty route lengths
length_missing = df[df['Length'].isnull()]
for loop_count, (index, data) in enumerate(length_missing.iterrows()):
    updated_length = pyip.inputNum(f"[{loop_count+1}/{length_missing.shape[0]}] Input Estimated Length for Route: {data['Route']}:\n", min=MIN_LENGTH, max=MAX_LENGTH)
    df.at[index, 'Length'] = updated_length

#TODO allow user to select "assign median height from area"

In [182]:
# Split each rating into whitespace-separated tokens and drop the tokens of the grading
# system that is NOT being analysed, so the first remaining token is the grade itself.
if route_type_selection == 'boulder':
    # Bouldering: drop YDS tokens. Stripping the V grades here (as is done for roped
    # climbing) would leave nothing behind and make the first-token lookup below fail.
    rating_split = df['Rating'].apply(lambda row: [val for val in row.split() if not val.startswith('5.')])
else:
    #Remove YDS-Vgrade combos
    rating_split = df['Rating'].apply(lambda row: [val for val in row.split() if val not in V_GRADES])

#Separate risk rating to new column
df.insert(3,'Risk','')
df['Risk'] = rating_split.apply(lambda row: [val for val in row if val in RISK_GRADES]).apply(lambda x: "".join(x))

#Reduce Rating column to just rating (an empty rating stays empty instead of raising IndexError)
df['Rating'] = rating_split.apply(lambda x: x[0] if x else '')

In [183]:
# Collapse split grades ("5.10a/b", "v3-4") and +/- grades onto the plain grades of the
# selected scale, using the strategy the user picks.
# Ask user if they would prefer to round down, round up, round evenly by random, or hand-determine.
grade_homo_choice = ['round down', 'round up', 'round evenly by random', 'hand determine']
GRADE_HOMO_PROMPT = 'Select how to resolve split grades:\n'


def apply_grade_map(grademap):
    """Replace every df['Rating'] value that is a key of `grademap` with its mapped grade."""
    grade_change_subset = df['Rating'].isin(list(grademap.keys()))
    df.loc[grade_change_subset, 'Rating'] = df[grade_change_subset]['Rating'].map(grademap)


def grademoderate():
    """Strip the +/- modifier from the moderate YDS grades listed in `grademoderatemap`."""
    apply_grade_map(grademoderatemap)


def grade_split(upmap, downmap):
    """Round about half of the ticks of each ambiguous grade up and the rest down.

    For every distinct rating that is a key of `upmap`, a random 50% sample of
    its rows is mapped through `upmap`. Every row that still holds a key of
    `downmap` afterwards is mapped through `downmap`.
    """
    ambiguous = df[df['Rating'].isin(list(upmap.keys()))]
    for grade in ambiguous['Rating'].unique():
        to_change = ambiguous[ambiguous['Rating'] == grade]
        changed_up = to_change.sample(frac=0.5)['Rating'].map(upmap)
        df.loc[changed_up.index, 'Rating'] = changed_up
    # Rows that were not sampled still carry the ambiguous grade: round them down.
    apply_grade_map(downmap)


def hand_determine(ambiguous_grades, grade_choices):
    """Ask the user for a replacement grade for every row whose Rating is in `ambiguous_grades`."""
    needs_grade_corr = df[df['Rating'].isin(ambiguous_grades)]
    for loop_count, (index, data) in enumerate(needs_grade_corr.iterrows()):
        updated_grade = pyip.inputChoice(prompt=f"[{loop_count+1}/{needs_grade_corr.shape[0]}] Input Grade Correction For: {data['Route'].title()}:\n", choices=grade_choices)
        df.at[index, 'Rating'] = updated_grade


# A membership test is required here: `x == "roped" or "sport" or "trad"` is always truthy,
# which made the YDS prompts run for boulder selections as well.
if route_type_selection in ("roped", "sport", "trad"):
    # 5.6-5.9 only carry a +/- modifier, which is simply dropped.
    grademoderatemap = {f'5.{number}{modifier}': f'5.{number}' for number in range(6, 10) for modifier in '-+'}
    # 5.10-5.15: split letters, "-", "+" and the bare number resolve to a single letter grade.
    suffix_down = {'a/b': 'a', '-': 'a', 'b/c': 'b', '': 'b', 'c/d': 'c', '+': 'c'}
    suffix_up = {'a/b': 'b', '-': 'b', 'b/c': 'c', '': 'c', 'c/d': 'd', '+': 'd'}
    gradedownmap = {f'5.{number}{suffix}': f'5.{number}{letter}' for number in range(10, 16) for suffix, letter in suffix_down.items()}
    gradeupmap = {f'5.{number}{suffix}': f'5.{number}{letter}' for number in range(10, 16) for suffix, letter in suffix_up.items()}

    resp = pyip.inputMenu(prompt=GRADE_HOMO_PROMPT, choices=grade_homo_choice)

    if resp == grade_homo_choice[0]:
        grademoderate()
        apply_grade_map(gradedownmap)
    elif resp == grade_homo_choice[1]:
        grademoderate()
        apply_grade_map(gradeupmap)
    elif resp == grade_homo_choice[2]:
        grademoderate()
        grade_split(gradeupmap, gradedownmap)
    elif resp == grade_homo_choice[3]:
        # Hand correction covers the +/- moderates too, since they were not collapsed above.
        hand_determine(list(grademoderatemap.keys()) + list(gradedownmap.keys()), YDS_GRADES)

# The menu option is spelled 'boulder'; the earlier comparison against "Bouldering" never matched.
elif route_type_selection == "boulder":
    gradedownmap = {f'v{number}-{number + 1}': f'v{number}' for number in range(17)}
    gradeupmap = {f'v{number}-{number + 1}': f'v{number + 1}' for number in range(17)}
    gradeconmap = {f'v{number}{modifier}': f'v{number}' for number in range(18) for modifier in '-+'}

    # Remove all + and - grades
    apply_grade_map(gradeconmap)

    resp = pyip.inputMenu(prompt=GRADE_HOMO_PROMPT, choices=grade_homo_choice)

    if resp == grade_homo_choice[0]:
        apply_grade_map(gradedownmap)
    elif resp == grade_homo_choice[1]:
        apply_grade_map(gradeupmap)
    elif resp == grade_homo_choice[2]:
        grade_split(gradeupmap, gradedownmap)
    elif resp == grade_homo_choice[3]:
        # Only the split grades are still ambiguous (+/- grades were collapsed above), so the
        # YDS moderate map has no business here. 'v0' is offered too because "v0-1" can
        # legitimately resolve to it even when V_GRADES itself starts at v1.
        boulder_choices = V_GRADES if 'v0' in V_GRADES else ['v0'] + V_GRADES
        hand_determine(list(gradedownmap.keys()), boulder_choices)

Select data source:
* round down
* round up
* round evenly by random
* hand determine


In [None]:
# Create the columns the scraping steps fill in. df.insert raises when the column already
# exists, so each one is only added when missing; that keeps the cell re-runnable.
if 'Listed Pitches' not in df.columns:
    # Placed right after 'Pitches' so the ticked and the listed pitch counts sit side by side.
    df.insert(df.columns.get_loc('Pitches')+1,'Listed Pitches','')
for scraped_column in ('re mainpage', 're statpage'):
    if scraped_column not in df.columns:
        df.insert(len(df.columns), scraped_column, '')

def insert_str_to_address(url, insert_phrase):
    """Return `url` with `insert_phrase` added as a new "/"-separated piece at index 4.

    The URL is split on "/", `insert_phrase` is placed before the piece that
    currently sits at index 4 (or appended when there are fewer pieces), and
    the pieces are joined with "/" again.
    """
    pieces = url.split('/')
    head, tail = pieces[:4], pieces[4:]
    return '/'.join(head + [insert_phrase] + tail)

def page_download(url):
    """Fetch `url` with a 10 second timeout.

    Args:
        url: address to request.

    Returns:
        The response object returned by ``requests.get``, or None when the
        request raised. The error is printed so one bad URL does not abort a
        long scrape.
    """
    try:
        res = requests.get(url, timeout=10)
    except Exception as e:
        # Best effort: report the failure and signal it with None. Execution used to fall
        # through to print(res) with `res` unbound, which raised UnboundLocalError.
        print(e)
        return None
    print(res)
    return res

# Download every distinct page once and reuse the response for repeat ticks of the same
# route, instead of issuing one request per tick row.
unique_urls = df['URL'].unique()
mainpages = {url: page_download(url) for url in unique_urls}
statpages = {url: page_download(insert_str_to_address(url, 'stats')) for url in unique_urls}

df['re mainpage'] = df['URL'].apply(lambda x: mainpages[x])
df['re statpage'] = df['URL'].apply(lambda x: statpages[x])

In [210]:
# Let's analyze the mainpage for listed default pitch lengths. This is not included in the tick export but is helpful in determining user tick methodology

def get_pitches(res):
    """Read the listed number of pitches from a downloaded route main page.

    Args:
        res: response for the route page; only its ``.text`` (the HTML) is
            used. May be None for a page that could not be downloaded.

    Returns:
        int: the N of the first "N pitches" found in the second <td> of the
        element with class "description-details". Falls back to 1 when that
        text is absent, when the element or cell is missing, or when `res`
        is None.
    """
    if res is None:
        return 1
    soup = BeautifulSoup(res.text, 'html.parser')
    details = soup.find(class_="description-details")
    cells = details.find_all('td') if details is not None else []
    if len(cells) < 2:
        # No details table in the expected shape: treat it like "no pitch count listed".
        return 1
    pitch_search = re.search(r'(\d+) pitches', str(cells[1]))
    if pitch_search is None:
        return 1
    return int(pitch_search.group(1))

# Fill 'Listed Pitches' with the pitch count parsed from each downloaded main page.
listed_pitches = df['re mainpage'].apply(get_pitches)
df['Listed Pitches'] = listed_pitches


In [211]:
# Let's save the now scraped dataframe to a pickle file
# Written to the working directory under the name 'df_archive'.
pd.to_pickle(df, 'df_archive')

In [266]:
# To load the new pickle file
# NOTE: unpickling can execute arbitrary code — only load archives this notebook wrote itself.
# The context manager closes the file handle, which was previously left open.
with open('df_archive', 'rb') as picklefile:
    df = pickle.load(picklefile)

In [281]:
# multiple pitch climbs ticked as single pitches -> ask for grade adjustment
subset = df[(df['Pitches'] == 1) & (df['Listed Pitches'] > 1)]
for loop_count, (index, data) in enumerate(subset.iterrows()):
    updated_grade = pyip.inputChoice(prompt=f"[{loop_count+1}/{subset.shape[0]}] Input Grade Correction For: {data['Route'].title()}:\n", choices=YDS_GRADES)
    df.at[index, 'Rating'] = updated_grade

# redpoint ticks with 1x pitch attempt and no prior fell/hung ticks need further data
# The lead styles must be two separate list entries: isin(['redpoint, pinkpoint']) looked for
# that single combined string and therefore never matched a row.
# NOTE(review): the description above reads like "single pitch AND no earlier tick", but the
# mask combines the two with | and ~duplicated('Route') keeps the first row in frame order —
# confirm which selection is intended.
subset = df[(df['Lead Style'].isin(['redpoint', 'pinkpoint'])) & ((df['Pitches'] == 1) | ~df.duplicated('Route'))]
for loop_count, (index, data) in enumerate(subset.iterrows()):
    # pyip has no `inputnum`; inputInt is used because attempts are whole numbers.
    updated_attempts = pyip.inputInt(prompt=f"[{loop_count+1}/{subset.shape[0]}] : Redpoint without prior attempt data, please input number of failed attempts on {data['Route'].title()}:\n", min=1)
    # The successful go counts as one more pitch on top of the failed attempts.
    df.at[index, 'Pitches'] = updated_attempts + 1

In [278]:
# The "right" way to tick multiple attempts on a single pitch route is to tick a fell/hung for each attempt. Some people may instead tick a fell/hung with multiple pitches for multiple attempts. Someone who sends something first day but in multiple tries
# might also tick a redpoint with multiple pitches to include their attempts leading to that redpoint.

# single pitch climbs ticked as multiple pitches
# 	TR or follow -> break into multiple same day TR
# 	Lead
# 		fell/hung -> break into multiple same day fell/hung
# 		redpoint or pinkpoint -> N-1 fell hung and transform current into 1 pitch
# 		flash or onsight -> likely a mistake, change to 1 pitch

# Ticks of routes listed as a single pitch that were logged with more than one pitch,
# split by how they were climbed. Each subset is bound to a name so the follow-up
# corrections can use it; previously the first three frames were computed and discarded.
overticked = (df['Listed Pitches'] == 1) & (df['Pitches'] > df['Listed Pitches'])
on_lead = df['Style'] == 'lead'

overticked_tr_follow = df[overticked & df['Style'].isin(['tr', 'follow'])]
overticked_fell_hung = df[overticked & on_lead & (df['Lead Style'] == 'fell/hung')]
overticked_redpoint = df[overticked & on_lead & df['Lead Style'].isin(['redpoint', 'pinkpoint'])]
overticked_flash_onsight = df[overticked & on_lead & df['Lead Style'].isin(['flash', 'onsight'])]

# Last expression of the cell, so this frame is what the cell displays (as before).
overticked_flash_onsight

0       True
1      False
2       True
3       True
7       True
       ...  
712     True
728     True
730    False
731     True
732     True
Length: 644, dtype: bool

Unnamed: 0,Date,Route,Rating,Risk,Notes,URL,Pitches,Listed Pitches,Location,Avg Stars,Your Stars,Style,Lead Style,Route Type,Your Rating,Length,Rating Code,Route ID,re mainpage,re statpage
300,2021-01-15,birdland,5.7,,fuego climb. both 5.7+ cruxes were cool. loved...,https://www.mountainproject.com/route/10573311...,5,6,nevada > southern nevada > red rocks > 10-pine...,3.4,-1,lead,onsight,trad,,600.0,1900,105733115,<Response [200]>,<Response [200]>
301,2021-01-09,makunaima,5.11c,,"cleared all of the moves, but yet to have done...",https://www.mountainproject.com/route/10599032...,1,1,california > central coast > santa barbara > g...,3.9,-1,tr,,trad,,90.0,5200,105990326,<Response [200]>,<Response [200]>
303,2021-01-02,r. a. f.,5.9,,really legit. icky feeling move at the first b...,https://www.mountainproject.com/route/10572232...,1,1,california > joshua tree national park > centr...,2.0,-1,lead,onsight,trad,,70.0,2400,105722329,<Response [200]>,<Response [200]>
304,2021-01-02,w. a. c.,5.8,,much easier than shoe tapping. a good warmup f...,https://www.mountainproject.com/route/10572231...,1,1,california > joshua tree national park > centr...,1.9,-1,lead,onsight,trad,,90.0,2100,105722314,<Response [200]>,<Response [200]>
305,2021-01-02,pinky lee,5.11a,,second attempt clean. laybacky body language. ...,https://www.mountainproject.com/route/10580217...,1,1,california > joshua tree national park > centr...,2.1,-1,tr,,trad,,50.0,4600,105802175,<Response [200]>,<Response [200]>
306,2021-01-02,big moe,5.11a,r,a half dozen hangs but made it up. cool big mo...,https://www.mountainproject.com/route/10572257...,1,1,california > joshua tree national park > centr...,3.6,-1,tr,,trad,,50.0,4600,105722572,<Response [200]>,<Response [200]>
307,2021-01-02,the sound of one shoe tapping,5.8,,no crazy hard moves but heady lead as is par f...,https://www.mountainproject.com/route/10572231...,1,1,california > joshua tree national park > centr...,2.1,-1,lead,onsight,trad,,90.0,2100,105722311,<Response [200]>,<Response [200]>
308,2021-01-02,swing low,5.7,,prob my fav of 5.7 josh slab i’ve done so far.,https://www.mountainproject.com/route/10572490...,1,1,california > joshua tree national park > centr...,2.2,-1,lead,onsight,sport,,60.0,1800,105724909,<Response [200]>,<Response [200]>
309,2021-01-02,fun stuff,5.8,,bad sequence at the first bolt. not so hard if...,https://www.mountainproject.com/route/10572432...,1,1,california > joshua tree national park > centr...,2.4,-1,lead,fell/hung,trad,,60.0,2100,105724327,<Response [200]>,<Response [200]>
310,2021-01-01,left sawdust crack,5.10c,,"hung first time, clean second time. grabbed th...",https://www.mountainproject.com/route/10572263...,1,1,california > joshua tree national park > quail...,2.2,-1,tr,,trad,,40.0,3200,105722632,<Response [200]>,<Response [200]>
