In [1]:
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import nltk
%matplotlib inline

from bs4 import BeautifulSoup
import lxml
import requests
import re
import pyinputplus as pyip
import requests
import datetime as dt
from datetime import datetime
from tqdm import tqdm
import pickle
import random
import math 
import string

%load_ext autoreload
%autoreload 2

tqdm.pandas()
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 150)

In [30]:
# Import other files
%run climbconstants.py
%run unique_route_handling.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


--- IMPORT ---

In [4]:
upload_link = 'https://www.mountainproject.com/user/200180658/brayden-l'
upload_type = 'todo'
df_ulist = download_routelist(upload_type, upload_link)

--- DATA CLEANSE AND STANDARDIZE ---

In [5]:
df_ulist = data_standardize(df_ulist)

--- GRADE HOMOGENIZATION AND ROUTE LENGTH CLEANUP ---

In [7]:
df_ulist = route_length_fixer(df_ulist, 'express')

In [8]:
grade_settings = ['letter', 'even_rand', 'flat', 'even_rand']
df_ulist = grade_homo(df_ulist, *grade_settings)

--- SCRAPE ---

In [None]:
df_ulist = route_scrape(df_ulist)

--- ANALYZE ---

In [10]:
df_ulist = extract_tick_details(df_ulist)

100%|██████████| 918/918 [06:45<00:00,  2.26it/s]


In [11]:
# Let's save the now scraped dataframe to a pickle file
df_ulist.to_pickle('../Data_Archive/df_todo_archive')

In [3]:
# To load the new pickle file
picklefile = open('../Data_Archive/df_todo_archive', 'rb')
df_ulist = pickle.load(picklefile)

In [56]:
df_sample = df_ulist.loc[878, 'Route Ticks']

def unpack_style(routeticks, colref, pitchnum):
    """
    Returns a flat list of all non-null values in a given tick df column.
    pitchnum allows us to handle multipitch ticks differently than singlepitch.
    
    Parameters
    ----------
    routeticks : df
        df of ticks for a specific route
    colref : str
        column name to unpack from
    pitchnum : int
        number of pitches in route
    
    Return
    ------
    flat_list : list of strings
        Flat list of style strings
    """
    nest_list = []
    for row in routeticks.index:
        styleval = routeticks[colref][row]
        if pitchnum == 1:
            if styleval in CLEAN_SEND: # clean sends with multiple ticks are assumed to be fell/hung attempts up to that clean send.
                nest_list.append([routeticks[colref][row]])
                nest_list.append((routeticks['Pitches Ticked'][row]-1)*['Fell/Hung'])
            else:
                nest_list.append(routeticks['Pitches Ticked'][row]*[routeticks[colref][row]])
        if pitchnum > 1:
            nest_list.append([routeticks[colref][row]])
    flat_list = [num for sublist in nest_list for num in sublist]
    return flat_list


def count_attempt2rp(df_source, pitchnum):
    """Takes a dataframe of a single given users ticks, outputs number of attempts to first rp

    Parameters
    ----------
    df_source : df
        A given users ticks

    Returns
    -------
    int
        Number of attempts to first redpoint
    """
    firstrp_cutoff = df_source[df_source['Lead Style'].isin(CLEAN_SEND_WORKED)]['Entry Date'].idxmin() # Find index of first rp
    # If multipitch, we count ticks. If singlepitch we count number of total ticked pitches.
    if pitchnum > 1:
        nattempts = df_source.sort_values('Entry Date', ascending=False).loc[firstrp_cutoff::]['Pitches Ticked'].count()
    if pitchnum == 1:
        nattempts = df_source.sort_values('Entry Date', ascending=False).loc[firstrp_cutoff::]['Pitches Ticked'].sum() # Sum all pitches attempted prior to that
    return nattempts

def is_prior_sender(df_source):
    """Takes a dataframe of a single given users ticks, outputs str of user if user did not onsight/flash prior to RP

    Parameters
    ----------
    df_source : df
        A given users ticks

    Returns
    -------
    str
        Returns name of user if valid, returns None if user has prior onsight/flash
    """
    try:
        firstrp_index = df_source[df_source['Lead Style'].isin(CLEAN_SEND_WORKED)]['Entry Date'].idxmin()
    except:
        print(df_source)
    already_sent_bool = any(df_source.loc[firstrp_index::]['Lead Style'].isin(CLEAN_SEND_FIRST))
    valid_sender = not already_sent_bool
    if valid_sender:
        return df_source.iloc[0]['Username']
    else:
        return None

def analyze_tick_counts(routeticks, pitchnum):
        if (routeticks is None) or (len(routeticks.index) == 0):
            num_ticks = num_tickers = lead_ratio = os_ratio = repeat_senders = rpnattempt_mean = tick_counts = float('NaN')
        else:
            # Get number of ticks and tickers
            num_ticks = len(routeticks.index)
            num_tickers = routeticks['Username'].nunique()
            
            # Create tick metrics
            tick_cat = CategoricalDtype(categories=TICK_OPTIONS)
            tick_type_list = pd.Series(unpack_style(routeticks, 'Style', pitchnum) + unpack_style(routeticks, 'Lead Style', pitchnum), dtype=tick_cat)
            tick_counts = tick_type_list.value_counts()
            repeat_senders = routeticks[routeticks['Lead Style'].isin(CLEAN_SEND)].groupby('Username')['Lead Style'].count().mean() # It is assumed that each clean send gets its own tick.
            
            rp_unames = routeticks[routeticks['Lead Style'].isin(CLEAN_SEND_WORKED)]['Username'] # List of names of those who rp'd
            if rp_unames.empty: # if the list exists, but has no redpointers, we want to assign NaN
               rpnattempt_mean = float('NaN')
            else:
                df_rpers_only = routeticks[routeticks['Username'].isin(rp_unames)]
                rpers_list = df_rpers_only.groupby('Username').apply(is_prior_sender) # Get list of users who did not onsight/flash prior to RP
                df_rpers_only = df_rpers_only[df_rpers_only['Username'].isin(rpers_list)] # Remove RPers with a prior onsight
                attempt_list = df_rpers_only.groupby('Username').apply(lambda x: count_attempt2rp(x, pitchnum)).values.tolist() # Count num attempts of each each user.
                rpnattempt_mean = np.mean(attempt_list)
            
            lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick_counts['Lead'])
            os_ratio = (tick_counts['Onsight'] + tick_counts['Flash']) / (tick_counts['Onsight'] + tick_counts['Flash'] + tick_counts['Fell/Hung'] + tick_counts['Redpoint'] + tick_counts['Pinkpoint'] + tick_counts['Attempt'] + tick_counts['Send'])
        return pd.Series([num_ticks, num_tickers, lead_ratio, os_ratio, repeat_senders, rpnattempt_mean, tick_counts])

analyze_tick_counts(df_sample, 1)

ValueError: attempt to get argmin of an empty sequence

In [53]:
df_ulist.loc[878, 'Route Ticks']

Unnamed: 0,Username,User Link,Entry Date,Pitches Ticked,Style,Lead Style,Comment
0,Alex C,https://www.mountainproject.com/user/200182011...,2022-12-31,1,Lead,Onsight,"w/Syd. First route of the trip. Tough, but goo..."
1,Amos Wittenberg,https://www.mountainproject.com/user/201220976...,2022-12-28,1,Lead,Fell/Hung,Fell again! This was a hard start to the day. ...
2,Amos Wittenberg,https://www.mountainproject.com/user/201220976...,2022-12-26,1,Follow,,
3,Amos Wittenberg,https://www.mountainproject.com/user/201220976...,2022-12-26,1,Lead,Fell/Hung,Led first and fell on a silly slippery foot. F...
4,Ron Kirby,https://www.mountainproject.com/user/105958777...,2022-12-09,1,Lead,Onsight,Super fun climb descent was 5th class
...,...,...,...,...,...,...,...
1768,nancyjo Joseph,https://www.mountainproject.com/user/105998468...,1969-12-31,1,,,
1769,DayMartin,https://www.mountainproject.com/user/107640602...,1969-12-31,1,Lead,Onsight,
1770,Zak Noles,https://www.mountainproject.com/user/200357245...,1969-12-31,1,Lead,Onsight,
1771,Tate L,https://www.mountainproject.com/user/108096953...,1969-12-31,1,Lead,Onsight,


In [50]:
def tick_analysis(df_source):
    """
    Analyzes tick sub-df.
    
    Parameters
    ----------
    df_source : df
        Source dataframe
    
    Return
    ------
    Num Ticks : int
        Number of ticks
    Num Tickers : int
        Number of users who ticked
    Lead Ratio : float
        Ratio of lead ticks to total ticks with non-null style type
    OS Ratio : float
        Ratio of onsight plus flash ticks to total ticks with non-null lead-style type
    Tick Counts : series
        series of count of each type of tick
    
    """
    ### Analyzes tick sub dataframe to create meaningful metrics.
    
    def unpack_style(routeticks, colref, pitchnum):
        """
        Returns a flat list of all non-null values in a given tick df column.
        pitchnum allows us to handle multipitch ticks differently than singlepitch.
        
        Parameters
        ----------
        routeticks : df
            df of ticks for a specific route
        colref : str
            column name to unpack from
        pitchnum : int
            number of pitches in route
        
        Return
        ------
        flat_list : list of strings
            Flat list of style strings
        """
        nest_list = []
        for row in routeticks.index:
            styleval = routeticks[colref][row]
            if pitchnum == 1:
                if styleval in CLEAN_SEND: # clean sends with multiple ticks are assumed to be fell/hung attempts up to that clean send.
                    nest_list.append([routeticks[colref][row]])
                    nest_list.append((routeticks['Pitches Ticked'][row]-1)*['Fell/Hung'])
                else:
                    nest_list.append(routeticks['Pitches Ticked'][row]*[routeticks[colref][row]])
            if pitchnum > 1:
                nest_list.append([routeticks[colref][row]])
        flat_list = [num for sublist in nest_list for num in sublist]
        return flat_list


    def count_attempt2rp(df_source, pitchnum):
        """Takes a dataframe of a single given users ticks, outputs number of attempts to first rp

        Parameters
        ----------
        df_source : df
            A given users ticks

        Returns
        -------
        int
            Number of attempts to first redpoint
        """
        firstrp_cutoff = df_source[df_source['Lead Style'].isin(CLEAN_SEND_WORKED)]['Entry Date'].idxmin() # Find index of first rp
        # If multipitch, we count ticks. If singlepitch we count number of total ticked pitches.
        if pitchnum > 1:
            nattempts = df_source.sort_values('Entry Date', ascending=False).loc[firstrp_cutoff::]['Pitches Ticked'].count()
        if pitchnum == 1:
            nattempts = df_source.sort_values('Entry Date', ascending=False).loc[firstrp_cutoff::]['Pitches Ticked'].sum() # Sum all pitches attempted prior to that
        return nattempts

    def is_prior_sender(df_source):
        """Takes a dataframe of a single given users ticks, outputs str of user if user did not onsight/flash prior to RP

        Parameters
        ----------
        df_source : df
            A given users ticks

        Returns
        -------
        str
            Returns name of user if valid, returns None if user has prior onsight/flash
        """
        firstrp_index = df_source[df_source['Lead Style'].isin(CLEAN_SEND_WORKED)]['Entry Date'].idxmin()
        already_sent_bool = any(df_source.loc[firstrp_index::]['Lead Style'].isin(CLEAN_SEND_FIRST))
        valid_sender = not already_sent_bool
        if valid_sender:
            return df_source.iloc[0]['Username']
        else:
            return None
        

    def analyze_tick_counts(routeticks, pitchnum, routename):
        try:
            if (routeticks is None) or (len(routeticks.index) == 0):
                num_ticks = num_tickers = lead_ratio = os_ratio = repeat_senders = rpnattempt_mean = tick_counts = float('NaN')
            else:
                # Get number of ticks and tickers
                num_ticks = len(routeticks.index)
                num_tickers = routeticks['Username'].nunique()
                
                # Create tick metrics
                tick_cat = CategoricalDtype(categories=TICK_OPTIONS)
                tick_type_list = pd.Series(unpack_style(routeticks, 'Style', pitchnum) + unpack_style(routeticks, 'Lead Style', pitchnum), dtype=tick_cat)
                tick_counts = tick_type_list.value_counts()
                repeat_senders = routeticks[routeticks['Lead Style'].isin(CLEAN_SEND)].groupby('Username')['Lead Style'].count().mean() # It is assumed that each clean send gets its own tick.
                
                rp_unames = routeticks[routeticks['Lead Style'].isin(CLEAN_SEND_WORKED)]['Username'] # List of names of those who rp'd
                if rp_unames.empty: # if the list exists, but has no redpointers, we want to assign NaN
                    rpnattempt_mean = float('NaN')
                else:
                    df_rpers_only = routeticks[routeticks['Username'].isin(rp_unames)]
                    rpers_list = df_rpers_only.groupby('Username').apply(is_prior_sender) # Get list of users who did not onsight/flash prior to RP
                    df_rpers_only = df_rpers_only[df_rpers_only['Username'].isin(rpers_list)] # Remove RPers with a prior onsight
                    attempt_list = df_rpers_only.groupby('Username').apply(lambda x: count_attempt2rp(x, pitchnum)).values.tolist() # Count num attempts of each each user.
                    rpnattempt_mean = np.mean(attempt_list)
                
                lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick_counts['Lead'])
                os_ratio = (tick_counts['Onsight'] + tick_counts['Flash']) / (tick_counts['Onsight'] + tick_counts['Flash'] + tick_counts['Fell/Hung'] + tick_counts['Redpoint'] + tick_counts['Pinkpoint'] + tick_counts['Attempt'] + tick_counts['Send'])
            return pd.Series([num_ticks, num_tickers, lead_ratio, os_ratio, repeat_senders, rpnattempt_mean, tick_counts])
        except:
            print(routename)

    df_source[['Num Ticks', 'Num Tickers', 'Lead Ratio', 'OS Ratio', 'Repeat Sender Ratio', 'Mean Attempts To RP', 'Tick Counts']] = df_source.progress_apply(lambda x: analyze_tick_counts(x['Route Ticks'], x['Pitches'], x['Route']), axis=1)
    return df_source

In [51]:
df_ulist = tick_analysis(df_ulist)

  lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick_counts['Lead'])
  os_ratio = (tick_counts['Onsight'] + tick_counts['Flash']) / (tick_counts['Onsight'] + tick_counts['Flash'] + tick_counts['Fell/Hung'] + tick_counts['Redpoint'] + tick_counts['Pinkpoint'] + tick_counts['Attempt'] + tick_counts['Send'])
  os_ratio = (tick_counts['Onsight'] + tick_counts['Flash']) / (tick_counts['Onsight'] + tick_counts['Flash'] + tick_counts['Fell/Hung'] + tick_counts['Redpoint'] + tick_counts['Pinkpoint'] + tick_counts['Attempt'] + tick_counts['Send'])
  lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick_counts['Lead'])
  os_ratio = (tick_counts['Onsight'] + tick_counts['Flash']) / (tick_counts['Onsight'] + tick_counts['Flash'] + tick_counts['Fell/Hung'] + tick_counts['Redpoint'] + tick_counts['Pinkpoint'] + tick_counts['Attempt'] + tick_counts['Send'])
  lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick

Touch and Go


  lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick_counts['Lead'])
  lead_ratio = tick_counts['Lead']/(tick_counts['Follow'] + tick_counts['TR'] + tick_counts['Lead'])
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  attempt_list = df_rpers_only.groupby('Username').apply(lambda x: count_attempt2rp(x, pitchnum)).values.tolist() # Count num attempts of each each user.
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 918/918 [00:36<00:00, 25.26it/s]


--- SPLIT OUTPUT INTO BOULDER AND ROPED SUBFRAME ---

In [14]:
# Select Boulder or Route
df_ulist_r = df_ulist[df_ulist['Route Type'] != 'Boulder']
df_ulist_b = df_ulist[df_ulist['Route Type'] == 'Boulder']

In [None]:
# Filter

In [15]:
min_ticks = 30

# Rarely led
df_low_lead = df_ulist_r[(df_ulist_r['Num Ticks'] >= min_ticks) & (df_ulist_r['Lead Ratio'] < 0.4) & (df_ulist_r['Pitches'] == 1)].sort_values(by='Lead Ratio')

# Rarely toproped
df_high_lead = df_ulist_r[(df_ulist_r['Num Ticks'] >= min_ticks) & (df_ulist_r['Lead Ratio'] > 0.9) & (df_ulist_r['Pitches'] == 1)].sort_values(by='Lead Ratio', ascending=False)

# Hard to OS
df_low_OS_r = df_ulist_r[(df_ulist_r['Num Ticks'] >= min_ticks) & (df_ulist_r['OS Ratio'] < 0.35)].sort_values(by='OS Ratio')
df_low_OS_b = df_ulist_b[(df_ulist_b['Num Ticks'] >= min_ticks) & (df_ulist_b['OS Ratio'] < 0.35)].sort_values(by='OS Ratio')

# High OS
df_high_OS_r = df_ulist_r[(df_ulist_r['Num Ticks'] >= min_ticks) & (df_ulist_r['OS Ratio'] > 0.8)].sort_values(by='OS Ratio', ascending=False)
df_high_OS_b = df_ulist_b[(df_ulist_b['Num Ticks'] >= min_ticks) & (df_ulist_b['OS Ratio'] > 0.8)].sort_values(by='OS Ratio', ascending=False)

# Find route on list that is hardest and easiest to OS given a grade
OS_cutoff_num = 3


In [None]:
grouplist = df_ulist['Rating'].unique()
outlist = []
for group in grouplist:
    outlist.extend(list(df_ulist[(df_ulist_r['Num Ticks'] >= min_ticks) & (df_ulist['Rating'] == group)].nsmallest(10, 'OS Ratio').index))
df_ulist.loc[outlist].sort_values('OS Ratio')