In [1]:
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import nltk
%matplotlib inline

from bs4 import BeautifulSoup
import lxml
import requests
import re
import pyinputplus as pyip
import requests
import datetime as dt
from datetime import datetime
from tqdm import tqdm
import pickle
import random
import math 
import string

%load_ext autoreload
%autoreload 2

tqdm.pandas()
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 150)

In [117]:
# Import other files
%run climbconstants.py
%run unique_route_handling.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


--- IMPORT ---

In [92]:
upload_link = 'https://www.mountainproject.com/user/200180658/brayden-l'
upload_type = 'tick'
df_usend = download_routelist(upload_type, upload_link)

--- DATA CLEANSE AND STANDARDIZE ---

In [93]:
df_usend = data_standardize(df_usend)

--- CREATE UNIQUE LIST ---

In [5]:
df_usend_uniq = df_usend.drop_duplicates(subset="Route ID")
df_usend_uniq = user_uniq_clean(df_usend_uniq)

--- GRADE HOMOGENIZATION AND ROUTE LENGTH CLEANUP ---

In [46]:
df_usend_uniq = route_length_fixer(df_usend_uniq, 'express')

In [47]:
grade_settings = ['letter', 'even_rand', 'flat', 'even_rand']
df_usend_uniq = grade_homo(df_usend_uniq, *grade_settings)

--- SCRAPE ---

In [None]:
df_usend_uniq = route_scrape(df_usend_uniq)

--- ANALYZE ---

In [9]:
df_usend_uniq = extract_default_pitch(df_usend_uniq)

100%|██████████| 641/641 [00:45<00:00, 14.17it/s]


In [10]:
df_usend_uniq = extract_tick_details(df_usend_uniq)

100%|██████████| 641/641 [14:46<00:00,  1.38s/it]


In [11]:
# Let's save the now scraped dataframe to a pickle file
df_usend_uniq.to_pickle('../Data_Archive/df_usend_archive')

In [3]:
# To load the new pickle file
picklefile = open('../Data_Archive/df_usend_archive', 'rb')
df_usend_uniq = pickle.load(picklefile)

In [None]:
df_usend_uniq = tick_analysis(df_usend_uniq)

--- MERGE UNIQUE DATA TO TICK LIST ---

In [94]:
# merge unique dataframe details to user data frame. This will delete the user data frame of length and rating information and replace it with that from the unique dataframe
df_usendm = df_usend.copy() # We create a copy of the original df_usend to be consequently modified by the user. This leaves the original dataframe alone.
df_usendm.drop(columns=['Rating', 'Length'], inplace=True)
df_usendm = df_usendm.merge(df_usend_uniq[['Route ID', 'Pitches', 'Lead Ratio', 'Num Ticks', 'Num Tickers', 'OS Ratio', 'Rating', 'Length']], how='left', on='Route ID')

--- TAG NOTABLE SENDS ---

In [95]:
#Initialize columns

df_usendm.insert(len(df_usendm.columns),'Flash/Onsight',None)
df_usendm.insert(len(df_usendm.columns),'Worked Clean',None)
df_usendm.insert(len(df_usendm.columns),'Grade Breakthrough',None)
df_usendm.insert(len(df_usendm.columns),'Attempts',float('NaN'))

In [118]:
# We want to tag important climbs, namely flash/onsights, worked clean routes and grade breakthroughs.

# Tag climbs that were flash/onsight
df_usendm.loc[df_usendm['Lead Style'].isin(CLEAN_SEND_FIRST), 'Flash/Onsight'] = True

# Create column that flags climbs that were worked. There are three possibilities to consider. We want 1 and 2.
# 1. Worked to clean send, no further sends.
# 2. Worked to clean send, additional attempts.
# 3. Sent clean first try, additional attempts.
df_all_dupes = df_usendm[df_usendm.duplicated(subset="Route ID", keep=False)] # First we filter for all duplicate entries.
df_all_worked = df_all_dupes.groupby('Route ID').filter(lambda x: ~x['Lead Style'].isin(CLEAN_SEND_FIRST).any()) # Then we remove all groups which have a lead style of flash or onsight to eliminate group 3.
df_worked_sub1 = df_all_worked[df_all_worked.groupby('Route ID')['Lead Style'].apply(lambda x: x.isin(CLEAN_SEND_WORKED))] # fell/hungs and TRs remain, so we take ticks from CLEAN_SEND_WORKED.
df_worked_sub2 = df_worked_sub1.loc[df_worked_sub1.groupby('Route ID')['Date'].idxmin()] # Use only the earliest redpoint to correctly identify the first redpoint.
df_usendm.loc[df_worked_sub2.index.values, "Worked Clean"] = True

# Flag grade breakthrough ticks
dfbreakthr = df_usendm[(df_usendm['Flash/Onsight'] == True) | (df_usendm['Worked Clean'] == True)]
breakthrough_indexes = dfbreakthr.groupby('Rating', observed=True)['Date'].idxmin().values
df_usendm.loc[breakthrough_indexes, "Grade Breakthrough"] = True
df_usendm.loc[breakthrough_indexes]

def count_user_attempt2rp(df_source):
    """Takes a dataframe of all user ticks of a given climb, outputs number of attempts to first rp

    Parameters
    ----------
    df_source : df
        User ticks of a given climb

    Returns
    -------
    int
        Number of attempts to first redpoint
    """
    pitchnum = df_source.iloc[0]['Pitches']
    firstrp_cutoff = df_source[df_source['Lead Style'].isin(CLEAN_SEND_WORKED)]['Date'].idxmin() # Find index of first rp
    # If multipitch, we count ticks. If singlepitch we count number of total ticked pitches.
    if pitchnum > 1:
        nattempts = df_source.sort_values('Date', ascending=False).loc[firstrp_cutoff::]['Pitches Ticked'].count()
    if pitchnum == 1:
        nattempts = df_source.sort_values('Date', ascending=False).loc[firstrp_cutoff::]['Pitches Ticked'].sum() # Sum all pitches attempted prior to that
    return nattempts

# Count number of attempts to send
# Assumes no style lead ticks are fell/hung
# Assumes rp/pp with no prior tick history has one prior attempt
# Counts clean ticks with multiple pitches as total attempts. It also counts fell/hung, and TR with multiple pitches as multiple attempts.
# !!! This will falsely identify a single pitch climb broken into multiple pitches as two attempts, there isn't really a good way to detect this.
df_all_worked.loc[df_worked_sub2.index.values, "Worked Clean"] = True # To make the next line work properly, we need to backfill the "worked clean" flag to worked clean.
df_all_worked.drop(df_all_worked[(df_all_worked['Lead Style'].isin(CLEAN_SEND_WORKED)) & (df_all_worked['Worked Clean'] == '')].index, inplace=True) # remove all redpoints/pinkpoints after first for purpose of counting attempts, this can be done by removing all rows that are redpoints but not marked 'worked clean'
df_usendm['Attempts'] = df_usendm.groupby('Route ID').progress_apply(lambda x: count_attempt2rp(x, x['Pitches']))
# num_to_send = df_all_worked.groupby('Route ID')['Pitches Ticked'].sum()
# matched_attempts = df_usendm[df_usendm['Worked Clean'] == True].merge(num_to_send, on="Route ID", how="left")
# matched_attempts.index = df_usendm[df_usendm['Worked Clean'] == True].index # I'm dumb and this is the best way I could find to get my index to remain
# df_usendm.loc[matched_attempts.index, "Attempts"] = matched_attempts.iloc[:,-1]
# df_usendm.loc[df_usendm['Attempts'] == 1, 'Attempts'] = 2 # This assumes rp with 1 pitch and no prior ticks had one prior attempt


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)

  0%|          | 1/641 [00:00<00:01, 333.23it/s]


ValueError: attempt to get argmin of an empty sequence

In [112]:
df_usendm

Unnamed: 0,Date,Route,Original Rating,Notes,URL,Pitches Ticked,Location,Avg Stars,Your Stars,Style,Lead Style,Route Type,Your Rating,Rating Code,Route ID,Risk,Pitches,Lead Ratio,Num Ticks,Num Tickers,OS Ratio,Rating,Length,Flash/Onsight,Worked Clean,Grade Breakthrough,Attempts
0,2022-12-21,Positive Vibrations,5.11a,,https://www.mountainproject.com/route/10586067...,12,California > High Sierra > 02 - The Sawtooth R...,4.0,-1,Lead,Redpoint,Trad,,4600,105860676,,12,0.923529,602.0,487.0,0.615721,5.11a,1200.0,,True,,24.0
1,2022-12-10,Le Bernd,V4,Probably about a dozen various attempts. I was...,https://www.mountainproject.com/route/10867338...,1,California > Central Coast > Santa Barbara > *...,2.9,-1,Send,,Boulder,,20400,108673389,,1,,57.0,53.0,0.160000,V4,12.0,,,,
2,2022-12-10,West Nile (aka The Warmup),V3,Big throw. Took me a few tries to figure out t...,https://www.mountainproject.com/route/10867338...,1,California > Central Coast > Santa Barbara > *...,2.3,-1,Send,,Boulder,,20300,108673384,,1,,65.0,61.0,0.125000,V3,12.0,,,,
3,2022-11-27,The Nose,5.11a,One hang. Feels more possible than it ever has...,https://www.mountainproject.com/route/10579305...,1,California > Central Coast > Santa Barbara > G...,3.4,-1,Lead,Fell/Hung,Trad,,4600,105793054,,1,0.544304,98.0,71.0,0.129032,5.11a,100.0,,,,
4,2022-11-27,The Nose,5.11a,Second attempt of the day. Took a big clipper ...,https://www.mountainproject.com/route/10579305...,1,California > Central Coast > Santa Barbara > G...,3.4,-1,Lead,Fell/Hung,Trad,,4600,105793054,,1,0.544304,98.0,71.0,0.129032,5.11a,100.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730,2018-02-10,The Rapture,5.8-,,https://www.mountainproject.com/route/10579317...,1,California > Central Coast > Santa Barbara > G...,2.4,-1,Lead,Redpoint,Sport,,2000,105793171,,1,0.557377,111.0,94.0,0.851852,5.8,60.0,,,,
731,2018-02-05,Single Shot,V2,,https://www.mountainproject.com/route/10661372...,1,California > Central Coast > Santa Barbara > *...,2.7,-1,Send,,Boulder,,20200,106613724,,1,,44.0,43.0,0.181818,V2,13.0,,,,
732,2018-01-28,Face the Seam,5.10a/b,,https://www.mountainproject.com/route/10592542...,1,California > Central Coast > Santa Barbara > F...,2.0,-1,TR,,Sport,,2800,105925427,,1,0.729167,68.0,63.0,0.600000,5.10a,35.0,,,,
733,2018-01-28,Short Shot,5.8,,https://www.mountainproject.com/route/10579742...,1,California > Central Coast > Santa Barbara > F...,1.5,-1,Lead,Redpoint,Sport,,2100,105797420,,1,0.523364,147.0,124.0,0.738095,5.8,30.0,,,,


--- ANALYZE FOR NOTABLE ELEMENTS ---

In [97]:
# User led something rarely led
df_bold_leads = df_usendm[(df_usendm['Num Ticks'] >= 30) & (df_usendm['Lead Ratio'] < 0.4) & (df_usendm['Style'] == 'Lead') & (df_usendm['Route Type'] != 'Boulder')].sort_values(by='Lead Ratio', ascending=False)

# User onsighted something rarely onsighted
df_impressive_OS = df_usendm[(df_usendm['Num Ticks'] >= 30) & (df_usendm['OS Ratio'] < 0.35) & (df_usendm['Flash/Onsight'] == True) & (df_usendm['Route Type'] != 'Boulder')].sort_values(by='OS Ratio')

# User fell on something rarely fallen on
df_woops_falls = df_usendm[(df_usendm['Num Ticks'] >= 30) & (df_usendm['OS Ratio'] > 0.8) & (df_usendm['Style'] == 'Lead') & (df_usendm['Lead Style'] == 'Fell/Hung') & (df_usendm['Route Type'] != 'Boulder')].sort_values(by='OS Ratio')


--- FILTER FOR VIS ---

In [98]:
# First we create a copy of the user modified dataframe, which we will refer to as the filtered dataframe
df_usendf = df_usendm.copy()

# Filters, these would be sliders and options in an interactive plot
roped_grade_min = '5.10a'
boulder_grade_min = 'V0'

# We slice the dataframe into a route and a boulder type. Each type will receive it's own independent filtering.

df_usendf_r = df_usendf[df_usendf['Rating'].isin(YDS_GRADES_FULL[YDS_GRADES_FULL.index(roped_grade_min):])] # the : here is key, it makes it so that it includes all grades above the min included.
df_usendf_b = df_usendf[df_usendf['Rating'].isin(V_GRADES_FULL[V_GRADES_FULL.index(boulder_grade_min):])]

In [99]:
# Set visualization settings accordant to modifications and filters.

if grade_settings[0] == 'letter':
    ryaxorder = YDS_GRADES_LETTER
if grade_settings[0] == 'sign':
    ryaxorder = YDS_GRADES_SIGN

if grade_settings[2] == 'flat':
    byaxorder = V_GRADES_FLAT
if grade_settings[2] == 'sign':
    byaxorder = V_GRADES_SIGN

--- SPLIT INTO ROPED AND BOULDER SUBSETS ---

In [100]:
# Create dataframe of clean sends for analysis

df_usendf_r.loc[df_usendf_r['Lead Style'].isin(CLEAN_SEND_FIRST), "Attempts"] = '' # Optionally change attempts from blank to 1 or other
df_clean_sends_r = df_usendf_r[(df_usendf_r['Lead Style'].isin(CLEAN_SEND))]
# df_clean_sends_r = df_clean_sends_r.loc[df_clean_sends_r.groupby('Route ID')['Date'].idxmin()] # Optionally ignore subequent clean sends
df_clean_sends_r['Date Formatted'] = df_clean_sends_r['Date'].dt.date

df_usendf_b.loc[df_usendf_b['Lead Style'].isin(CLEAN_SEND_FIRST), "Attempts"] = '' # Optionally change attempts from blank to 1 or other
df_clean_sends_b = df_usendf_b[(df_usendf_b['Style'].isin(CLEAN_SEND))]
# df_clean_sends_b = df_clean_sends_b.loc[df_clean_sends_r.groupby('Route ID')['Date'].idxmin()] # Optionally ignore subequent clean sends
df_clean_sends_b['Date Formatted'] = df_clean_sends_b['Date'].dt.date

In [101]:
fig = px.bar(df_clean_sends_r, y="Rating", orientation='h', category_orders={"Rating": ryaxorder[::-1]}, text='Attempts', custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars']) # The [::-1] is an inverse slice
fig.update_layout(font={'family':'Courier New', 'color':'black', 'size':20}, title={'text':'<b>Climbing Pyramid</b>', 'x':0.5, 'font_size':30}, xaxis={'title': 'Number of Routes Sent'}, yaxis={'title': 'Grade'}, paper_bgcolor='#ece5dc', plot_bgcolor='#F5D3A5', bargap=0)
fig.update_traces(marker_color='#7A4F25', textposition = "inside", textfont={"color": 'White', "size": 12, "family": 'Arial Black'},  hovertemplate='Name: %{customdata[0]}<br>Date: %{customdata[1]}<br>Location: %{customdata[2]}<br>Length: %{customdata[3]}ft<br>Avg Stars: %{customdata[4]}')
# fig.update_traces(marker_color=list(map(lambda x: '#7A4F25' if (x=='') else '#bf9315', df_clean_sends['Attempts'])), textposition = "inside",  hovertemplate='Name: %{customdata[0]}<br>Date: %{customdata[1]}<br>Location: %{customdata[2]}<br>Length: %{customdata[3]}ft<br>Avg Stars: %{customdata[4]}')

In [102]:
fig = px.scatter(df_clean_sends_r, "Date", "Rating", category_orders={"Rating": ryaxorder[::-1]}, text='Attempts', custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars'])
fig.update_layout(font={'family':'Courier New', 'color':'black', 'size':20}, title={'text':'<b>Send by Date</b>', 'x':0.5, 'font_size':30}, xaxis={'title': 'Date'}, yaxis={'title': 'Grade'}, paper_bgcolor='#ece5dc', plot_bgcolor='#F5D3A5', bargap=0)
fig.update_traces(marker_symbol='square', marker_color='#7A4F25', marker_size=20, marker_line_width=2, marker_line_color='black', textfont={"color": 'White', "size": 12}, hovertemplate='Name: %{customdata[0]}<br>Date: %{customdata[1]}<br>Location: %{customdata[2]}<br>Length: %{customdata[3]}ft<br>Avg Stars: %{customdata[4]}')

In [103]:
fig = px.bar(df_clean_sends_b, y="Rating", orientation='h', category_orders={"Rating": byaxorder[::-1]}, text='Attempts', custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars']) # The [::-1] is an inverse slice
fig.update_layout(font={'family':'Courier New', 'color':'black', 'size':18}, title={'text':'<b>Climbing Pyramid</b>', 'x':0.5, 'font_size':30}, xaxis={'title': 'Number of Problems Sent'}, yaxis={'title': 'Grade'}, paper_bgcolor='#ece5dc', plot_bgcolor='#F5D3A5', bargap=0)
fig.update_traces(marker_color='#7A4F25', textposition = "inside", textfont={"color": 'White', "size": 12, "family": 'Arial Black'},  hovertemplate='Name: %{customdata[0]}<br>Date: %{customdata[1]}<br>Location: %{customdata[2]}<br>Length: %{customdata[3]}ft<br>Avg Stars: %{customdata[4]}')
# fig.update_traces(marker_color=list(map(lambda x: '#7A4F25' if (x=='') else '#bf9315', df_clean_sends['Attempts'])), textposition = "inside",  hovertemplate='Name: %{customdata[0]}<br>Date: %{customdata[1]}<br>Location: %{customdata[2]}<br>Length: %{customdata[3]}ft<br>Avg Stars: %{customdata[4]}')

In [104]:
fig = px.scatter(df_clean_sends_b, "Date", "Rating", category_orders={"Rating": byaxorder[::-1]}, text='Attempts', custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars'])
fig.update_layout(font={'family':'Courier New', 'color':'black', 'size':20}, title={'text':'<b>Send by Date</b>', 'x':0.5, 'font_size':30}, xaxis={'title': 'Date'}, yaxis={'title': 'Grade'}, paper_bgcolor='#ece5dc', plot_bgcolor='#F5D3A5', bargap=0)
fig.update_traces(marker_symbol='square', marker_color='#7A4F25', marker_size=20, marker_line_width=2, marker_line_color='black', textfont={"color": 'White', "size": 12}, hovertemplate='Name: %{customdata[0]}<br>Date: %{customdata[1]}<br>Location: %{customdata[2]}<br>Length: %{customdata[3]}ft<br>Avg Stars: %{customdata[4]}')