In [10]:
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import nltk
%matplotlib inline

import grequests # You will get errors if grequests is not above requests
import requests
from requests.adapters import HTTPAdapter, Retry
from pandarallel import pandarallel
from bs4 import BeautifulSoup
import lxml
import cchardet
import re

import pyinputplus as pyip
import datetime as dt
from datetime import datetime
from tqdm import tqdm
import pickle
import random
import math 
import string

from unique_route_handling import *

%load_ext autoreload
%autoreload 2

# Register tqdm with pandas so the progress_* variants of apply/map become available.
tqdm.pandas()
# Initialise pandarallel with progress bars enabled (the recorded output reports 6 workers).
pandarallel.initialize(progress_bar=True)
# Raise the notebook's dataframe display limits so wide/long frames are not truncated as early.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 150)

import warnings
warnings.simplefilter("ignore", category=UserWarning) # Grequests is a monkeypatch and not intended to be used with jupyter. This silences an annoying userwarning.

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


--- IMPORT ---

In [2]:
# Mountain Project profile URL whose list is downloaded.
upload_link = 'https://www.mountainproject.com/user/200180658/brayden-l'
# Kind of list to request; 'tick' is the only value used in this notebook
# (presumably the user's tick list -- other accepted values are not visible here).
upload_type = 'tick'
# download_routelist returns a pair; only the first element (the route dataframe) is kept,
# the second is discarded.
df_usend, _ = download_routelist(upload_type, upload_link)

--- DATA CLEANSE AND STANDARDIZE ---

In [3]:
# Rebind df_usend to the output of data_standardize (presumably provided by the star import
# from unique_route_handling).
# NOTE(review): the cell feeds its own output name back in, so re-running it re-processes
# already standardized data -- confirm data_standardize tolerates that.
df_usend = data_standardize(df_usend)

--- CREATE UNIQUE LIST ---

In [4]:
# Keep one row per 'Route ID' (drop_duplicates keeps the first occurrence by default) ...
df_usend_uniq = df_usend.drop_duplicates(subset="Route ID")
# ... then pass the de-duplicated frame through user_uniq_clean and rebind the result.
df_usend_uniq = user_uniq_clean(df_usend_uniq)

--- GRADE HOMOGENIZATION AND ROUTE LENGTH CLEANUP ---

In [5]:
# Run route_length_fixer over the unique-route frame in 'express' mode.
# NOTE(review): what 'express' selects is defined in the helper module, not in this notebook.
df_usend_uniq = route_length_fixer(df_usend_uniq, 'express')

In [6]:
# Four positional settings handed to grade_homo. Later in this notebook element 0
# ('letter' or 'sign') picks the roped-grade axis order and element 2 ('flat' or 'sign')
# picks the boulder-grade axis order; the meaning of elements 1 and 3 ('even_rand') is
# defined by grade_homo itself -- TODO confirm against its signature.
grade_settings = ['letter', 'even_rand', 'flat', 'even_rand']
df_usend_uniq = grade_homo(df_usend_uniq, *grade_settings)

--- SCRAPE ---

In [12]:
# Run routescrape_syncro over the unique-route frame and rebind the result
# (presumably this fetches the per-route pages -- it sits under the SCRAPE heading).
# NOTE(review): the output saved with this cell ends in
# "NameError: name 'stqdm' is not defined". The name is not used in this notebook, so the
# failure presumably originates inside the helper module; every later cell depends on this
# one succeeding.
df_usend_uniq = routescrape_syncro(df_usend_uniq)

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


NameError: name 'stqdm' is not defined

--- ANALYZE ---

In [None]:
# Apply extract_default_pitch to the scraped frame; par=True presumably selects the
# parallel (pandarallel) code path -- TODO confirm in the helper module.
df_usend_uniq = extract_default_pitch(df_usend_uniq, par=True)

In [None]:
# Apply extract_tick_details to the scraped frame; par=True presumably selects the
# parallel (pandarallel) code path -- TODO confirm in the helper module.
df_usend_uniq = extract_tick_details(df_usend_uniq, par=True)

In [None]:
# Run tick_analysis on the frame. The merge cell further down reads 'Lead Ratio',
# 'Num Ticks', 'Num Tickers', 'OS Ratio' and 'Mean Attempts To RP' from df_usend_uniq;
# presumably some of them are produced here -- TODO confirm.
df_usend_uniq = tick_analysis(df_usend_uniq)

In [None]:
# Persist the scraped and analysed dataframe as a pickle so the scrape does not have to be
# repeated. The path is relative to the notebook's working directory.
df_usend_uniq.to_pickle('../Data_Archive/df_usend_archive')

In [None]:
# Reload the archived dataframe from the pickle file.
# The context manager closes the file handle even if unpickling fails; a bare open()
# would leave the handle open for the lifetime of the kernel.
# NOTE: pickle.load can execute arbitrary code -- only load archives you created yourself.
with open('../Data_Archive/df_usend_archive', 'rb') as picklefile:
    df_usend_uniq = pickle.load(picklefile)

--- MERGE UNIQUE DATA TO TICK LIST ---

In [None]:
# Build the user-modifiable tick frame (df_usendm) from the tick list plus per-route details.
# drop() returns a new frame, so the original df_usend is left alone; the user's own
# 'Rating' and 'Length' are discarded and replaced by the values from the unique-route frame.
df_usendm = df_usend.drop(columns=['Rating', 'Length']).merge(
    df_usend_uniq[[
        'Route ID',
        'Pitches',
        'Lead Ratio',
        'Num Ticks',
        'Num Tickers',
        'OS Ratio',
        'Mean Attempts To RP',
        'Rating',
        'Length',
    ]],
    how='left',
    on='Route ID',
)

--- TAG NOTABLE SENDS ---

In [None]:
# Initialize the tag columns (appended at the end of the frame when they are new).
# Plain assignment is used rather than DataFrame.insert so the cell can be re-run:
# insert raises ValueError once the column exists, whereas assignment simply resets it.

df_usendm['Flash/Onsight'] = None
df_usendm['Worked Clean'] = None
df_usendm['Grade Breakthrough'] = None
df_usendm['Attempts'] = float('NaN')

In [None]:
# We want to tag important climbs, namely flash/onsights, worked clean routes and grade breakthroughs.

# Tag climbs that were flash/onsight
df_usendm.loc[df_usendm['Lead Style'].isin(CLEAN_SEND_FIRST), 'Flash/Onsight'] = True

# Create column that flags climbs that were worked. There are three possibilities to consider. We want 1 and 2.
# 1. Worked to clean send, no further sends.
# 2. Worked to clean send, additional attempts.
# 3. Sent clean first try, additional attempts.

# First we filter for all routes that were ticked more than once.
df_all_dupes = df_usendm[df_usendm.duplicated(subset="Route ID", keep=False)]
# Then we remove every route that has a flash/onsight tick, which eliminates group 3.
# `not` is used on the scalar from .any(): bitwise `~` on a plain Python bool yields a truthy int.
df_all_worked = df_all_dupes.groupby('Route ID').filter(lambda x: not x['Lead Style'].isin(CLEAN_SEND_FIRST).any())
# Fell/hungs and TRs remain, so we keep only ticks whose style is in CLEAN_SEND_WORKED.
# A direct row mask is used; a groupby.apply mask gains a group-key index level on newer pandas
# and can no longer be aligned with the frame.
df_worked_clean_rponly = df_all_worked[df_all_worked['Lead Style'].isin(CLEAN_SEND_WORKED)]
# Use only the earliest redpoint of each route to correctly identify the first redpoint.
df_worked_clean_earliest = df_worked_clean_rponly.loc[df_worked_clean_rponly.groupby('Route ID')['Date'].idxmin()]
df_usendm.loc[df_worked_clean_earliest.index.values, "Worked Clean"] = True

# Flag grade breakthrough ticks: the earliest clean tick (flash/onsight or worked clean) at each rating.
dfbreakthr = df_usendm[(df_usendm['Flash/Onsight'] == True) | (df_usendm['Worked Clean'] == True)]
breakthrough_indexes = dfbreakthr.groupby('Rating', observed=True)['Date'].idxmin().values
df_usendm.loc[breakthrough_indexes, "Grade Breakthrough"] = True

# Count number of attempts to send
# Assumes no style lead ticks are fell/hung
# Assumes rp/pp with no prior tick history has one prior attempt
# Counts clean ticks with multiple pitches as total attempts. It also counts fell/hung, and TR with multiple pitches as multiple attempts.
# !!! This will falsely identify a single pitch climb broken into multiple pitches as two attempts, there isn't really a good way to detect this.

# Filters out worked climbs that were never done clean.
df_worked_clean = df_all_worked.groupby('Route ID').filter(lambda x: x['Lead Style'].isin(CLEAN_SEND_WORKED).any())
# Skip the count entirely when nothing was worked clean: groupby.apply on an empty frame
# does not return the per-route Series the lookup below needs.
if not df_worked_clean.empty:
    # One attempt count per route, indexed by 'Route ID'.
    num_to_send = df_worked_clean.groupby('Route ID').apply(lambda x: count_attempt2rp(x, x.iloc[0]['Pitches']))
    num_to_send = num_to_send.rename('Attempts')
    # Look the count up by 'Route ID' for each first-redpoint row; mapping keeps the row index
    # intact, so the result aligns with df_usendm without a merge and manual re-indexing.
    worked_clean_mask = df_usendm['Worked Clean'] == True
    df_usendm.loc[worked_clean_mask, "Attempts"] = df_usendm.loc[worked_clean_mask, 'Route ID'].map(num_to_send)
df_usendm.loc[df_usendm['Attempts'] == 1, 'Attempts'] = 2 # This assumes rp with 1 pitch and no prior ticks had one prior attempt

--- ANALYZE FOR NOTABLE ELEMENTS ---

In [None]:
# Conditions shared by the three queries below: the route has at least 30 ticks and is not a boulder.
notable_base_mask = df_usendm['Num Ticks'].ge(30) & df_usendm['Route Type'].ne('Boulder')
# The user's own tick was on lead.
user_led_mask = df_usendm['Style'].eq('Lead')

# User led something rarely led (lead ratio below 0.4), highest lead ratio first
df_bold_leads = df_usendm[
    notable_base_mask & user_led_mask & df_usendm['Lead Ratio'].lt(0.4)
].sort_values(by='Lead Ratio', ascending=False)

# User onsighted something rarely onsighted (onsight ratio below 0.35), lowest onsight ratio first
df_impressive_OS = df_usendm[
    notable_base_mask & df_usendm['OS Ratio'].lt(0.35) & df_usendm['Flash/Onsight'].eq(True)
].sort_values(by='OS Ratio')

# User fell on something rarely fallen on (onsight ratio above 0.8), lowest onsight ratio first
df_woops_falls = df_usendm[
    notable_base_mask
    & df_usendm['OS Ratio'].gt(0.8)
    & user_led_mask
    & df_usendm['Lead Style'].eq('Fell/Hung')
].sort_values(by='OS Ratio')


--- FILTER FOR VIS ---

In [None]:
# First we create a copy of the user modified dataframe, which we will refer to as the filtered dataframe
df_usendf = df_usendm.copy()

# Filters, these would be sliders and options in an interactive plot
roped_grade_min = '5.10a'
boulder_grade_min = 'V0'

# We slice the dataframe into a route and a boulder type. Each type will receive its own independent filtering.
# The open-ended slice [index:] keeps the minimum grade and every grade after it in the list.
# .copy() detaches each subset from df_usendf: the subsets are assigned into later, and writing
# into a plain boolean-indexed slice triggers pandas' SettingWithCopyWarning.

df_usendf_r = df_usendf[df_usendf['Rating'].isin(YDS_GRADES_FULL[YDS_GRADES_FULL.index(roped_grade_min):])].copy()
df_usendf_b = df_usendf[df_usendf['Rating'].isin(V_GRADES_FULL[V_GRADES_FULL.index(boulder_grade_min):])].copy()

In [None]:
# Set visualization settings accordant to modifications and filters.
# Each setting must match one of the known options. Previously an unknown value left
# ryaxorder/byaxorder undefined (or silently stale from an earlier run) and the failure only
# surfaced later, in the plotting cells.

# Roped grades: axis order follows the first grade setting.
if grade_settings[0] == 'letter':
    ryaxorder = YDS_GRADES_LETTER
elif grade_settings[0] == 'sign':
    ryaxorder = YDS_GRADES_SIGN
else:
    raise ValueError(f"Unsupported roped grade setting {grade_settings[0]!r}; expected 'letter' or 'sign'")

# Boulder grades: axis order follows the third grade setting.
if grade_settings[2] == 'flat':
    byaxorder = V_GRADES_FLAT
elif grade_settings[2] == 'sign':
    byaxorder = V_GRADES_SIGN
else:
    raise ValueError(f"Unsupported boulder grade setting {grade_settings[2]!r}; expected 'flat' or 'sign'")

--- SPLIT INTO ROPED AND BOULDER SUBSETS ---

In [None]:
# Create dataframe of clean sends for analysis
# Both subsets are detached with .copy() before being written to, so the assignments below
# cannot trigger SettingWithCopyWarning. 'Attempts' is cast to object first because the blank
# string used for first-try sends cannot be stored in a float column without an implicit
# upcast, which newer pandas versions deprecate.

# Roped routes
df_usendf_r = df_usendf_r.copy()
df_usendf_r['Attempts'] = df_usendf_r['Attempts'].astype(object)
df_usendf_r.loc[df_usendf_r['Lead Style'].isin(CLEAN_SEND_FIRST), "Attempts"] = '' # Optionally change attempts from blank to 1 or other
df_clean_sends_r = df_usendf_r[df_usendf_r['Lead Style'].isin(CLEAN_SEND)].copy()
# df_clean_sends_r = df_clean_sends_r.loc[df_clean_sends_r.groupby('Route ID')['Date'].idxmin()] # Optionally ignore subsequent clean sends
df_clean_sends_r['Date Formatted'] = df_clean_sends_r['Date'].dt.date

# Boulder problems
df_usendf_b = df_usendf_b.copy()
df_usendf_b['Attempts'] = df_usendf_b['Attempts'].astype(object)
df_usendf_b.loc[df_usendf_b['Lead Style'].isin(CLEAN_SEND_FIRST), "Attempts"] = '' # Optionally change attempts from blank to 1 or other
# NOTE(review): boulders are filtered on 'Style' while the line above and the roped branch use
# 'Lead Style' -- presumably boulder sends are recorded under 'Style'; confirm this is intended.
df_clean_sends_b = df_usendf_b[df_usendf_b['Style'].isin(CLEAN_SEND)].copy()
# df_clean_sends_b = df_clean_sends_b.loc[df_clean_sends_b.groupby('Route ID')['Date'].idxmin()] # Optionally ignore subsequent clean sends
df_clean_sends_b['Date Formatted'] = df_clean_sends_b['Date'].dt.date

In [None]:
# Roped climbing pyramid: one horizontal bar segment per clean send, stacked by grade.
# The grade order is reversed ([::-1]) before being handed to plotly as the category order.
fig = px.bar(
    df_clean_sends_r,
    y="Rating",
    orientation='h',
    category_orders={"Rating": ryaxorder[::-1]},
    text='Attempts',
    custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars'],
)
fig.update_layout(
    font=dict(family='Courier New', color='black', size=20),
    title=dict(text='<b>Climbing Pyramid</b>', x=0.5, font_size=30),
    xaxis=dict(title='Number of Routes Sent'),
    yaxis=dict(title='Grade'),
    paper_bgcolor='#ece5dc',
    plot_bgcolor='#F5D3A5',
    bargap=0,
)
# Alternative styling (disabled): pass a per-row marker_color list that uses '#7A4F25' where
# 'Attempts' is blank and '#bf9315' otherwise, to highlight worked sends.
# The hover box shows the custom_data columns in the order listed above.
# update_traces stays the last expression so the notebook renders the figure.
fig.update_traces(
    marker_color='#7A4F25',
    textposition="inside",
    textfont=dict(color='White', size=12, family='Arial Black'),
    hovertemplate=(
        'Name: %{customdata[0]}<br>'
        'Date: %{customdata[1]}<br>'
        'Location: %{customdata[2]}<br>'
        'Length: %{customdata[3]}ft<br>'
        'Avg Stars: %{customdata[4]}'
    ),
)

In [None]:
# Roped sends over time: one square marker per clean send, date on x and grade on y.
# The grade order is reversed ([::-1]) before being handed to plotly as the category order.
fig = px.scatter(
    df_clean_sends_r,
    "Date",
    "Rating",
    category_orders={"Rating": ryaxorder[::-1]},
    text='Attempts',
    custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars'],
)
fig.update_layout(
    font=dict(family='Courier New', color='black', size=20),
    title=dict(text='<b>Send by Date</b>', x=0.5, font_size=30),
    xaxis=dict(title='Date'),
    yaxis=dict(title='Grade'),
    paper_bgcolor='#ece5dc',
    plot_bgcolor='#F5D3A5',
    bargap=0,
)
# The hover box shows the custom_data columns in the order listed above.
# update_traces stays the last expression so the notebook renders the figure.
fig.update_traces(
    marker_symbol='square',
    marker_color='#7A4F25',
    marker_size=20,
    marker_line_width=2,
    marker_line_color='black',
    textfont=dict(color='White', size=12),
    hovertemplate=(
        'Name: %{customdata[0]}<br>'
        'Date: %{customdata[1]}<br>'
        'Location: %{customdata[2]}<br>'
        'Length: %{customdata[3]}ft<br>'
        'Avg Stars: %{customdata[4]}'
    ),
)

In [None]:
# Boulder pyramid: one horizontal bar segment per clean send, stacked by grade.
# The grade order is reversed ([::-1]) before being handed to plotly as the category order.
fig = px.bar(
    df_clean_sends_b,
    y="Rating",
    orientation='h',
    category_orders={"Rating": byaxorder[::-1]},
    text='Attempts',
    custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars'],
)
fig.update_layout(
    font=dict(family='Courier New', color='black', size=18),
    title=dict(text='<b>Climbing Pyramid</b>', x=0.5, font_size=30),
    xaxis=dict(title='Number of Problems Sent'),
    yaxis=dict(title='Grade'),
    paper_bgcolor='#ece5dc',
    plot_bgcolor='#F5D3A5',
    bargap=0,
)
# Alternative styling (disabled): pass a per-row marker_color list that uses '#7A4F25' where
# 'Attempts' is blank and '#bf9315' otherwise, to highlight worked sends.
# The hover box shows the custom_data columns in the order listed above.
# update_traces stays the last expression so the notebook renders the figure.
fig.update_traces(
    marker_color='#7A4F25',
    textposition="inside",
    textfont=dict(color='White', size=12, family='Arial Black'),
    hovertemplate=(
        'Name: %{customdata[0]}<br>'
        'Date: %{customdata[1]}<br>'
        'Location: %{customdata[2]}<br>'
        'Length: %{customdata[3]}ft<br>'
        'Avg Stars: %{customdata[4]}'
    ),
)

In [None]:
# Boulder sends over time: one square marker per clean send, date on x and grade on y.
# The grade order is reversed ([::-1]) before being handed to plotly as the category order.
fig = px.scatter(
    df_clean_sends_b,
    "Date",
    "Rating",
    category_orders={"Rating": byaxorder[::-1]},
    text='Attempts',
    custom_data=['Route', 'Date Formatted', 'Location', 'Length', 'Avg Stars'],
)
fig.update_layout(
    font=dict(family='Courier New', color='black', size=20),
    title=dict(text='<b>Send by Date</b>', x=0.5, font_size=30),
    xaxis=dict(title='Date'),
    yaxis=dict(title='Grade'),
    paper_bgcolor='#ece5dc',
    plot_bgcolor='#F5D3A5',
    bargap=0,
)
# The hover box shows the custom_data columns in the order listed above.
# update_traces stays the last expression so the notebook renders the figure.
fig.update_traces(
    marker_symbol='square',
    marker_color='#7A4F25',
    marker_size=20,
    marker_line_width=2,
    marker_line_color='black',
    textfont=dict(color='White', size=12),
    hovertemplate=(
        'Name: %{customdata[0]}<br>'
        'Date: %{customdata[1]}<br>'
        'Location: %{customdata[2]}<br>'
        'Length: %{customdata[3]}ft<br>'
        'Avg Stars: %{customdata[4]}'
    ),
)