In [1]:
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import nltk
%matplotlib inline

from bs4 import BeautifulSoup
import requests
import re
import pyinputplus as pyip
import requests
import datetime as dt
from datetime import datetime
from tqdm import tqdm
import pickle
import random
import math 
import string

# Reload imported project modules automatically before each cell executes,
# so edits to local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()
# Raise pandas' display limits so wide/long frames are not truncated in output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 150)

In [118]:
# Import other files
# %run executes each file and leaves the names it defines in this notebook's
# namespace. The helper functions called in the cells below are not defined in
# this notebook, so they presumably come from these two files — TODO confirm.
%run climbconstants.py
%run unique_route_handling.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


--- IMPORT ---

In [57]:
# Mountain Project user page whose list is uploaded.
upload_link = 'https://www.mountainproject.com/user/200180658/brayden-l'
# Which list to pull; presumably the user's to-do list — verify against upload_df.
upload_type = 'todo'
# upload_df is defined outside this notebook; its result becomes the working frame.
df_ulist = upload_df(upload_type, upload_link)

--- DATA CLEANSE AND STANDARDIZE ---

In [58]:
# Rebind df_ulist to the output of data_standardize (defined outside this notebook).
df_ulist = data_standardize(df_ulist)

--- GRADE HOMOGENIZATION AND ROUTE LENGTH CLEANUP ---

In [None]:
# Rebind df_ulist to the output of route_length_fixer (defined outside this notebook).
df_ulist = route_length_fixer(df_ulist)

In [None]:
# grade_homo (defined outside this notebook) returns two values: the updated
# frame and the settings it used, which are kept in grade_homo_settings.
df_ulist, grade_homo_settings = grade_homo(df_ulist)

--- SCRAPE ---

In [None]:
# Rebind df_ulist to the output of route_scrape (defined outside this notebook).
# NOTE(review): presumably performs network requests per route and may be slow;
# the archive cells further down look intended to avoid repeating it — confirm.
df_ulist = route_scrape(df_ulist)

--- ANALYZE ---

In [None]:
# Rebind df_ulist to the output of extract_tick_details (defined outside this notebook).
df_ulist = extract_tick_details(df_ulist)

In [81]:
# Let's save the now scraped dataframe to a pickle file
# The path is relative to the notebook's working directory and has no file
# extension; the load cell below reads the same path back.
df_ulist.to_pickle('../Data_Archive/df_todo_archive')

In [119]:
# To load the new pickle file
# The context manager closes the file handle as soon as the frame has been
# read; previously the handle stayed open for the lifetime of the kernel.
# NOTE: pickle.load can execute arbitrary code, so only load archives that
# were written by this notebook (see the to_pickle cell above).
with open('../Data_Archive/df_todo_archive', 'rb') as picklefile:
    df_ulist = pickle.load(picklefile)

In [None]:
# Rebind df_ulist to the output of tick_analysis (defined outside this notebook).
df_ulist = tick_analysis(df_ulist)

--- SPLIT OUTPUT INTO BOULDER AND ROPED SUBFRAME ---

In [121]:
# Split the list into boulder problems and everything else (roped routes)
is_boulder = df_ulist['Route Type'] == 'Boulder'
df_ulist_r = df_ulist[~is_boulder]
df_ulist_b = df_ulist[is_boulder]

In [None]:
# Filter: build the subsets of interest (rarely led, rarely top-roped, hard or easy to onsight)

In [122]:
# Routes with fewer ticks than this are ignored by every filter below
min_ticks = 30

# Masks shared by several filters, one per subframe so indexes always match
mask_ticks_r = df_ulist_r['Num Ticks'] >= min_ticks
mask_ticks_b = df_ulist_b['Num Ticks'] >= min_ticks
mask_single_pitch = df_ulist_r['Pitches'] == 1

# Rarely led: single-pitch roped routes where under 40% of ticks are leads
mask_low_lead = mask_ticks_r & (df_ulist_r['Lead Ratio'] < 0.4) & mask_single_pitch
df_low_lead = df_ulist_r[mask_low_lead].sort_values(by='Lead Ratio')

# Rarely top-roped: single-pitch roped routes where over 90% of ticks are leads
mask_high_lead = mask_ticks_r & (df_ulist_r['Lead Ratio'] > 0.9) & mask_single_pitch
df_high_lead = df_ulist_r[mask_high_lead].sort_values(by='Lead Ratio', ascending=False)

# Hard to onsight: OS ratio below 0.35, lowest first
df_low_OS_r = df_ulist_r[mask_ticks_r & (df_ulist_r['OS Ratio'] < 0.35)].sort_values(by='OS Ratio')
df_low_OS_b = df_ulist_b[mask_ticks_b & (df_ulist_b['OS Ratio'] < 0.35)].sort_values(by='OS Ratio')

# Easy to onsight: OS ratio above 0.8, highest first
df_high_OS_r = df_ulist_r[mask_ticks_r & (df_ulist_r['OS Ratio'] > 0.8)].sort_values(by='OS Ratio', ascending=False)
df_high_OS_b = df_ulist_b[mask_ticks_b & (df_ulist_b['OS Ratio'] > 0.8)].sort_values(by='OS Ratio', ascending=False)

# Find route on list that is hardest and easiest to OS given a grade
OS_cutoff_num = 3


In [123]:
# For every grade on the list, collect up to 10 roped routes with the lowest
# onsight ratio among those with at least `min_ticks` ticks, then display the
# combined selection ordered by onsight ratio.
#
# Both masks are built from df_ulist_r and applied to df_ulist_r, so the
# boolean key and the frame it filters share one index. Previously a
# df_ulist_r mask was combined with a df_ulist mask and used to index df_ulist,
# which only worked through implicit index alignment (boulder rows silently
# became False) and made pandas reindex the key.
# NOTE(review): OS_cutoff_num is defined in the previous cell but the per-grade
# cutoff used here is the literal 10 — confirm which one is intended.
enough_ticks = df_ulist_r['Num Ticks'] >= min_ticks
grouplist = df_ulist['Rating'].unique()
outlist = []
for group in grouplist:
    same_grade = df_ulist_r['Rating'] == group
    outlist.extend(df_ulist_r[enough_ticks & same_grade].nsmallest(10, 'OS Ratio').index)
df_ulist_r.loc[outlist].sort_values('OS Ratio')

Unnamed: 0,Route,Original Rating,Rating,URL,Pitches,Location,Avg Stars,Your Stars,Route Type,Your Rating,Length,Rating Code,Route ID,Risk,Re Mainpage,Re Statpage,Route Ticks,Num Ticks,Num Tickers,Lead Ratio,OS Ratio,Tick Counts
730,Galen's Crack,5.10c,5.10c,https://www.mountainproject.com/route/10767604...,1,California > Yosemite National Park > Tuolumne...,3.1,-1,Trad,,45.0,3200,107676048,,<Response [200]>,<Response [200]>,Username ...,127,102,0.091837,0.0,TR 89 Lead 9 Fell/Hung ...
53,Hot Fun Sunday (Fell Down),5.11,5.11b,https://www.mountainproject.com/route/10571880...,1,Utah > Southeast Utah > Indian Creek > Fin Wall,3.0,-1,Trad,,70.0,5000,105718804,,<Response [200]>,<Response [200]>,Username ...,43,36,0.5,0.0,Lead 7 TR 5 Fell/Hung 4 F...
872,Make or Break Flake,5.10b,5.10b,https://www.mountainproject.com/route/10572552...,1,California > Joshua Tree National Park > Quail...,2.3,-1,Trad,,60.0,2900,105725521,,<Response [200]>,<Response [200]>,Username ...,39,35,0.466667,0.0,Lead 7 TR 4 Follow 4 F...
897,Desert Gold,5.13a,5.13a,https://www.mountainproject.com/route/10593491...,2,Nevada > Southern Nevada > Red Rocks > 16-Blac...,4.0,-1,Trad,,150.0,8600,105934919,PG13,<Response [200]>,<Response [200]>,Username ...,45,42,0.911765,0.0,Lead 31 Fell/Hung 21 Redpoint ...
322,Crash and Burn,5.12d,5.12d,https://www.mountainproject.com/route/10662024...,1,California > Los Angeles Basin > Santa Monica ...,3.7,-1,Sport,,50.0,7500,106620240,,<Response [200]>,<Response [200]>,Username ...,96,63,0.977778,0.016129,Lead 88 Fell/Hung 44 Redpoint 1...
642,Brother From Another Planet,5.12-,5.12a,https://www.mountainproject.com/route/10605839...,1,Utah > Southeast Utah > Indian Creek > Fin Wall,3.8,-1,Trad,,90.0,6700,106058396,,<Response [200]>,<Response [200]>,Username ...,53,44,0.586957,0.038462,Lead 27 TR 19 Fell/Hung 1...
888,Separate Reality,5.12a,5.12a,https://www.mountainproject.com/route/10587459...,1,California > Yosemite National Park > Yosemite...,4.0,-1,Trad,,50.0,6600,105874590,,<Response [200]>,<Response [200]>,Username ...,200,162,0.876543,0.054264,Lead 142 Fell/Hung 86 Redpoint ...
192,Pow! Right in the Kisser,5.12a,5.12a,https://www.mountainproject.com/route/10702785...,1,California > Los Angeles Basin > Santa Monica ...,3.0,-1,Sport,,40.0,6600,107027859,,<Response [200]>,<Response [200]>,Username ...,331,199,0.875421,0.054622,Lead 260 Fell/Hung 138 Redpoint ...
166,Ground Zero,5.12a,5.12a,https://www.mountainproject.com/route/10607525...,1,California > Los Angeles Basin > Santa Monica ...,3.0,-1,Sport,,35.0,6600,106075250,,<Response [200]>,<Response [200]>,Username ...,225,146,0.82266,0.06135,Lead 167 Fell/Hung 82 Redpoint ...
582,Way Rambo,5.12-,5.12a,https://www.mountainproject.com/route/10571864...,1,Utah > Southeast Utah > Indian Creek > Way Rambo,3.8,-1,Trad,,100.0,6700,105718642,,<Response [200]>,<Response [200]>,Username ...,855,647,0.52459,0.061856,Lead 320 TR 261 Fell/Hung ...
