In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from pandas_profiling import ProfileReport
import pickle
import os

In [2]:
# Load the scraped route data and remove duplicate routes.
# NOTE(review): the path below is absolute and machine-specific; consider
# moving it to a configurable data directory so the notebook runs elsewhere.
# SECURITY: pickle.load can execute arbitrary code -- only load trusted files.
with open('C:/Users/Brayden/Desktop/LocScrapes/Indian Creek (Full).pkl', 'rb') as pkl_file:
    # The pickle unpacks into three values; only the first one is used here.
    # The context manager guarantees the file handle is closed after loading.
    df, _, _ = pickle.load(pkl_file)
# Keep a single row per route URL.
df = df.drop_duplicates(subset='URL')

In [None]:
# Prefab EDA
# Build a pandas_profiling report over the whole frame and render it with the
# report's widget view.
profile = ProfileReport(df)
profile.to_widgets()

In [10]:
# Number of Ticks Exploration
# Treat missing tick counts as zero and store the converted column back on the
# frame.  fillna(..., inplace=True) returns None, so wrapping that call in
# pd.to_numeric converted nothing and the result was thrown away.
df['Num Ticks'] = pd.to_numeric(df['Num Ticks'].fillna(0), downcast='integer')

col = df['Num Ticks']
# Tick counts at the 0th..99th percentiles; reused by the percentile plot cell.
yperc = np.percentile(col, range(0, 100))

# Histogram of tick counts with a box-plot marginal.
fig1 = px.histogram(col, nbins=500, marginal='box', width=800)
fig1.update_layout(showlegend=False, title_x=0.5)
# Label only the subplot at row=1, col=1.
fig1.update_xaxes(title="Number of Ticks", row=1, col=1)
fig1.write_html('NumTicks_Hist.html')
fig1

In [12]:
# Percentile curve: tick count on the x axis, percentile rank on the y axis.
fig2 = px.line(
    x=yperc,
    y=range(0, 100),
    title='<b>Number of Ticks Percentile Plot</b>',
    width=800,
)
fig2.update_traces(line_width=4)
fig2.update_layout(title_x=0.5)
# Clip the x axis to [-10, 1000].
fig2.update_xaxes(title="Number of Ticks", range=[-10, 1000])
fig2.update_yaxes(title="Percentile (%)")
fig2.write_html('NumTicks_Perc.html')
fig2

In [5]:
# Writes the percentile figure (fig2) to "file.html" in the working directory.
# NOTE(review): this looks like a leftover scratch export -- fig2 is already
# written to 'NumTicks_Perc.html' in the cell that builds it, and this cell's
# execution count is out of order. Confirm whether it is still needed.
fig2.write_html("file.html")

In [30]:
# Percent of climbs responsible for percent of ticks analysis
# Assign the cleaned column back: fillna(..., inplace=True) returns None, so
# passing it to pd.to_numeric converted nothing.
df['Num Ticks'] = pd.to_numeric(df['Num Ticks'].fillna(0), downcast='integer')
df_sorted = df.sort_values('Num Ticks', ascending=False)['Num Ticks']
sum_ticks = df['Num Ticks'].sum()
num_climbs = len(df.index)

# Entry i holds the ticks logged on the i most-ticked climbs, for
# i = 0 .. num_climbs - 1.  A single cumulative sum over the descending counts
# replaces calling nlargest(i) once per climb (quadratic work).  The length is
# kept at num_climbs because the following cell plots this array against
# range(num_climbs).
running_totals = np.cumsum(df_sorted.to_numpy(dtype=float))
df_sorted_sums = np.concatenate(([0.0], running_totals))[:num_climbs]
# Convert to a percentage of all ticks.
df_sorted_sums = (df_sorted_sums / sum_ticks) * 100
# Matching x values: share of climbs included, in percent (1 decimal place).
# np.round replaces np.round_, which was removed in NumPy 2.0.
df_perc_of_climbs = np.round((np.arange(num_climbs) / num_climbs) * 100, 1)

fig = px.line(x=df_perc_of_climbs, y=df_sorted_sums, title="<b>Percent of Climbs Responsible for Percent of Ticks</b>", width=800, height=400)
fig.update_yaxes(range=[0, 100], title="Percent of Ticks (%)")
fig.update_xaxes(range=[0, 100], title="Percent of Climbs (%)")
fig.update_layout(title_x=0.5)
fig.write_html('NumTicks_PercResp.html')
fig

In [34]:
# Cumulative share of ticks against the number of top routes included.
route_rank = list(range(0, num_climbs))
fig = px.line(
    x=route_rank,
    y=df_sorted_sums,
    title="<b>N Most Ticked Routes Vs. Percent of Ticks</b>",
    width=800,
    height=400,
)
fig.update_layout(title_x=0.5)
# Only the first 500 routes are shown on the x axis.
fig.update_xaxes(range=[0, 500], title="N Most Ticked Routes")
fig.update_yaxes(range=[0, 100], title="Percent of Ticks (%)")
fig.write_html('NumTicks_NMost.html')
fig

In [33]:
# Top climbed analysis
# Take the 30 most-ticked routes.  The previous slice [0:29] returned only 29
# rows although the chart title and output file both say "Top 30".
top_climbed = df.sort_values('Num Ticks', ascending=False).head(30)
# NOTE(review): this sets the matplotlib default figure size; the chart below
# is drawn with plotly (px.bar), which is sized through its own width argument.
plt.rcParams['figure.figsize'] = (30, 5)
fig = px.bar(top_climbed, x='Route', y='Num Ticks', title="<b>Top 30 Routes Tick Counts</b>", text_auto=True, width=800)
fig.update_xaxes(title='Route Name')
fig.update_yaxes(title='Number of Ticks')
fig.update_layout(title_x=0.5)
fig.write_html('NumTicks_Top30.html')
fig

In [93]:
# Sum ticks by area
# Total ticks per 'Location' value, largest first, as a two-column frame.
ticks_by_location = df.groupby('Location')['Num Ticks'].sum()
df_loc_traf = ticks_by_location.sort_values(ascending=False).to_frame().reset_index()
# Keep only the text before the first '>' of each location string.
df_loc_traf['Location'] = df_loc_traf['Location'].str.split('>').str[0]
# Plot the 30 locations with the most ticks.
fig = px.bar(
    df_loc_traf.head(30),
    x='Location',
    y='Num Ticks',
    title="<b>Sum of Ticks Per Crag</b>",
    width=800,
)
fig.update_layout(title_x=0.5)
fig.update_xaxes(title='Crag Name')
fig.update_yaxes(title='Sum Number of Ticks')
fig.write_html('NumTicks_Walls.html')
fig

In [96]:
# Ticks per route for each location: summed ticks divided by the route count.
ticks_per_crag = df.groupby('Location')['Num Ticks'].sum()
routes_per_crag = df.groupby('Location')['Route'].count()
tot = ticks_per_crag / routes_per_crag
# Exclude every location whose string contains "Bridger".
tot = tot[~tot.index.str.contains("Bridger")]
# Keep the 30 highest ratios as a frame with a 'Location' column.
tot = tot.sort_values(ascending=False).to_frame().reset_index().head(30)
# Keep only the text before the first '>' of each location string.
tot['Location'] = tot['Location'].str.split('>').str[0]
# The unnamed ratio series becomes column 0 in the frame; give it a real name.
tot = tot.rename(columns={0: 'Norm'})
fig = px.bar(
    tot,
    x='Location',
    y='Norm',
    title="<b>Sum Ticks / Sum Routes Per Crag</b>",
    width=800,
)
fig.update_layout(title_x=0.5)
fig.update_xaxes(title='Crag Name')
fig.update_yaxes(title='Sum Ticks / Sum Routes')
fig.write_html('NumTicksNorm_Walls.html')
fig

In [95]:
# Lead Ratio distribution for routes with at least 30 ticks.
# Assign the filled column back rather than calling fillna(inplace=True) on
# df['Lead Ratio']: the chained in-place call acts on an intermediate object
# and is not guaranteed to update df (it never does under Copy-on-Write).
df['Lead Ratio'] = df['Lead Ratio'].fillna(0)
# dfs (routes with Num Ticks >= 30) is reused by later cells.
dfs = df[(df['Num Ticks'] >= 30)]

col = dfs['Lead Ratio']

# Subtitle now states the filter actually applied above (>= 30, not > 30).
fig1 = px.histogram(col, marginal='box', width=800, height=300, title="<b>Lead Ratio</b> <br>(Number of Ticks >= 30)<br>")
fig1.update_layout(showlegend=False, title_x=0.5)
fig1.update_xaxes(title="Lead Ratio", row=1, col=1, range=[0, 1])
fig1.write_html('Tick_Cutoff_30.html')
fig1

In [78]:
# Routes in dfs with a Lead Ratio below 0.4, lowest ratio first, shown with a
# widened URL column.
low_lead_cols = ['Route', 'Location', 'URL', 'Rating', 'Num Ticks', 'Lead Ratio']
(
    dfs.loc[dfs['Lead Ratio'] < 0.4]
    .sort_values('Lead Ratio', ascending=True)
    .loc[:, low_lead_cols]
    .style.set_properties(subset='URL', **{'width': '300px'})
)

Unnamed: 0,Route,Location,URL,Rating,Num Ticks,Lead Ratio
1028,Pringles,Supercrack Buttress > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/105717427/pringles,5.11+,143.0,0.064815
900,Naked and the Dead Variation,Donnelly Canyon > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/105943086/naked-and-the-dead-variation,5.11,221.0,0.068027
1203,Inflictor,Broken Tooth > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/106123505/inflictor,5.12-,112.0,0.173913
278,Town,4X4 > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/105718396/town,5.10d,225.0,0.267974
491,Dirty Woman,The Wall > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/107385681/dirty-woman,5.10,113.0,0.270588
922,New World Order,Battle of the Bulge Buttress > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/106155187/new-world-order,5.11b/c,112.0,0.289474
819,Mariposa,Habitado > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/116668339/mariposa,5.10-,212.0,0.338983
631,(Unknown),Selfish Wall > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/109590225/unknown,5.9,180.0,0.353741
94,Unnamed 5.10 Flake,Blue Gramma Cliff > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/106141573/unnamed-510-flake,5.10,118.0,0.356164
115,The Fat Farm,Scarface > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/106069249/the-fat-farm,5.10,188.0,0.368421


In [47]:
# Distribution of 'Repeat Sender Ratio' over dfs, the Num Ticks >= 30 subset
# built in the Lead Ratio cell.
col = dfs['Repeat Sender Ratio']

# The subtitle states the filter that dfs actually applies (it used to say
# "> 100").
fig1 = px.histogram(col, marginal='box', width=800, title="<b>Repeat Sender Ratio</b> <br>(Number of Ticks >= 30)<br>")
fig1.update_layout(showlegend=False, title_x=0.5)
# The axis is labelled with the plotted column (it used to say "Lead Ratio").
# The axis range is left to plotly: the previous range=_ read whatever value
# the name `_` happened to hold in the kernel, which is not reproducible.
fig1.update_xaxes(title="Repeat Sender Ratio", row=1, col=1)
# Written with an .html extension, under a name that does not collide with
# 'RepeatSends_Hist.html' produced by the normalized histogram cell.
fig1.write_html("RepeatSenderRatio_Hist.html")
fig1

In [48]:
# Create repeat sender sum value not given by original analysis
df['Repeat Sender Sum'] = df['Route Ticks'].apply(lambda x: 
            x[x["Lead Style"].isin(["Onsight", "Flash", "Redpoint", "Pinkpoint", "Send"])]
            .groupby("Username")["Lead Style"]
            .count()
            .sum()
        )

In [49]:
# Distribution of 'Repeat Sender Sum' over every route in df (no tick filter).
col = df['Repeat Sender Sum']

# The subtitle used to claim "Number of Ticks > 100", but no filter is applied
# to df here, so it now says the data covers all routes.
fig1 = px.histogram(col, marginal='box', width=800, title="<b>Repeat Sender Sum</b> <br>(All Routes)<br>")
fig1.update_layout(showlegend=False, title_x=0.5)
# The axis is labelled with the plotted column (it used to say "Lead Ratio").
# The axis range is left to plotly: the previous range=_ read whatever value
# the name `_` happened to hold in the kernel, which is not reproducible.
fig1.update_xaxes(title="Repeat Sender Sum", row=1, col=1)
fig1

In [109]:
# The 30 routes with the largest 'Repeat Sender Sum'.
# Most of these are repeats of top ticked climbs, with notable exceptions of
# Dr. Carl
# Jupiter Crack
# Lady Pillar
# Lightning Bolt Crack
# Unnamed 5.10 LF flake in deep corner
top_repeat_sum = df.sort_values('Repeat Sender Sum', ascending=False).head(30)
top_repeat_sum[['Route', 'Repeat Sender Sum']]

Unnamed: 0,Route,Repeat Sender Sum
354,Blue Sun,714
149,Incredible Hand Crack,655
148,Supercrack of the Desert,654
141,Twin Cracks,623
3,Chocolate Corner,587
4,Generic Crack,581
1,Binou's Crack,539
135,Scarface,417
114,Wavy Gravy,332
236,South Face,325


In [64]:
# Repeat sender sum as a fraction of each route's total ticks.
df['Normalized Repeat Sender Sum'] = df['Repeat Sender Sum'] / df['Num Ticks']
# Show full cell contents (e.g. long URLs).  None means "no width limit";
# passing -1 is deprecated and triggers the warning seen in this cell's output.
pd.set_option('display.max_colwidth', None)
# Top 30 routes by normalized value among routes with more than 100 ticks.
df[df['Num Ticks'] > 100].sort_values('Normalized Repeat Sender Sum', ascending=False).head(30)[['Route', 'Location', 'URL', 'Rating', 'Normalized Repeat Sender Sum']]


Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.



Unnamed: 0,Route,Location,URL,Rating,Normalized Repeat Sender Sum
793,Skinny Dip,The Beach > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/116802263/skinny-dip,5.9,0.517544
603,Nomadic Alternative,Suburbia > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/107848733/nomadic-alternative,5.10-,0.466165
56,[Redacted],Reservoir Wall > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/105718684/redacted,5.10-,0.451389
794,Crack Slabbath,The Beach > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/116094125/crack-slabbath,5.9+,0.447853
139,Triple Jeopardy,Supercrack Buttress > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/105717421/triple-jeopardy,5.8,0.446512
510,Sparkling Zygote,Sparks Wall > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/106293592/sparkling-zygote,5.10-,0.422425
52,Good Excuse,Reservoir Wall > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/106278440/good-excuse,5.9,0.41958
796,Kelley Route 8,The Beach > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/116094137/kelley-route-8,5.10-,0.398438
756,1 to 4 crack,Public Service Wall > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/107856802/1-to-4-crack,5.10,0.396825
404,Prepare for Disappointment,Cliffs of Insanity > Indian Creek > Southeast Utah > Utah,https://www.mountainproject.com/route/113834967/prepare-for-disappointment,5.9,0.392308


In [97]:
# Distribution of the normalized repeat sender sum for routes with at least
# 100 ticks.
col = df[df['Num Ticks'] >= 100]['Normalized Repeat Sender Sum']

# Subtitle now states the filter actually applied above (>= 100, not > 100).
fig1 = px.histogram(col, marginal='box', width=800, title="<b>Sum Repeat Send Ticks / Sum Ticks</b> <br>(Number of Ticks >= 100)<br>")
fig1.update_layout(showlegend=False, title_x=0.5)
# The axis range is left to plotly: the previous range=_ read whatever value
# the name `_` happened to hold in the kernel, which is not reproducible.
fig1.update_xaxes(title="Sum Repeat Send Ticks / Sum Ticks", row=1, col=1)
fig1.write_html("RepeatSends_Hist.html")
fig1

In [114]:
# The 30 routes in dfs with the highest 'Repeat Sender Ratio'.
# Climbs that stand out once normalized; these can be thought of as
# underground favorites:
# 1 to 4 crack
# 24 Unknown
# Act or React
# Breakfast Social
# Cactus Flower
# Crack Slabbath
# Horse Crack
# Kelley Route 1
# Low Cholesterol
# Mariposa
# More Than One Way
# Pigs On The Wing
# Short and Stupid
# Spam
# Tag Team
# The Ooze
# The Thing
# Think Pink
# Unnamed
# Unnamed 10+ (steep thin hands dihedral to pod)
# Unnamed 9+ (big hands dihedral)
top_repeat_ratio = dfs.sort_values('Repeat Sender Ratio', ascending=False).head(30)
top_repeat_ratio[['Route', 'Repeat Sender Ratio']]

Unnamed: 0,Route,Repeat Sender Ratio
641,Tag Team,1.140351
638,Breakfast Social,1.131579
598,Cactus Flower,1.096774
639,The Ooze,1.092308
632,Hand Solo,1.091575
35,Think Pink,1.085714
86,Unnamed 9+ (big hands dihedral),1.083333
21,Unnamed 5.10 LF flake in deep corner,1.078049
1144,Annunaki,1.076923
677,Act or React,1.071429
