In [1]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load the climbing dataset once at start-up; the chart functions below read
# from this module-level frame. The relative path is resolved against the
# working directory the script is launched from.
df = pd.read_csv(filepath_or_buffer='Combined_graphs.csv')


In [7]:
# Dashboard header: the page title followed by a one-sentence description of
# what the dashboard shows.
st.title('Bouldering Ability Dashboard')
st.write(
    'Here is a dashboard showing the effects of various strength and body '
    'composition features on a persons ability to boulder.'
)

In [8]:
#define the pie chart for gender
def plot_gender():
    """Render a pie chart of how many rows fall into each gender group.

    Reads the module-level ``df`` and groups it by its first column
    (assumed to hold the gender value -- TODO confirm against the CSV).
    Writes a caption and the chart directly to the Streamlit page.

    Returns:
        None. The figure is rendered and then closed.
    """
    st.write('This pie chart shows the gender distribution of boulderers in the dataset.')

    # size() is the number of rows per group; groups come back sorted by key.
    counts = df.groupby(df.columns[0]).size()

    default_labels = ['Female', 'Male', 'Other']
    colors = ['purple', 'thistle', 'indigo']

    # ax.pie raises ValueError when the label count differs from the wedge
    # count, so the fixed labels are only used when there are exactly three
    # groups; otherwise label each wedge with the value found in the data.
    if len(counts) == len(default_labels):
        labels = default_labels
    else:
        labels = [str(value) for value in counts.index]

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(counts, labels=labels, colors=colors, autopct='%1.1f%%')
    ax.set_title('Gender of Boulderers')
    ax.legend(labels=labels)

    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed, and
    # Streamlit re-executes this script on each interaction.
    plt.close(fig)

plot_gender()



In [9]:
#create and add all the histograms for the main selected features
st.write('Select a graph of the various features related to climbing to see the distributions')

def _plot_distribution(column_index, title, xlabel, figsize=None):
    """Build a histogram of one column of the module-level ``df``.

    Args:
        column_index: Position of the column in ``df.columns``.
        title: Axes title.
        xlabel: X-axis label.
        figsize: Optional ``(width, height)`` in inches; ``None`` keeps
            Matplotlib's default figure size.

    Returns:
        The Matplotlib figure. The caller is responsible for rendering and
        closing it.
    """
    values = df[df.columns[column_index]].dropna()

    # One bin per whole unit of spread. The count is clamped to at least 1
    # because ax.hist rejects bins=0, which happens when every value is equal
    # or the spread is under one unit. An empty column would otherwise give
    # int(NaN), which raises.
    spread = 0 if values.empty else int(values.max() - values.min())
    bin_count = max(1, spread)

    fig, ax = plt.subplots(figsize=figsize)
    ax.hist(values, bins=bin_count, color='purple', edgecolor='white', linewidth=1.2)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    return fig

def plot_height():
    """Return a histogram figure of column 5 of ``df``, labelled as height in cm."""
    # Height keeps the default figure size, unlike the other three plots.
    return _plot_distribution(5, 'Height Distribution', 'Height (cm)')

def plot_Weight():
    """Return a histogram figure of column 6 of ``df``, labelled as weight in KG."""
    return _plot_distribution(6, 'Weight Distribution', 'Weight (KG)', figsize=(15, 5))

def plot_Span():
    """Return a histogram figure of column 7 of ``df``, labelled as span in cm."""
    return _plot_distribution(7, 'Span Distribution', 'Span (cm)', figsize=(15, 5))

def plot_maxb():
    """Return a histogram figure of column 1 of ``df``, labelled as max boulder."""
    return _plot_distribution(1, 'Max Boulder Distribution', 'Max Boulder', figsize=(15, 5))

#create a dropdown menu to select which plot to display on the dashboard
# Menu label -> function that builds the matching figure. The dict's insertion
# order is the order in which the options appear in the dropdown.
plot_builders = {
    "Height Distribution": plot_height,
    "Weight Distribution": plot_Weight,
    "Span Distribution": plot_Span,
    "Max Boulder Distribution": plot_maxb,
}

plot_choice = st.selectbox('Select a plot', tuple(plot_builders))

build_plot = plot_builders.get(plot_choice)
if build_plot is not None:
    fig = build_plot()
    st.pyplot(fig)
    # Streamlit re-runs the script on every selection; without closing, each
    # rerun leaves another figure registered with pyplot.
    plt.close(fig)


In [10]:
#define the graph for variables separated by male and female values

st.write('This shows the comparison of male and female climbers on the various included features')

def plot_gender_profile():
    """Build side-by-side bar charts of per-gender feature means.

    Reads the module-level ``df``, selects the rows whose ``gender`` column is
    ``'Male'`` or ``'Female'``, and plots the mean of each profile variable in
    its own panel (male on the left, female on the right). The panels are
    created without a shared y-axis.

    Returns:
        The Matplotlib figure holding both panels.
    """
    profile_vars = ['exp', 'pullup', 'pushup', 'height (cm)', 'weight (kg)', 'span (cm)', 'BMI', 'APE', 'pullup_ratio']

    # (value in the gender column, panel title), in left-to-right order
    panels = (
        ('Male', "Male climber profile"),
        ('Female', "Female climber profile"),
    )

    fig, axs = plt.subplots(ncols=2, figsize=(20,5))

    for ax, (gender, panel_title) in zip(axs, panels):
        subset = df.loc[df['gender'] == gender, profile_vars]
        subset.mean().plot.bar(title=panel_title, ax=ax, color='purple')
        ax.set_xlabel('Variables')
        ax.set_ylabel('Mean Value')

    fig.suptitle('Male vs Female Climber Profile', fontsize=20)

    return fig

# Render the male/female profile figure, then release it: pyplot keeps every
# figure it creates until it is closed, and Streamlit re-executes this script
# on each interaction.
fig = plot_gender_profile()
st.pyplot(fig)
plt.close(fig)


DeltaGenerator()

In [11]:
#add a graph showing the importance and correlation of the selected features based on max boulder
st.write('The below graph demonstrates how important each of the selected features is for climbing ability and maximum boulder a climber can do')

def spearman(frame, features, target='max_boulder'):
    """Build a horizontal bar chart of each feature's Spearman correlation with ``target``.

    Args:
        frame: DataFrame containing every column in ``features`` plus ``target``.
        features: Column names to correlate against ``target``.
        target: Column to correlate with; defaults to ``'max_boulder'``.

    Returns:
        The Matplotlib figure, with bars sorted by ascending correlation.
        Returning the figure lets callers pass it to ``st.pyplot`` explicitly
        rather than relying on pyplot's implicit current figure.
    """
    features = list(features)
    spr = pd.DataFrame({
        'feature': features,
        'spearman': [frame[f].corr(frame[target], method='spearman') for f in features],
    }).sort_values('spearman')

    palette = sns.color_palette("Purples_r", len(spr))

    # Figure height scales with the number of features so the bars stay legible.
    fig, ax = plt.subplots(figsize=(6, 0.25*len(features)))
    sns.barplot(data=spr, y='feature', x='spearman', orient='h', palette=palette, ax=ax)
    ax.set_title(f'Spearman correlation with {target}')
    ax.set_xlabel('Spearman correlation')
    return fig

quant_feat =['sex', 'exp', 'pullup', 'pushup', 'height (cm)', 'weight (kg)', 'span (cm)', 'BMI', 'APE', 'pullup_ratio']

# The figure is passed explicitly, so the 'deprecation.showPyplotGlobalUse'
# option is no longer needed. Newer Streamlit releases reject that key as an
# unrecognized config option.
fig = spearman(df, quant_feat)
st.pyplot(fig)
plt.close(fig)


DeltaGenerator()

In [15]:
#graph of top and bottom climbers and how they differ on the selected features
def plot_climbing_metrics(df):
    """Render the mean difference between "Top" and "Bottom" climbers per metric.

    Groups ``df`` by its ``top_or_bottom`` column, averages each metric within
    the groups, and plots ``Top - Bottom`` as a horizontal bar chart. The chart
    is followed by a fixed explanatory paragraph.

    Args:
        df: DataFrame with a ``top_or_bottom`` column containing the values
            ``"Top"`` and ``"Bottom"``, plus every metric column listed below.

    Returns:
        None. The chart and the text are written to the Streamlit page.

    Raises:
        KeyError: If a metric column is missing, or if either the ``"Top"`` or
            ``"Bottom"`` group is absent from the data.
    """
    compare_vars = ["exp", "pullup", "pushup", "height (cm)", "weight (kg)", "span (cm)", "BMI", "APE", "pullup_ratio"]

    group_means = df.groupby("top_or_bottom")[compare_vars].mean()

    # Positive values mean the "Top" group averages higher on that metric.
    diff_mean = group_means.loc["Top"] - group_means.loc["Bottom"]

    fig, ax = plt.subplots(figsize=(10,6))
    diff_mean.plot.barh(ax=ax, color="purple")

    ax.set_title("Comparison of Top and Bottom Climbers on Selected Metrics")
    ax.set_xlabel("Mean Difference")

    st.pyplot(fig)
    # pyplot keeps every figure it creates until closed; release it so reruns
    # of the script do not accumulate figures.
    plt.close(fig)

    # NOTE(review): this interpretation is fixed text, not derived from the
    # data, so it must be re-checked whenever the dataset changes.
    st.write('This graph shows the mean difference between the top and bottom climbers for each selected metric. A positive value means that the mean of the top climbers is higher than the mean of the bottom climbers, and a negative value means that the mean of the bottom climbers is higher. Looking at the graph, we can see that the top climbers have higher values for pullup, pushup, APE, and pullup_ratio, while the bottom climbers have higher values for BMI. The other metrics do not show a clear difference between the top and bottom climbers. Therefore, we can say that the top climbers generally have better upper body strength (as indicated by higher values for pullup and pushup), longer arms (as indicated by higher values for APE), and a lower BMI, while the bottom climbers have a higher BMI.')

plot_climbing_metrics(df)