In [1]:
import pandas as pd
import json

# Load the cleaned survey table and the table describing each feature.
df = pd.read_csv('../data/clean.csv')
meta = pd.read_csv('../data/feature_metadata.csv')

# Columns whose non-missing cells are Python expressions stored as text
# (presumably list literals, since they are later passed through
# `.explode()` -- confirm against the notebook that writes clean.csv).
multi_features = [
    'other_activities',
    'hangboard_grips',
    'hangboard_style',
    'endurance_style',
    'strength_style',
]

# NOTE(review): `eval` executes whatever text the CSV cell contains. If the
# cells are plain literals, `ast.literal_eval` would be the safer parser;
# verify the stored format before switching.
for feature in multi_features:
    parsed_cells = []
    for cell in df[feature].values:
        # NaN is the only value that is not equal to itself, so the
        # comparison is False exactly for missing cells, which become None.
        parsed_cells.append(eval(cell) if cell == cell else None)
    df[feature] = parsed_cells

# Map every 'DataFrameKey' entry of the metadata table to its 'Label'.
key2label = dict(meta[['DataFrameKey', 'Label']].values)

# Experience labels in ascending order: an open lower bucket, fourteen
# one-year-wide buckets, and an open upper bucket.
sorted_years = (
    ['<1 year']
    + [f'{start}-{start + 1} years' for start in range(1, 15)]
    + ['15+ years']
)

# Columns whose labels are offered under the 'strength_metrics' widget option.
strength_metrics = [
    # bouldering grades
    'hardest_boulder_ever',
    'hardest_boulder_confident',
    'hardest_boulder_recent',
    # route grades
    'hardest_route_ever',
    'hardest_route_recent',
    'hardest_route_confident',
    # body-weight exercise maxima (binned)
    'pullups_maxcount_binned',
    'pushups_maxcount_binned',
]

# Exported in the config under 'violin_features'.
violin_features = [
    'height_binned',
    'weight_binned',
    'wingspan_binned',
    'years_climbing',
    'indoor_outdoor',
    'sex',
    'pullups_maxcount_binned',
    'pushups_maxcount_binned',
]

# Exported in the config under 'correx_features'.
corr_features = [
    # climbing grades
    'hardest_boulder_confident',
    'hardest_route_confident',
    # body measurements
    'height',
    'weight',
    'wingspan',
    # exercise maxima
    'pullups_maxcount',
    'pushups_maxcount',
    # training volume
    'weekly_count_climbing',
    'weekly_hours_climbing',
    'weekly_count_campus',
    'weekly_hours_campus',
    'weekly_count_endurance',
    'weekly_count_strength',
    'session_hours_strength',
]

def _distinct_options(column):
    """Return the distinct, truthy, non-NaN values of ``column`` in first-seen order.

    List-valued cells are flattened with ``Series.explode`` before the
    distinct values are collected. A plain truthiness test removes ``None``
    and empty strings but lets NaN through (NaN is truthy), and ``explode``
    yields NaN for empty lists, so NaN is rejected explicitly with the
    ``value == value`` self-comparison.
    """
    return [
        value
        for value in pd.unique(column.explode())
        if value and value == value
    ]


# Grade choices offered by the widgets: the opt-out answer first, then the grades.
boulder = ["I don't boulder"] + [f'V{i}' for i in range(15)]
route = ["I don't climb routes"] + [str(i) for i in range(11, 34)]

widget_distincts = {
    # One entry per multi-valued column, in the order given by `multi_features`.
    **{feature: _distinct_options(df[feature]) for feature in multi_features},
    # Hard-coded pair (presumably the lower/upper bound shown by the widget --
    # confirm against the consumer of config.json).
    'height': [150.0, 201.1],
    # Distinct non-zero weights; NaN is dropped for the same reason as above.
    'weight': [w for w in pd.unique(df.weight) if w and w == w],
    'wingspan': [130.0, 208.0],
    'years_climbing': sorted_years,
    'hardest_boulder_confident': boulder,
    'hardest_boulder_recent': boulder,
    'hardest_boulder_ever': boulder,
    'hardest_route_confident': route,
    'hardest_route_recent': route,
    'hardest_route_ever': route,
    'strength_metrics': [key2label[i] for i in strength_metrics],
    # `tolist()` yields native Python scalars, which `json.dump` can always
    # serialize; `list(series.values)` would keep NumPy scalar types.
    'cid': df.cid.tolist(),
    'indoor_outdoor': list(pd.unique(df.indoor_outdoor)),
    'sex': ['M', 'F']
}

def get_bins(series, n_bins, unit):
    """Build ``n_bins`` equal-width range labels spanning the values of ``series``.

    The span between ``series.min()`` and ``series.max()`` is split into
    ``n_bins`` intervals of equal width. Every interval edge is truncated to
    an integer with ``int`` and each interval is rendered as
    ``"<start>-<end> <unit>"``.

    Parameters
    ----------
    series : object exposing ``min()`` and ``max()``
        In this file, a numeric pandas Series.
    n_bins : int
        Number of labels to produce; must be at least 1.
    unit : str
        Suffix appended to every label after a single space.

    Returns
    -------
    list of str
        ``n_bins`` labels ordered from the lowest to the highest interval.

    Raises
    ------
    ValueError
        If ``n_bins`` is less than 1, or if the minimum/maximum of ``series``
        is missing (for example an empty or all-NaN series).
    """
    if n_bins < 1:
        raise ValueError(f"n_bins must be a positive integer, got {n_bins!r}")

    # Named so the built-in min()/max() are not shadowed inside the function.
    lowest = series.min()
    highest = series.max()
    if pd.isna(lowest) or pd.isna(highest):
        # Without this check the failure would surface later as an opaque
        # "cannot convert float NaN to integer" from int().
        raise ValueError("cannot build bins: series has no non-missing values")

    bin_size = (highest - lowest) / n_bins
    # n_bins intervals need n_bins + 1 edges; int() truncates toward zero.
    edges = [int(lowest + n * bin_size) for n in range(n_bins + 1)]

    # Pair each edge with its successor to form the interval labels.
    return [f"{start}-{end} {unit}" for start, end in zip(edges, edges[1:])]

# (column, unit) pairs: each column gets a '<column>_binned' widget option
# whose labels carry the given unit.
bin_me = [
    ('height', 'cm'),
    ('weight', 'kg'),
    ('wingspan', 'cm'),
    ('pullups_maxcount', 'reps'),
    ('pushups_maxcount', 'reps'),
]

for feature, unit in bin_me:
    binned_key = f'{feature}_binned'
    # Every column is split into 8 bins.
    widget_distincts[binned_key] = get_bins(df[feature], 8, unit)

# Display names exported in the config under 'crossG_features'.
cross_train_group_features = [
    'Years Climbing',
    'Pull-up Max',
    'Push-up Max',
    'Height',
    'Weight',
    'Wingspan',
    'Endurance Sessions per Week',
    'Campus Sessions per Week',
    'Strength Sessions per Week',
    'Hangboard Sessions per Week',
]

# Column keys exported in the config under 'continuous_features'.
continuous_features = [
    # body measurements
    'height',
    'weight',
    'wingspan',
    # exercise maxima
    'pullups_maxcount',
    'pushups_maxcount',
    # training volume
    'weekly_count_campus',
    'weekly_hours_campus',
    'weekly_count_endurance',
    'weekly_count_strength',
    'session_hours_strength',
]

# Grade label -> number, with the opt-out answer appended last and mapped to None.
boulder_map = {
    **{f'V{grade}': grade for grade in range(15)},
    "I don't boulder": None,
}
route_map = {
    **{str(grade): grade for grade in range(11, 34)},
    "I don't climb routes": None,
}

# Column key -> lookup table used to convert that column's labels.
# All boulder columns share one dict object, as do all route columns.
conversion = {
    **dict.fromkeys(
        (
            'hardest_boulder_ever',
            'hardest_boulder_recent',
            'hardest_boulder_confident',
        ),
        boulder_map,
    ),
    **dict.fromkeys(
        (
            'hardest_route_ever',
            'hardest_route_recent',
            'hardest_route_confident',
        ),
        route_map,
    ),
}

# Everything that is written to config.json, gathered under its exported key.
config = dict(
    widget_options=widget_distincts,
    conversion=conversion,
    multi_features=multi_features,
    continuous_features=continuous_features,
    violin_features=violin_features,
    correx_features=corr_features,
    crossG_features=cross_train_group_features,
)

In [2]:
# Serialize the assembled configuration to JSON, replacing any existing file.
with open('../data/config.json', 'w') as config_file:
    json.dump(config, config_file)