In [None]:
import numpy as np
import lightningchart as lc
import pandas as pd


In [3]:
df = pd.read_csv('athletes.csv')

lc.set_license('P001-ZhUezT9ZksTWx1FNJa7ASzAy0S5zEAATHr1GLfqruMFRLEzDry9V-MEYCIQDvahFnLhfzgQ3cY2/d0/qZG5UhZz/otqsi0MhIKhSZqwIhAPxmnHhKYV3zg9SpffCU92GQiWfmny3GmSxtTz4d92vu')
df.head()

Unnamed: 0,athlete_id,name,region,team,affiliate,gender,age,height,weight,fran,...,snatch,deadlift,backsq,pullups,eat,train,background,experience,schedule,howlong
0,2554.0,Pj Ablang,South West,Double Edge,Double Edge CrossFit,Male,24.0,70.0,166.0,,...,,400.0,305.0,,,I workout mostly at a CrossFit Affiliate|I hav...,I played youth or high school level sports|I r...,I began CrossFit with a coach (e.g. at an affi...,I do multiple workouts in a day 2x a week|,4+ years|
1,3517.0,Derek Abdella,,,,Male,42.0,70.0,190.0,,...,,,,,,I have a coach who determines my programming|I...,I played youth or high school level sports|,I began CrossFit with a coach (e.g. at an affi...,I do multiple workouts in a day 2x a week|,4+ years|
2,4691.0,,,,,,,,,,...,,,,,,,,,,
3,5164.0,Abo Brandon,Southern California,LAX CrossFit,LAX CrossFit,Male,40.0,67.0,,211.0,...,200.0,375.0,325.0,25.0,I eat 1-3 full cheat meals per week|,I workout mostly at a CrossFit Affiliate|I hav...,I played youth or high school level sports|,I began CrossFit by trying it alone (without a...,I usually only do 1 workout a day|,4+ years|
4,5286.0,Bryce Abbey,,,,Male,32.0,65.0,149.0,206.0,...,150.0,,325.0,50.0,I eat quality foods but don't measure the amount|,I workout mostly at a CrossFit Affiliate|I inc...,I played college sports|,I began CrossFit by trying it alone (without a...,I usually only do 1 workout a day|I strictly s...,1-2 years|


In [9]:

stats_and_performance_features = df[
    ['age',
     'height_meters',
     'weight_kg',
     'run400',
     'run5k',
     'snatch',
     'deadlift',
     'backsq',
     'pullups'
    ]
]

In [10]:
#Cleaning the selected features

# Removing the upper outliers
df = df[ (df['run400']<150) ]

# Removing the lower outliers
df = df[ (df['run400']>44) ]

# run5k feature

# Removing the upper outliers
df = df[ (df['run5k']<2101) ]

# Removing the lower outliers
df = df[ (df['run5k']>910) ]

# snatch feature

# Removing the upper outliers
df = df[ (df['snatch']<301) ]

# Removing the lower outliers
df = df[ (df['snatch']>55) ]

# deadlift feature

# Removing the upper outliers
df = df[ (df['deadlift']<630) ]

# Removing the lower outliers
df = df[ (df['deadlift']>160) ]

# backsq feature

# Removing the upper outliers
df = df[ (df['backsq']<540) ]

# Removing the lower outliers
df = df[ (df['backsq']>124) ]

# pullups feature


# Removing the upper outliers
df = df[ (df['pullups']<80) ]

# Removing the lower outliers
df = df[ (df['pullups']>0) ]



In [12]:

# Compute correlation matrix
corr_matrix = stats_and_performance_features.corr()
corr_array = corr_matrix.to_numpy()

# Extract min and max correlation values
min_value = corr_array.min()
max_value = corr_array.max()

# Create LightningChart Heatmap
chart = lc.ChartXY(
    title="Correlation Map of Athlete Performance Features",
    theme=lc.Themes.CyberSpace
)

grid_size_x, grid_size_y = corr_array.shape

heatmap_series = chart.add_heatmap_grid_series(
    columns=grid_size_x,
    rows=grid_size_y,
)

heatmap_series.set_start(x=0, y=0)
heatmap_series.set_end(x=grid_size_x, y=grid_size_y)
heatmap_series.set_step(x=1, y=1)
heatmap_series.set_wireframe_stroke(thickness=1, color=lc.Color('lightgrey'))

# Assign correlation values to heatmap
heatmap_series.invalidate_intensity_values(corr_array.tolist())
heatmap_series.set_intensity_interpolation(False)

# Define color scale
palette_steps = [
    {"value": min_value, "color": lc.Color('blue')},  # Negative correlation
    {"value": 0, "color": lc.Color('white')},  # No correlation
    {"value": 1, "color": lc.Color('red')}  # Strong positive correlation
]

heatmap_series.set_palette_coloring(
    steps=palette_steps,
    look_up_property='value',
    interpolate=True
)

# Customize X and Y Axes
x_axis = chart.get_default_x_axis()
y_axis = chart.get_default_y_axis()

x_axis.set_tick_strategy('Empty')
y_axis.set_tick_strategy('Empty')

# Add feature names as axis labels
feature_names = stats_and_performance_features.columns.tolist()
for i, label in enumerate(feature_names):
    custom_tick_x = x_axis.add_custom_tick().set_tick_label_rotation(90)
    custom_tick_x.set_value(i + 0.5)
    custom_tick_x.set_text(label)

    custom_tick_y = y_axis.add_custom_tick()
    custom_tick_y.set_value(i + 0.5)
    custom_tick_y.set_text(label)

# Add legend
chart.add_legend(data=heatmap_series).set_margin(-20)

# Show chart
chart.open(method="browser")
print(f"Min correlation: {min_value}, Max correlation: {max_value}")


Min correlation: -0.0051924118671099065, Max correlation: 1.0


127.0.0.1 - - [04/Mar/2025 10:18:59] "GET / HTTP/1.1" 200 -
