In [3]:
%pip install -U pip pandas scikit-learn

Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Downloading scipy-1.16.3-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl (10.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Build an analysis-ready feature table from the raw heart data: recode the
# binary columns to 0/1, z-score the continuous columns, and keep the Class
# column aside so it is not part of the feature table.

# Load the data
df = pd.read_csv('heart_data.csv')

# Extract and store the Class variable separately (it remains in `df` but is
# left out of `analysis_df` below)
y_class = df['Class'].copy()

# Recode binary variables from 1/2 to 0/1
binary_vars = ['Sex', 'FastingBloodSugar', 'ExerciseInduced']
binary_map = {1: 0, 2: 1}

# Series.map turns every value that is not a key of binary_map into NaN, so
# check first and fail loudly instead of silently losing data (for example if
# a column is already coded 0/1). Missing values pass through unchanged.
unexpected_values = {
    col: df.loc[df[col].notna() & ~df[col].isin(list(binary_map)), col].unique().tolist()
    for col in binary_vars
}
unexpected_values = {col: vals for col, vals in unexpected_values.items() if vals}
if unexpected_values:
    raise ValueError(
        f"Binary columns contain values other than {sorted(binary_map)}: {unexpected_values}"
    )
df[binary_vars] = df[binary_vars].apply(lambda col: col.map(binary_map))

# Identify continuous variables
# NOTE(review): MajorVessels looks like a small integer count rather than a
# continuous measure -- confirm that z-scoring it is intended.
cont_vars = ['Age', 'RestBloodPressure', 'SerumCholestoral', 'MaxHeartRate', 'MajorVessels']

# Standardise the continuous variables (z-scores)
scaler = StandardScaler()
scaled_cont = scaler.fit_transform(df[cont_vars])
# fit_transform returns a plain array; reuse df's index so the column-wise
# concat below lines rows up even if df is ever loaded with a non-default index.
scaled_cont_df = pd.DataFrame(
    scaled_cont,
    columns=[var + '_z' for var in cont_vars],
    index=df.index,
)

# Combine scaled continuous variables with recoded binary and ordinal variables (Slope)
analysis_df = pd.concat([scaled_cont_df, df[binary_vars + ['Slope']]], axis=1)

# Preview the analysis-ready dataset (without Class); a bare last expression
# gives the notebook's rich table display instead of plain printed text.
analysis_df.head()

      Age_z  RestBloodPressure_z  SerumCholestoral_z  MaxHeartRate_z  \
0  1.712094            -0.075410            1.402212       -1.759208   
1  1.382140            -0.916759            6.093004        0.446409   
2  0.282294            -0.411950            0.219823       -0.375291   
3  1.052186            -0.187590            0.258589       -1.932198   
4  2.152032            -0.636310            0.374890       -1.240239   

   MajorVessels_z  Sex  FastingBloodSugar  ExerciseInduced  Slope  
0        2.472682    1                  0                0      2  
1       -0.711535    0                  0                0      2  
2       -0.711535    1                  0                0      1  
3        0.349871    1                  0                1      2  
4        0.349871    0                  0                1      1  
