# In [2]:
import warnings

import pandas as pd

# Integer codes for the label-encoded columns. A label that is missing from
# its table is encoded as NaN (see _encode_labels).
_GENDER_CODES = {'Female': 0, 'Male': 1}
_EXERCISE_CODES = {'Not Daily': 0, 'Daily': 1}
_MENTAL_HEALTH_CODES = {'Fair': 1, 'Good': 2, 'Excellent': 3}


def _encode_labels(labels: pd.Series, codes: dict) -> pd.Series:
    """Map each label in *labels* to its integer code from *codes*.

    Labels that have no entry in *codes* become NaN. When that happens for a
    non-missing label, a warning listing the offending labels is emitted so
    the gap is visible instead of silently ending up in the output.
    """
    encoded = labels.map(codes)
    unmapped = labels[labels.notna() & encoded.isna()]
    if not unmapped.empty:
        warnings.warn(
            f"Column {labels.name!r} has labels with no code, left as NaN: "
            f"{sorted(unmapped.astype(str).unique())}",
            stacklevel=3,
        )
    return encoded


def _split_blood_pressure(readings: pd.Series) -> pd.DataFrame:
    """Split '<systolic>/<diastolic> mmHg' strings into two integer columns.

    Returns a frame with columns 'Systolic' and 'Diastolic' on the same index
    as *readings*. Missing readings stay missing: the result then uses the
    nullable 'Int64' dtype, otherwise plain ``int``.

    Raises:
        ValueError: if a non-missing reading does not split into exactly two
            integer parts.
    """
    # regex=False: ' mmHg' is a literal suffix, not a pattern.
    parts = (
        readings.str.replace(' mmHg', '', regex=False)
        .str.split('/', expand=True)
    )
    if parts.shape[1] > 2:
        raise ValueError(
            "'Blood Pressure' values must contain a single '/' separator"
        )
    # An empty or all-missing column splits into fewer than two columns;
    # pad so both outputs always exist.
    parts = parts.reindex(columns=[0, 1])

    incomplete = readings.notna() & parts.isna().any(axis=1)
    if incomplete.any():
        raise ValueError(
            "Cannot parse 'Blood Pressure' values: "
            f"{readings[incomplete].unique().tolist()}"
        )

    if readings.isna().any():
        # Plain int cannot represent missing values; go through float to the
        # nullable integer dtype instead of crashing.
        parts = parts.astype(float).astype('Int64')
    else:
        parts = parts.astype(int)
    parts.columns = ['Systolic', 'Diastolic']
    return parts


def preprocess_health_profiles(profiles: pd.DataFrame) -> pd.DataFrame:
    """Return a model-ready copy of the raw health-profile table.

    Steps, in order:
      * drop the 'Name' and 'Recent Medical Procedure' columns;
      * encode 'Gender' as 0 (Female) / 1 (Male);
      * one-hot encode 'Blood Type' and "Parkinson's Subtype";
      * replace 'Blood Pressure' with integer 'Systolic' and 'Diastolic'
        columns;
      * encode 'Exercise Habits' as 0 (Not Daily) / 1 (Daily);
      * encode 'Mental Health' ordinally (Fair=1, Good=2, Excellent=3).

    All other columns (e.g. 'BMI') are passed through unchanged. The input
    frame is not modified.

    Raises:
        KeyError: if an expected column is absent.
        ValueError: if a 'Blood Pressure' value cannot be parsed.
    """
    # drop() returns a new frame, so the caller's data is left untouched.
    result = profiles.drop(columns=['Name', 'Recent Medical Procedure'])

    result['Gender'] = _encode_labels(result['Gender'], _GENDER_CODES)

    # One-hot encode both nominal columns.
    result = pd.get_dummies(
        result, columns=['Blood Type', "Parkinson's Subtype"]
    )

    result[['Systolic', 'Diastolic']] = _split_blood_pressure(
        result['Blood Pressure']
    )
    result = result.drop(columns=['Blood Pressure'])

    result['Exercise Habits'] = _encode_labels(
        result['Exercise Habits'], _EXERCISE_CODES
    )
    result['Mental Health'] = _encode_labels(
        result['Mental Health'], _MENTAL_HEALTH_CODES
    )
    return result


# Run the pipeline only when executed directly (as a script or in a notebook,
# where __name__ is also '__main__'), so the functions above can be imported
# without touching the filesystem.
if __name__ == '__main__':
    # Load the data
    data = pd.read_csv('fake_health_profiles.csv')

    data = preprocess_health_profiles(data)

    # Save the preprocessed data to a new CSV file
    data.to_csv('preprocessed_health_profiles.csv', index=False)
