# Project Name: HealthLens – Understanding Patient Data & Health Trends

## Task 1: Data Cleaning

### 1. Import and Load the Dataset

In [49]:
# Import libraries
import pandas as pd

In [51]:
# Load the raw dataset.
# The cleaning step further down treats '?' as the missing-value placeholder,
# so declare it as NA at parse time. Without this, any column that contains a
# '?' is read as text rather than numbers.
df = pd.read_csv("uncleaned_heart_cleveland_dataset.csv", na_values="?")

### 2. Rename Technical Column Names

In [54]:
# Map each terse source column name to a self-explanatory header.
# Names that are not present in the frame are simply ignored by rename().
readable_names = {
    'age': 'Age',
    'sex': 'Sex',
    'cp': 'ChestPainType',
    'trestbps': 'RestingBP',
    'chol': 'Cholesterol',
    'fbs': 'FastingBS',
    'restecg': 'RestingECG',
    'thalach': 'MaxHR',
    'exang': 'ExerciseAngina',
    'oldpeak': 'Oldpeak',
    'slope': 'ST_Slope',
    'ca': 'NumMajorVessels',
    'thal': 'Thalassemia',
    'condition': 'HeartDisease',
}
df = df.rename(columns=readable_names)


### 3. Handle Missing or Abnormal Values

In [57]:
# Coerce every column to a numeric dtype.
# At this stage all columns are expected to hold raw numeric codes or
# measurements, so any non-numeric entry (including the '?' placeholder) is
# invalid and becomes NaN. Converting only a couple of columns would leave
# other '?'-contaminated columns as text, and the integer-keyed label
# mappings applied later would then silently turn them into NaN.
df = df.apply(pd.to_numeric, errors='coerce')

# Drop rows with any missing/invalid value, then renumber the index from 0
df = df.dropna().reset_index(drop=True)


### 4. Convert Categorical Variables to Readable Labels

In [60]:
# Raw integer code -> readable label, for every categorical column.
CATEGORY_LABELS = {
    'Sex': {0: 'Female', 1: 'Male'},
    'ChestPainType': {
        0: 'Typical Angina',
        1: 'Atypical Angina',
        2: 'Non-Anginal Pain',
        3: 'Asymptomatic',
    },
    'FastingBS': {0: '≤120 mg/dl', 1: '>120 mg/dl'},
    'RestingECG': {
        0: 'Normal',
        1: 'ST-T Abnormality',
        2: 'Left Ventricular Hypertrophy',
    },
    'ExerciseAngina': {0: 'No', 1: 'Yes'},
    'ST_Slope': {
        0: 'Upsloping',
        1: 'Flat',
        2: 'Downsloping',
    },
    'Thalassemia': {
        0: 'Normal',
        1: 'Fixed Defect',
        2: 'Reversible Defect',
    },
    'HeartDisease': {0: 'No', 1: 'Yes'},
}

for column, labels in CATEGORY_LABELS.items():
    # Column already holds labels (e.g. this cell was re-run): leave it alone.
    # Mapping it again would find no integer codes and blank it out to NaN.
    if df[column].isin(list(labels.values())).all():
        continue

    # Codes may arrive as ints, floats or digit strings depending on how the
    # column was parsed; normalise them before looking them up.
    codes = pd.to_numeric(df[column], errors='coerce')

    # .map() silently turns any value missing from the lookup into NaN, which
    # would reintroduce missing data after the dropna step. Fail loudly instead.
    unexpected = list(df.loc[~codes.isin(list(labels)), column].unique())
    if unexpected:
        raise ValueError(
            f"Column '{column}' contains values with no label: {unexpected}"
        )

    df[column] = codes.map(labels)

### Checking Null Values

In [63]:
# Count missing values per column; every count should be 0 after cleaning
df.isna().sum()

Age                0
Sex                0
ChestPainType      0
RestingBP          0
Cholesterol        0
FastingBS          0
RestingECG         0
MaxHR              0
ExerciseAngina     0
Oldpeak            0
ST_Slope           0
NumMajorVessels    0
Thalassemia        0
HeartDisease       0
dtype: int64

### Preview the Cleaned Data

In [66]:
# Leave the frame as the cell's last expression so the notebook renders it
# as a table instead of line-wrapped plain text
df.head()

   Age     Sex   ChestPainType  RestingBP  Cholesterol   FastingBS  \
0   69    Male  Typical Angina        160          234  >120 mg/dl   
1   69  Female  Typical Angina        140          239  ≤120 mg/dl   
2   66  Female  Typical Angina        150          226  ≤120 mg/dl   
3   65    Male  Typical Angina        138          282  >120 mg/dl   
4   64    Male  Typical Angina        110          211  ≤120 mg/dl   

                     RestingECG  MaxHR ExerciseAngina  Oldpeak     ST_Slope  \
0  Left Ventricular Hypertrophy    131             No      0.1         Flat   
1                        Normal    151             No      1.8    Upsloping   
2                        Normal    114             No      2.6  Downsloping   
3  Left Ventricular Hypertrophy    174             No      1.4         Flat   
4  Left Ventricular Hypertrophy    144            Yes      1.8         Flat   

   NumMajorVessels Thalassemia HeartDisease  
0                1      Normal           No  
1           

In [68]:
# Persist the cleaned frame as CSV, leaving out the positional index column
df.to_csv(path_or_buf="cleaned_heart_cleveland_dataset.csv", index=False)