In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Function to identify categorical columns
def identify_categorical_columns(df):
    """Return the names of the columns in *df* whose dtype is object or category.

    Args:
        df: DataFrame to inspect.

    Returns:
        A list of column labels, in the order they appear in *df*.
    """
    categorical_dtypes = ['object', 'category']
    categorical_part = df.select_dtypes(include=categorical_dtypes)
    column_index = categorical_part.columns
    return column_index.tolist()

# Function that performs Label or One-Hot encoding
def encode_categorical_data(df, columns, encoding_type='label'):
    """Return a copy of *df* with the given columns label- or one-hot-encoded.

    Args:
        df: Source DataFrame; it is copied, never modified.
        columns: Labels of the columns to encode.
        encoding_type: 'label' replaces each listed column with the output of
            ``LabelEncoder.fit_transform`` on that column; 'onehot' expands
            the listed columns via ``pd.get_dummies`` with float indicators.

    Returns:
        A new DataFrame holding the encoded data.

    Raises:
        ValueError: If *encoding_type* is neither 'label' nor 'onehot'.
    """
    supported = ('label', 'onehot')
    if encoding_type not in supported:
        raise ValueError("encoding_type should be either 'label' or 'onehot'")

    working = df.copy()

    # One-hot encoding is a single pandas call, so handle it first and return.
    if encoding_type == 'onehot':
        return pd.get_dummies(working, columns=columns, dtype=float)

    # Label encoding: the encoder is re-fitted on every column in turn.
    encoder = LabelEncoder()
    for name in columns:
        working[name] = encoder.fit_transform(working[name])
    return working

def main(csv_path='heart.csv'):
    """Load a CSV dataset, encode its categorical columns, and print the result.

    The encoding type is read interactively from standard input. The prompt
    is repeated until the user enters 'label' or 'onehot', so a mistyped
    answer does not abort the run with a ValueError from
    ``encode_categorical_data``.

    Args:
        csv_path: Path of the CSV file to read. Defaults to 'heart.csv'.
    """
    # Load dataset
    df = pd.read_csv(csv_path)
    print("First few rows of the dataset:")
    print(df.head())

    # Identify categorical columns
    categorical_columns = identify_categorical_columns(df)
    print("Categorical columns identified:", categorical_columns)

    # Choose encoding type; keep asking until the answer is one that
    # encode_categorical_data accepts.
    valid_encodings = ('label', 'onehot')
    while True:
        encoding_type = input("Choose encoding type ('label' or 'onehot'): ").strip().lower()
        if encoding_type in valid_encodings:
            break
        print("Invalid choice. Please enter 'label' or 'onehot'.")

    # Encode categorical columns
    df_encoded = encode_categorical_data(df, categorical_columns, encoding_type)

    # Display the transformed dataset
    print("Transformed dataset:")
    print(df_encoded.head())

# Run the interactive demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


First few rows of the dataset:
   Patient_ID        State_Name  Age  Gender  Diabetes  Hypertension  Obesity  \
0           1         Rajasthan   42  Female         0             0        1   
1           2  Himachal Pradesh   26    Male         0             0        0   
2           3             Assam   78    Male         0             0        1   
3           4            Odisha   58    Male         1             0        1   
4           5         Karnataka   22    Male         0             0        0   

   Smoking  Alcohol_Consumption  Diet_Score  Cholesterol_Level  Stress_Level  \
0        1                    0           9                248             4   
1        0                    1           4                272             7   
2        0                    0           6                268            10   
3        0                    0           9                224             1   
4        0                    0           5                277             9   

 

Choose encoding type ('label' or 'onehot'):  label


Transformed dataset:
   Patient_ID  State_Name  Age  Gender  Diabetes  Hypertension  Obesity  \
0           1          11   42       0         0             0        1   
1           2           5   26       1         0             0        0   
2           3           1   78       1         0             0        1   
3           4          10   58       1         1             0        1   
4           5           6   22       1         0             0        0   

   Smoking  Alcohol_Consumption  Diet_Score  Cholesterol_Level  Stress_Level  \
0        1                    0           9                248             4   
1        0                    1           4                272             7   
2        0                    0           6                268            10   
3        0                    0           9                224             1   
4        0                    0           5                277             9   

   Healthcare_Access  Heart_Attack_History  Hea