In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder


# Load the raw dataset (path is relative to the notebook's working directory).
data = pd.read_csv('diabetes_prediction_dataset.csv')

# Columns to rescale and columns to one-hot encode.
numeric_columns = ['age', 'bmi', 'HbA1c_level', 'blood_glucose_level']
categorical_columns = ['gender', 'smoking_history']

# Min-max scale the numeric columns, overwriting them in `data`.
# `scaler` is kept so the same transform can be reused or inverted later.
scaler = MinMaxScaler()
data[numeric_columns] = scaler.fit_transform(data[numeric_columns])

# One-hot encode the categorical columns.
# The encoder is left at its default (sparse) output and densified explicitly:
# the `sparse=` keyword was renamed to `sparse_output=` in scikit-learn 1.2 and
# removed in 1.4, so passing either spelling ties the cell to a version range.
encoder = OneHotEncoder()
encoded_categorical_data = encoder.fit_transform(data[categorical_columns]).toarray()
encoded_categories = encoder.get_feature_names_out(categorical_columns)

# Reuse `data`'s index so the column-wise concat below lines rows up by label
# even if `data` no longer has a default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_categorical_data,
    columns=encoded_categories,
    index=data.index,
)

# Replace the original categorical columns with their one-hot encoded versions.
processed_data = pd.concat(
    [data.drop(columns=categorical_columns), encoded_df],
    axis=1,
)

# Display the first few rows of the processed data
print(processed_data.head())


        age  hypertension  heart_disease       bmi  HbA1c_level  \
0  1.000000             0              1  0.177171     0.563636   
1  0.674675             0              0  0.202031     0.563636   
2  0.349349             0              0  0.202031     0.400000   
3  0.449449             0              0  0.156863     0.272727   
4  0.949950             1              1  0.118231     0.236364   

   blood_glucose_level  diabetes  gender_Female  gender_Male  gender_Other  \
0             0.272727         0            1.0          0.0           0.0   
1             0.000000         0            1.0          0.0           0.0   
2             0.354545         0            0.0          1.0           0.0   
3             0.340909         0            1.0          0.0           0.0   
4             0.340909         0            0.0          1.0           0.0   

   smoking_history_No Info  smoking_history_current  smoking_history_ever  \
0                      0.0                      0.0

